granicus.if.org Git - imagemagick/blob - MagickCore/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                                  Cristy                                     %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    http://www.imagemagick.org/script/license.php                            %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "MagickCore/studio.h"
  44 #include "MagickCore/exception.h"
  45 #include "MagickCore/exception-private.h"
  46 #include "MagickCore/image.h"
  47 #include "MagickCore/memory_.h"
  48 #include "MagickCore/string_.h"
  49 #include "MagickCore/string-private.h"
  50 #include "MagickCore/token.h"
  51 #include "MagickCore/token-private.h"
  52 #include "MagickCore/utility.h"
  53 #include "MagickCore/utility-private.h"
  54 \f
  55 /*
  56   Typedef declarations.
  57 */
  58 struct _TokenInfo
  59 {
  60   int
  61     state;
  62
  63   MagickStatusType
  64     flag;
  65
  66   ssize_t
  67     offset;
  68
  69   char
  70     quote;
  71
  72   size_t
  73     signature;
  74 };
  75 \f
  76 /*
  77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  78 %                                                                             %
  79 %                                                                             %
  80 %                                                                             %
  81 %   A c q u i r e T o k e n I n f o                                           %
  82 %                                                                             %
  83 %                                                                             %
  84 %                                                                             %
  85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  86 %
  87 %  AcquireTokenInfo() allocates the TokenInfo structure.
  88 %
  89 %  The format of the AcquireTokenInfo method is:
  90 %
  91 %      TokenInfo *AcquireTokenInfo()
  92 %
  93 */
  94 MagickExport TokenInfo *AcquireTokenInfo(void)
  95 {
  96   TokenInfo
  97     *token_info;
  98
  99   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
 100   if (token_info == (TokenInfo *) NULL)
 101     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
 102   token_info->signature=MagickSignature;
 103   return(token_info);
 104 }
 105 \f
 106 /*
 107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 108 %                                                                             %
 109 %                                                                             %
 110 %                                                                             %
 111 %   D e s t r o y T o k e n I n f o                                           %
 112 %                                                                             %
 113 %                                                                             %
 114 %                                                                             %
 115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 116 %
 117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
 118 %  structure.
 119 %
 120 %  The format of the DestroyTokenInfo method is:
 121 %
 122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 123 %
 124 %  A description of each parameter follows:
 125 %
 126 %    o token_info: Specifies a pointer to a TokenInfo structure.
 127 %
 128 */
 129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 130 {
 131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 132   assert(token_info != (TokenInfo *) NULL);
 133   assert(token_info->signature == MagickSignature);
 134   token_info->signature=(~MagickSignature);
 135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 136   return(token_info);
 137 }
 138 \f
 139 /*
 140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 141 %                                                                             %
 142 %                                                                             %
 143 %                                                                             %
 144 +   G e t M a g i c k T o k e n                                               %
 145 %                                                                             %
 146 %                                                                             %
 147 %                                                                             %
 148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 149 %
 150 %  GetMagickToken() gets a token from the token stream.  A token is defined as
 151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
 152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 153 %  parenthesis (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
 154 %  separator characters: ':', '=', ',', and ';'.
 155 %
 156 %  The format of the GetMagickToken method is:
 157 %
 158 %      void GetMagickToken(const char *start,const char **end,char *token)
 159 %
 160 %  A description of each parameter follows:
 161 %
 162 %    o start: the start of the token sequence.
 163 %
 164 %    o end: point to the end of the token sequence.
 165 %
 166 %    o token: copy the token to this buffer.
 167 %
 168 */
 169 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
 170 {
 171   double
 172     value;
 173
 174   register const char
 175     *p;
 176
 177   register ssize_t
 178     i;
 179
 180   assert(start != (const char *) NULL);
 181   assert(token != (char *) NULL);
 182   i=0;
 183   p=start;
 184   while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
 185     p++;
 186   switch (*p)
 187   {
 188     case '\0':
 189       break;
 190     case '"':
 191     case '\'':
 192     case '`':
 193     case '{':
 194     {
 195       register char
 196         escape;
 197
 198       switch (*p)
 199       {
 200         case '"': escape='"'; break;
 201         case '\'': escape='\''; break;
 202         case '`': escape='\''; break;
 203         case '{': escape='}'; break;
 204         default: escape=(*p); break;
 205       }
 206       for (p++; *p != '\0'; p++)
 207       {
 208         if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
 209           p++;
 210         else
 211           if (*p == escape)
 212             {
 213               p++;
 214               break;
 215             }
 216         token[i++]=(*p);
 217       }
 218       break;
 219     }
 220     case '/':
 221     {
 222       token[i++]=(*p++);
 223       if ((*p == '>') || (*p == '/'))
 224         token[i++]=(*p++);
 225       break;
 226     }
 227     default:
 228     {
 229       char
 230         *q;
 231
 232       value=StringToDouble(p,&q);
 233       (void) value;
 234       if ((p != q) && (*p != ','))
 235         {
 236           for ( ; (p < q) && (*p != ','); p++)
 237             token[i++]=(*p);
 238           if (*p == '%')
 239             token[i++]=(*p++);
 240           break;
 241         }
 242       if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
 243           (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
 244         {
 245           token[i++]=(*p++);
 246           break;
 247         }
 248       for ( ; *p != '\0'; p++)
 249       {
 250         if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
 251             (*p == ':') || (*p == ',') || (*p == '|') || (*p == ';')) &&
 252             (*(p-1) != '\\'))
 253           break;
 254         if ((i > 0) && (*p == '<'))
 255           break;
 256         token[i++]=(*p);
 257         if (*p == '>')
 258           break;
 259         if (*p == '(')
 260           for (p++; *p != '\0'; p++)
 261           {
 262             token[i++]=(*p);
 263             if ((*p == ')') && (*(p-1) != '\\'))
 264               break;
 265           }
 266       }
 267       break;
 268     }
 269   }
 270   token[i]='\0';
 271   if (LocaleNCompare(token,"url(",4) == 0)
 272     {
 273       ssize_t
 274         offset;
 275
 276       offset=4;
 277       if (token[offset] == '#')
 278         offset++;
 279       i=(ssize_t) strlen(token);
 280       (void) CopyMagickString(token,token+offset,MaxTextExtent);
 281       token[i-offset-1]='\0';
 282     }
 283   while (isspace((int) ((unsigned char) *p)) != 0)
 284     p++;
 285   if (end != (const char **) NULL)
 286     *end=(const char *) p;
 287 }
 288 \f
 289 /*
 290 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 291 %                                                                             %
 292 %                                                                             %
 293 %                                                                             %
 294 %   G l o b E x p r e s s i o n                                               %
 295 %                                                                             %
 296 %                                                                             %
 297 %                                                                             %
 298 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 299 %
 300 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 301 %
 302 %  The format of the GlobExpression function is:
 303 %
 304 %      MagickBooleanType GlobExpression(const char *expression,
 305 %        const char *pattern,const MagickBooleanType case_insensitive)
 306 %
 307 %  A description of each parameter follows:
 308 %
 309 %    o expression: Specifies a pointer to a text string containing a file name.
 310 %
 311 %    o pattern: Specifies a pointer to a text string containing a pattern.
 312 %
 313 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 314 %      an expression.
 315 %
 316 */
 317 MagickExport MagickBooleanType GlobExpression(const char *expression,
 318   const char *pattern,const MagickBooleanType case_insensitive)
 319 {
 320   MagickBooleanType
 321     done,
 322     match;
 323
 324   register const char
 325     *p;
 326
 327   /*
 328     Return on empty pattern or '*'.
 329   */
 330   if (pattern == (char *) NULL)
 331     return(MagickTrue);
 332   if (GetUTFCode(pattern) == 0)
 333     return(MagickTrue);
 334   if (LocaleCompare(pattern,"*") == 0)
 335     return(MagickTrue);
 336   p=pattern+strlen(pattern)-1;
 337   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 338     {
 339       ExceptionInfo
 340         *exception;
 341
 342       ImageInfo
 343         *image_info;
 344
 345       /*
 346         Determine if pattern is a scene, i.e. img0001.pcd[2].
 347       */
 348       image_info=AcquireImageInfo();
 349       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
 350       exception=AcquireExceptionInfo();
 351       (void) SetImageInfo(image_info,0,exception);
 352       exception=DestroyExceptionInfo(exception);
 353       if (LocaleCompare(image_info->filename,pattern) != 0)
 354         {
 355           image_info=DestroyImageInfo(image_info);
 356           return(MagickFalse);
 357         }
 358       image_info=DestroyImageInfo(image_info);
 359     }
 360   /*
 361     Evaluate glob expression.
 362   */
 363   done=MagickFalse;
 364   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 365   {
 366     if (GetUTFCode(expression) == 0)
 367       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 368         break;
 369     switch (GetUTFCode(pattern))
 370     {
 371       case '*':
 372       {
 373         MagickBooleanType
 374           status;
 375
 376         status=MagickFalse;
 377         pattern+=GetUTFOctets(pattern);
 378         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 379         {
 380           status=GlobExpression(expression,pattern,case_insensitive);
 381           expression+=GetUTFOctets(expression);
 382         }
 383         if (status != MagickFalse)
 384           {
 385             while (GetUTFCode(expression) != 0)
 386               expression+=GetUTFOctets(expression);
 387             while (GetUTFCode(pattern) != 0)
 388               pattern+=GetUTFOctets(pattern);
 389           }
 390         break;
 391       }
 392       case '[':
 393       {
 394         int
 395           c;
 396
 397         pattern+=GetUTFOctets(pattern);
 398         for ( ; ; )
 399         {
 400           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 401             {
 402               done=MagickTrue;
 403               break;
 404             }
 405           if (GetUTFCode(pattern) == '\\')
 406             {
 407               pattern+=GetUTFOctets(pattern);
 408               if (GetUTFCode(pattern) == 0)
 409                 {
 410                   done=MagickTrue;
 411                   break;
 412                 }
 413              }
 414           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 415             {
 416               c=GetUTFCode(pattern);
 417               pattern+=GetUTFOctets(pattern);
 418               pattern+=GetUTFOctets(pattern);
 419               if (GetUTFCode(pattern) == ']')
 420                 {
 421                   done=MagickTrue;
 422                   break;
 423                 }
 424               if (GetUTFCode(pattern) == '\\')
 425                 {
 426                   pattern+=GetUTFOctets(pattern);
 427                   if (GetUTFCode(pattern) == 0)
 428                     {
 429                       done=MagickTrue;
 430                       break;
 431                     }
 432                 }
 433               if ((GetUTFCode(expression) < c) ||
 434                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 435                 {
 436                   pattern+=GetUTFOctets(pattern);
 437                   continue;
 438                 }
 439             }
 440           else
 441             if (GetUTFCode(pattern) != GetUTFCode(expression))
 442               {
 443                 pattern+=GetUTFOctets(pattern);
 444                 continue;
 445               }
 446           pattern+=GetUTFOctets(pattern);
 447           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 448           {
 449             if ((GetUTFCode(pattern) == '\\') &&
 450                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 451               pattern+=GetUTFOctets(pattern);
 452             pattern+=GetUTFOctets(pattern);
 453           }
 454           if (GetUTFCode(pattern) != 0)
 455             {
 456               pattern+=GetUTFOctets(pattern);
 457               expression+=GetUTFOctets(expression);
 458             }
 459           break;
 460         }
 461         break;
 462       }
 463       case '?':
 464       {
 465         pattern+=GetUTFOctets(pattern);
 466         expression+=GetUTFOctets(expression);
 467         break;
 468       }
 469       case '{':
 470       {
 471         register const char
 472           *p;
 473
 474         pattern+=GetUTFOctets(pattern);
 475         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 476         {
 477           p=expression;
 478           match=MagickTrue;
 479           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
 480                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
 481                  (match != MagickFalse))
 482           {
 483             if (GetUTFCode(pattern) == '\\')
 484               pattern+=GetUTFOctets(pattern);
 485             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
 486               MagickFalse;
 487             p+=GetUTFOctets(p);
 488             pattern+=GetUTFOctets(pattern);
 489           }
 490           if (GetUTFCode(pattern) == 0)
 491             {
 492               match=MagickFalse;
 493               done=MagickTrue;
 494               break;
 495             }
 496           else
 497             if (match != MagickFalse)
 498               {
 499                 expression=p;
 500                 while ((GetUTFCode(pattern) != '}') &&
 501                        (GetUTFCode(pattern) != 0))
 502                 {
 503                   pattern+=GetUTFOctets(pattern);
 504                   if (GetUTFCode(pattern) == '\\')
 505                     {
 506                       pattern+=GetUTFOctets(pattern);
 507                       if (GetUTFCode(pattern) == '}')
 508                         pattern+=GetUTFOctets(pattern);
 509                     }
 510                 }
 511               }
 512             else
 513               {
 514                 while ((GetUTFCode(pattern) != '}') &&
 515                        (GetUTFCode(pattern) != ',') &&
 516                        (GetUTFCode(pattern) != 0))
 517                 {
 518                   pattern+=GetUTFOctets(pattern);
 519                   if (GetUTFCode(pattern) == '\\')
 520                     {
 521                       pattern+=GetUTFOctets(pattern);
 522                       if ((GetUTFCode(pattern) == '}') ||
 523                           (GetUTFCode(pattern) == ','))
 524                         pattern+=GetUTFOctets(pattern);
 525                     }
 526                 }
 527               }
 528             if (GetUTFCode(pattern) != 0)
 529               pattern+=GetUTFOctets(pattern);
 530           }
 531         break;
 532       }
 533       case '\\':
 534       {
 535         pattern+=GetUTFOctets(pattern);
 536         if (GetUTFCode(pattern) == 0)
 537           break;
 538       }
 539       default:
 540       {
 541         if (case_insensitive != MagickFalse)
 542           {
 543             if (tolower((int) GetUTFCode(expression)) !=
 544                 tolower((int) GetUTFCode(pattern)))
 545               {
 546                 done=MagickTrue;
 547                 break;
 548               }
 549           }
 550         else
 551           if (GetUTFCode(expression) != GetUTFCode(pattern))
 552             {
 553               done=MagickTrue;
 554               break;
 555             }
 556         expression+=GetUTFOctets(expression);
 557         pattern+=GetUTFOctets(pattern);
 558       }
 559     }
 560   }
 561   while (GetUTFCode(pattern) == '*')
 562     pattern+=GetUTFOctets(pattern);
 563   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 564     MagickTrue : MagickFalse;
 565   return(match);
 566 }
 567 \f
 568 /*
 569 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 570 %                                                                             %
 571 %                                                                             %
 572 %                                                                             %
 573 +     I s G l o b                                                             %
 574 %                                                                             %
 575 %                                                                             %
 576 %                                                                             %
 577 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 578 %
 579 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 580 %  pattern.
 581 %
 582 %  The format of the IsGlob method is:
 583 %
 584 %      MagickBooleanType IsGlob(const char *path)
 585 %
 586 %  A description of each parameter follows:
 587 %
 588 %    o path: the path.
 589 %
 590 */
 591 MagickPrivate MagickBooleanType IsGlob(const char *path)
 592 {
 593   MagickBooleanType
 594     status = MagickFalse;
 595
 596   register const char
 597     *p;
 598
 599   if (IsPathAccessible(path) != MagickFalse)
 600     return(MagickFalse);
 601   for (p=path; *p != '\0'; p++)
 602   {
 603     switch (*p)
 604     {
 605       case '*':
 606       case '?':
 607       case '{':
 608       case '}':
 609       case '[':
 610       case ']':
 611       {
 612         status=MagickTrue;
 613         break;
 614       }
 615       default:
 616         break;
 617     }
 618   }
 619   return(status);
 620 }
 621 \f
 622 /*
 623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 624 %                                                                             %
 625 %                                                                             %
 626 %                                                                             %
 627 %   T o k e n i z e r                                                         %
 628 %                                                                             %
 629 %                                                                             %
 630 %                                                                             %
 631 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 632 %
 633 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 634 %  one at a time from a string of characters.  The characters used for white
 635 %  space, for break characters, and for quotes can be specified.  Also,
 636 %  characters in the string can be preceded by a specifiable escape character
 637 %  which removes any special meaning the character may have.
 638 %
 639 %  Here is some terminology:
 640 %
 641 %    o token: A single unit of information in the form of a group of
 642 %      characters.
 643 %
 644 %    o white space: Space that gets ignored (except within quotes or when
 645 %      escaped), like blanks and tabs. in addition, white space terminates a
 646 %      non-quoted token.
 647 %
 648 %    o break set: One or more characters that separates non-quoted tokens.
 649 %      Commas are a common break character. The usage of break characters to
 650 %      signal the end of a token is the same as that of white space, except
 651 %      multiple break characters with nothing or only white space between
 652 %      generate a null token for each two break characters together.
 653 %
 654 %      For example, if blank is set to be the white space and comma is set to
 655 %      be the break character, the line
 656 %
 657 %        A, B, C ,  , DEF
 658 %
 659 %        ... consists of 5 tokens:
 660 %
 661 %        1)  "A"
 662 %        2)  "B"
 663 %        3)  "C"
 664 %        4)  "" (the null string)
 665 %        5)  "DEF"
 666 %
 667 %    o Quote character: A character that, when surrounding a group of other
 668 %      characters, causes the group of characters to be treated as a single
 669 %      token, no matter how many white spaces or break characters exist in
 670 %      the group. Also, a token always terminates after the closing quote.
 671 %      For example, if ' is the quote character, blank is white space, and
 672 %      comma is the break character, the following string
 673 %
 674 %        A, ' B, CD'EF GHI
 675 %
 676 %        ... consists of 4 tokens:
 677 %
 678 %        1)  "A"
 679 %        2)  " B, CD" (note the blanks & comma)
 680 %        3)  "EF"
 681 %        4)  "GHI"
 682 %
 683 %      The quote characters themselves do not appear in the resultant
 684 %      tokens.  The double quotes are delimiters i use here for
 685 %      documentation purposes only.
 686 %
 687 %    o Escape character: A character which itself is ignored but which
 688 %      causes the next character to be used as is.  ^ and \ are often used
 689 %      as escape characters. An escape in the last position of the string
 690 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 691 %      and non-escape) character. For example, assume white space, break
 692 %      character, and quote are the same as in the above examples, and
 693 %      further, assume that ^ is the escape character. Then, in the string
 694 %
 695 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 696 %
 697 %        ... there are 7 tokens:
 698 %
 699 %        1)  "ABC"
 700 %        2)  " DEF ' GH"
 701 %        3)  "I"
 702 %        4)  " "     (a lone blank)
 703 %        5)  "J"
 704 %        6)  "K L"
 705 %        7)  "^"     (passed as is at end of line)
 706 %
 707 %  The format of the Tokenizer method is:
 708 %
 709 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 710 %        const size_t max_token_length,const char *line,const char *white,
 711 %        const char *break_set,const char *quote,const char escape,
 712 %        char *breaker,int *next,char *quoted)
 713 %
 714 %  A description of each parameter follows:
 715 %
 716 %    o flag: right now, only the low order 2 bits are used.
 717 %
 718 %        1 => convert non-quoted tokens to upper case
 719 %        2 => convert non-quoted tokens to lower case
 720 %        0 => do not convert non-quoted tokens
 721 %
 722 %    o token: a character string containing the returned next token
 723 %
 724 %    o max_token_length: the maximum size of "token".  Characters beyond
 725 %      "max_token_length" are truncated.
 726 %
 727 %    o line: the string to be parsed.
 728 %
 729 %    o white: a string of the valid white spaces.  example:
 730 %
 731 %        char whitesp[]={" \t"};
 732 %
 733 %      blank and tab will be valid white space.
 734 %
 735 %    o break_set: a string of the valid break characters. example:
 736 %
 737 %        char breakch[]={";,"};
 738 %
 739 %      semicolon and comma will be valid break characters.
 740 %
 741 %    o quote: a string of the valid quote characters. An example would be
 742 %
 743 %        char quote[]={"'\""};
 744 %
 745 %      (this causes single and double quotes to be valid) Note that a
 746 %      token starting with one of these characters needs the same quote
 747 %      character to terminate it.
 748 %
 749 %      for example:
 750 %
 751 %        "ABC '
 752 %
 753 %      is unterminated, but
 754 %
 755 %        "DEF" and 'GHI'
 756 %
 757 %      are properly terminated.  Note that different quote characters
 758 %      can appear on the same line; only for a given token do the quote
 759 %      characters have to be the same.
 760 %
 761 %    o escape: the escape character (NOT a string ... only one
 762 %      allowed). Use zero if none is desired.
 763 %
 764 %    o breaker: the break character used to terminate the current
 765 %      token.  If the token was quoted, this will be the quote used.  If
 766 %      the token is the last one on the line, this will be zero.
 767 %
 768 %    o next: this variable points to the first character of the
 769 %      next token.  it gets reset by "tokenizer" as it steps through the
 770 %      string.  Set it to 0 upon initialization, and leave it alone
 771 %      after that.  You can change it if you want to jump around in the
 772 %      string or re-parse from the beginning, but be careful.
 773 %
 774 %    o quoted: set to True if the token was quoted and MagickFalse
 775 %      if not.  You may need this information (for example:  in C, a
 776 %      string with quotes around it is a character string, while one
 777 %      without is an identifier).
 778 %
 779 %    o result: 0 if we haven't reached EOS (end of string), and 1
 780 %      if we have.
 781 %
 782 */
 783
 784 #define IN_WHITE 0
 785 #define IN_TOKEN 1
 786 #define IN_QUOTE 2
 787 #define IN_OZONE 3
 788
 789 static ssize_t sindex(int c,const char *string)
 790 {
 791   register const char
 792     *p;
 793
 794   for (p=string; *p != '\0'; p++)
 795     if (c == (int) (*p))
 796       return((ssize_t) (p-string));
 797   return(-1);
 798 }
 799
 800 static void StoreToken(TokenInfo *token_info,char *string,
 801   size_t max_token_length,int c)
 802 {
 803   register ssize_t
 804     i;
 805
 806   if ((token_info->offset < 0) ||
 807       ((size_t) token_info->offset >= (max_token_length-1)))
 808     return;
 809   i=token_info->offset++;
 810   string[i]=(char) c;
 811   if (token_info->state == IN_QUOTE)
 812     return;
 813   switch (token_info->flag & 0x03)
 814   {
 815     case 1:
 816     {
 817       string[i]=(char) toupper(c);
 818       break;
 819     }
 820     case 2:
 821     {
 822       string[i]=(char) tolower(c);
 823       break;
 824     }
 825     default:
 826       break;
 827   }
 828 }
 829
 830 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
 831   char *token,const size_t max_token_length,const char *line,const char *white,
 832   const char *break_set,const char *quote,const char escape,char *breaker,
 833   int *next,char *quoted)
 834 {
 835   int
 836     c;
 837
 838   register ssize_t
 839     i;
 840
 841   *breaker='\0';
 842   *quoted='\0';
 843   if (line[*next] == '\0')
 844     return(1);
 845   token_info->state=IN_WHITE;
 846   token_info->quote=(char) MagickFalse;
 847   token_info->flag=flag;
 848   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
 849   {
 850     c=(int) line[*next];
 851     i=sindex(c,break_set);
 852     if (i >= 0)
 853       {
 854         switch (token_info->state)
 855         {
 856           case IN_WHITE:
 857           case IN_TOKEN:
 858           case IN_OZONE:
 859           {
 860             (*next)++;
 861             *breaker=break_set[i];
 862             token[token_info->offset]='\0';
 863             return(0);
 864           }
 865           case IN_QUOTE:
 866           {
 867             StoreToken(token_info,token,max_token_length,c);
 868             break;
 869           }
 870         }
 871         continue;
 872       }
 873     i=sindex(c,quote);
 874     if (i >= 0)
 875       {
 876         switch (token_info->state)
 877         {
 878           case IN_WHITE:
 879           {
 880             token_info->state=IN_QUOTE;
 881             token_info->quote=quote[i];
 882             *quoted=(char) MagickTrue;
 883             break;
 884           }
 885           case IN_QUOTE:
 886           {
 887             if (quote[i] != token_info->quote)
 888               StoreToken(token_info,token,max_token_length,c);
 889             else
 890               {
 891                 token_info->state=IN_OZONE;
 892                 token_info->quote='\0';
 893               }
 894             break;
 895           }
 896           case IN_TOKEN:
 897           case IN_OZONE:
 898           {
 899             *breaker=(char) c;
 900             token[token_info->offset]='\0';
 901             return(0);
 902           }
 903         }
 904         continue;
 905       }
 906     i=sindex(c,white);
 907     if (i >= 0)
 908       {
 909         switch (token_info->state)
 910         {
 911           case IN_WHITE:
 912           case IN_OZONE:
 913             break;
 914           case IN_TOKEN:
 915           {
 916             token_info->state=IN_OZONE;
 917             break;
 918           }
 919           case IN_QUOTE:
 920           {
 921             StoreToken(token_info,token,max_token_length,c);
 922             break;
 923           }
 924         }
 925         continue;
 926       }
 927     if (c == (int) escape)
 928       {
 929         if (line[(*next)+1] == '\0')
 930           {
 931             *breaker='\0';
 932             StoreToken(token_info,token,max_token_length,c);
 933             (*next)++;
 934             token[token_info->offset]='\0';
 935             return(0);
 936           }
 937         switch (token_info->state)
 938         {
 939           case IN_WHITE:
 940           {
 941             (*next)--;
 942             token_info->state=IN_TOKEN;
 943             break;
 944           }
 945           case IN_TOKEN:
 946           case IN_QUOTE:
 947           {
 948             (*next)++;
 949             c=(int) line[*next];
 950             StoreToken(token_info,token,max_token_length,c);
 951             break;
 952           }
 953           case IN_OZONE:
 954           {
 955             token[token_info->offset]='\0';
 956             return(0);
 957           }
 958         }
 959         continue;
 960       }
 961     switch (token_info->state)
 962     {
 963       case IN_WHITE:
 964         token_info->state=IN_TOKEN;
 965       case IN_TOKEN:
 966       case IN_QUOTE:
 967       {
 968         StoreToken(token_info,token,max_token_length,c);
 969         break;
 970       }
 971       case IN_OZONE:
 972       {
 973         token[token_info->offset]='\0';
 974         return(0);
 975       }
 976     }
 977   }
 978   token[token_info->offset]='\0';
 979   return(0);
 980 }