granicus.if.org Git - imagemagick/blob - magick/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                               John Cristy                                   %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    http://www.imagemagick.org/script/license.php                            %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "magick/studio.h"
  44 #include "magick/exception.h"
  45 #include "magick/exception-private.h"
  46 #include "magick/image.h"
  47 #include "magick/memory_.h"
  48 #include "magick/string_.h"
  49 #include "magick/token.h"
  50 #include "magick/token-private.h"
  51 #include "magick/utility.h"
  52 \f
  53 /*
  54   Typedef declarations.
  55 */
  56 struct _TokenInfo
  57 {
  58   int
  59     state;
  60
  61   MagickStatusType
  62     flag;
  63
  64   long
  65     offset;
  66
  67   char
  68     quote;
  69
  70   unsigned long
  71     signature;
  72 };
  73 \f
  74 /*
  75 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  76 %                                                                             %
  77 %                                                                             %
  78 %                                                                             %
  79 %   A c q u i r e T o k e n I n f o                                           %
  80 %                                                                             %
  81 %                                                                             %
  82 %                                                                             %
  83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  84 %
  85 %  AcquireTokenInfo() allocates the TokenInfo structure.
  86 %
  87 %  The format of the AcquireTokenInfo method is:
  88 %
  89 %      TokenInfo *AcquireTokenInfo()
  90 %
  91 */
  92 MagickExport TokenInfo *AcquireTokenInfo(void)
  93 {
  94   TokenInfo
  95     *token_info;
  96
  97   token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
  98   if (token_info == (TokenInfo *) NULL)
  99     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
 100   token_info->signature=MagickSignature;
 101   return(token_info);
 102 }
 103 \f
 104 /*
 105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 106 %                                                                             %
 107 %                                                                             %
 108 %                                                                             %
 109 %   D e s t r o y T o k e n I n f o                                           %
 110 %                                                                             %
 111 %                                                                             %
 112 %                                                                             %
 113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 114 %
 115 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
 116 %  structure.
 117 %
 118 %  The format of the DestroyTokenInfo method is:
 119 %
 120 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 121 %
 122 %  A description of each parameter follows:
 123 %
 124 %    o token_info: Specifies a pointer to a TokenInfo structure.
 125 %
 126 */
 127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 128 {
 129   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 130   assert(token_info != (TokenInfo *) NULL);
 131   assert(token_info->signature == MagickSignature);
 132   token_info->signature=(~MagickSignature);
 133   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 134   return(token_info);
 135 }
 136 \f
 137 /*
 138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 139 %                                                                             %
 140 %                                                                             %
 141 %                                                                             %
 142 +   G e t M a g i c k T o k e n                                               %
 143 %                                                                             %
 144 %                                                                             %
 145 %                                                                             %
 146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 147 %
 148 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
 149 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
 150 %  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 151 %  parentheses (e.g. rgb(0,0,0)).
 152 %
 153 %  The format of the GetMagickToken method is:
 154 %
 155 %      void GetMagickToken(const char *start,const char **end,char *token)
 156 %
 157 %  A description of each parameter follows:
 158 %
 159 %    o start: the start of the token sequence.
 160 %
 161 %    o end: point to the end of the token sequence.
 162 %
 163 %    o token: copy the token to this buffer.
 164 %
 165 */
 166 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
 167 {
 168   double
 169     value;
 170
 171   register const char
 172     *p;
 173
 174   register long
 175     i;
 176
 177   i=0;
 178   for (p=start; *p != '\0'; )
 179   {
 180     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
 181       p++;
 182     if (*p == '\0')
 183       break;
 184     switch (*p)
 185     {
 186       case '"':
 187       case '\'':
 188       case '`':
 189       case '{':
 190       {
 191         register char
 192           escape;
 193
 194         switch (*p)
 195         {
 196           case '"': escape='"'; break;
 197           case '\'': escape='\''; break;
 198           case '`': escape='\''; break;
 199           case '{': escape='}'; break;
 200           default: escape=(*p); break;
 201         }
 202         for (p++; *p != '\0'; p++)
 203         {
 204           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
 205             p++;
 206           else
 207             if (*p == escape)
 208               {
 209                 p++;
 210                 break;
 211               }
 212           token[i++]=(*p);
 213         }
 214         break;
 215       }
 216       case '/':
 217       {
 218         token[i++]=(*p++);
 219         if ((*p == '>') || (*p == '/'))
 220           token[i++]=(*p++);
 221         break;
 222       }
 223       default:
 224       {
 225         char
 226           *q;
 227
 228         value=strtod(p,&q);
 229         if ((p != q) && (*p != ','))
 230           {
 231             for ( ; (p < q) && (*p != ','); p++)
 232               token[i++]=(*p);
 233             if (*p == '%')
 234               token[i++]=(*p++);
 235             break;
 236           }
 237         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
 238             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
 239           {
 240             token[i++]=(*p++);
 241             break;
 242           }
 243         for ( ; *p != '\0'; p++)
 244         {
 245           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
 246               (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
 247             break;
 248           if ((i > 0) && (*p == '<'))
 249             break;
 250           token[i++]=(*p);
 251           if (*p == '>')
 252             break;
 253           if (*p == '(')
 254             for (p++; *p != '\0'; p++)
 255             {
 256               token[i++]=(*p);
 257               if ((*p == ')') && (*(p-1) != '\\'))
 258                 break;
 259             }
 260         }
 261         break;
 262       }
 263     }
 264     break;
 265   }
 266   token[i]='\0';
 267   if (LocaleNCompare(token,"url(",4) == 0)
 268     {
 269       ssize_t
 270         offset;
 271
 272       offset=4;
 273       if (token[offset] == '#')
 274         offset++;
 275       i=(long) strlen(token);
 276       (void) CopyMagickString(token,token+offset,MaxTextExtent);
 277       token[i-offset-1]='\0';
 278     }
 279   while (isspace((int) ((unsigned char) *p)) != 0)
 280     p++;
 281   if (end != (const char **) NULL)
 282     *end=(const char *) p;
 283 }
 284 \f
 285 /*
 286 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 287 %                                                                             %
 288 %                                                                             %
 289 %                                                                             %
 290 %   G l o b E x p r e s s i o n                                               %
 291 %                                                                             %
 292 %                                                                             %
 293 %                                                                             %
 294 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 295 %
 296 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 297 %
 298 %  The format of the GlobExpression function is:
 299 %
 300 %      MagickBooleanType GlobExpression(const char *expression,
 301 %        const char *pattern,const MagickBooleanType case_insensitive)
 302 %
 303 %  A description of each parameter follows:
 304 %
 305 %    o expression: Specifies a pointer to a text string containing a file name.
 306 %
 307 %    o pattern: Specifies a pointer to a text string containing a pattern.
 308 %
 309 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 310 %      an expression.
 311 %
 312 */
 313 MagickExport MagickBooleanType GlobExpression(const char *expression,
 314   const char *pattern,const MagickBooleanType case_insensitive)
 315 {
 316   MagickBooleanType
 317     done,
 318     match;
 319
 320   register const char
 321     *p;
 322
 323   /*
 324     Return on empty pattern or '*'.
 325   */
 326   if (pattern == (char *) NULL)
 327     return(MagickTrue);
 328   if (GetUTFCode(pattern) == 0)
 329     return(MagickTrue);
 330   if (LocaleCompare(pattern,"*") == 0)
 331     return(MagickTrue);
 332   p=pattern+strlen(pattern)-1;
 333   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 334     {
 335       ExceptionInfo
 336         *exception;
 337
 338       ImageInfo
 339         *image_info;
 340
 341       /*
 342         Determine if pattern is a scene, i.e. img0001.pcd[2].
 343       */
 344       image_info=AcquireImageInfo();
 345       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
 346       exception=AcquireExceptionInfo();
 347       (void) SetImageInfo(image_info,0,exception);
 348       exception=DestroyExceptionInfo(exception);
 349       if (LocaleCompare(image_info->filename,pattern) != 0)
 350         {
 351           image_info=DestroyImageInfo(image_info);
 352           return(MagickFalse);
 353         }
 354       image_info=DestroyImageInfo(image_info);
 355     }
 356   /*
 357     Evaluate glob expression.
 358   */
 359   done=MagickFalse;
 360   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 361   {
 362     if (GetUTFCode(expression) == 0)
 363       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 364         break;
 365     switch (GetUTFCode(pattern))
 366     {
 367       case '*':
 368       {
 369         MagickBooleanType
 370           status;
 371
 372         status=MagickFalse;
 373         pattern+=GetUTFOctets(pattern);
 374         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 375         {
 376           status=GlobExpression(expression,pattern,case_insensitive);
 377           expression+=GetUTFOctets(expression);
 378         }
 379         if (status != MagickFalse)
 380           {
 381             while (GetUTFCode(expression) != 0)
 382               expression+=GetUTFOctets(expression);
 383             while (GetUTFCode(pattern) != 0)
 384               pattern+=GetUTFOctets(pattern);
 385           }
 386         break;
 387       }
 388       case '[':
 389       {
 390         long
 391           c;
 392
 393         pattern+=GetUTFOctets(pattern);
 394         for ( ; ; )
 395         {
 396           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 397             {
 398               done=MagickTrue;
 399               break;
 400             }
 401           if (GetUTFCode(pattern) == '\\')
 402             {
 403               pattern+=GetUTFOctets(pattern);
 404               if (GetUTFCode(pattern) == 0)
 405                 {
 406                   done=MagickTrue;
 407                   break;
 408                 }
 409              }
 410           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 411             {
 412               c=GetUTFCode(pattern);
 413               pattern+=GetUTFOctets(pattern);
 414               pattern+=GetUTFOctets(pattern);
 415               if (GetUTFCode(pattern) == ']')
 416                 {
 417                   done=MagickTrue;
 418                   break;
 419                 }
 420               if (GetUTFCode(pattern) == '\\')
 421                 {
 422                   pattern+=GetUTFOctets(pattern);
 423                   if (GetUTFCode(pattern) == 0)
 424                     {
 425                       done=MagickTrue;
 426                       break;
 427                     }
 428                 }
 429               if ((GetUTFCode(expression) < c) ||
 430                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 431                 {
 432                   pattern+=GetUTFOctets(pattern);
 433                   continue;
 434                 }
 435             }
 436           else
 437             if (GetUTFCode(pattern) != GetUTFCode(expression))
 438               {
 439                 pattern+=GetUTFOctets(pattern);
 440                 continue;
 441               }
 442           pattern+=GetUTFOctets(pattern);
 443           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 444           {
 445             if ((GetUTFCode(pattern) == '\\') &&
 446                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 447               pattern+=GetUTFOctets(pattern);
 448             pattern+=GetUTFOctets(pattern);
 449           }
 450           if (GetUTFCode(pattern) != 0)
 451             {
 452               pattern+=GetUTFOctets(pattern);
 453               expression+=GetUTFOctets(expression);
 454             }
 455           break;
 456         }
 457         break;
 458       }
 459       case '?':
 460       {
 461         pattern+=GetUTFOctets(pattern);
 462         expression+=GetUTFOctets(expression);
 463         break;
 464       }
 465       case '{':
 466       {
 467         register const char
 468           *p;
 469
 470         pattern+=GetUTFOctets(pattern);
 471         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 472         {
 473           p=expression;
 474           match=MagickTrue;
 475           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
 476                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
 477                  (match != MagickFalse))
 478           {
 479             if (GetUTFCode(pattern) == '\\')
 480               pattern+=GetUTFOctets(pattern);
 481             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
 482               MagickFalse;
 483             p+=GetUTFOctets(p);
 484             pattern+=GetUTFOctets(pattern);
 485           }
 486           if (GetUTFCode(pattern) == 0)
 487             {
 488               match=MagickFalse;
 489               done=MagickTrue;
 490               break;
 491             }
 492           else
 493             if (match != MagickFalse)
 494               {
 495                 expression=p;
 496                 while ((GetUTFCode(pattern) != '}') &&
 497                        (GetUTFCode(pattern) != 0))
 498                 {
 499                   pattern+=GetUTFOctets(pattern);
 500                   if (GetUTFCode(pattern) == '\\')
 501                     {
 502                       pattern+=GetUTFOctets(pattern);
 503                       if (GetUTFCode(pattern) == '}')
 504                         pattern+=GetUTFOctets(pattern);
 505                     }
 506                 }
 507               }
 508             else
 509               {
 510                 while ((GetUTFCode(pattern) != '}') &&
 511                        (GetUTFCode(pattern) != ',') &&
 512                        (GetUTFCode(pattern) != 0))
 513                 {
 514                   pattern+=GetUTFOctets(pattern);
 515                   if (GetUTFCode(pattern) == '\\')
 516                     {
 517                       pattern+=GetUTFOctets(pattern);
 518                       if ((GetUTFCode(pattern) == '}') ||
 519                           (GetUTFCode(pattern) == ','))
 520                         pattern+=GetUTFOctets(pattern);
 521                     }
 522                 }
 523               }
 524             if (GetUTFCode(pattern) != 0)
 525               pattern+=GetUTFOctets(pattern);
 526           }
 527         break;
 528       }
 529       case '\\':
 530       {
 531         pattern+=GetUTFOctets(pattern);
 532         if (GetUTFCode(pattern) != 0)
 533           pattern+=GetUTFOctets(pattern);
 534       }
 535       default:
 536       {
 537         if (case_insensitive != MagickFalse)
 538           {
 539             if (tolower((int) GetUTFCode(expression)) !=
 540                 tolower((int) GetUTFCode(pattern)))
 541               {
 542                 done=MagickTrue;
 543                 break;
 544               }
 545           }
 546         else
 547           if (GetUTFCode(expression) != GetUTFCode(pattern))
 548             {
 549               done=MagickTrue;
 550               break;
 551             }
 552         expression+=GetUTFOctets(expression);
 553         pattern+=GetUTFOctets(pattern);
 554       }
 555     }
 556   }
 557   while (GetUTFCode(pattern) == '*')
 558     pattern+=GetUTFOctets(pattern);
 559   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 560     MagickTrue : MagickFalse;
 561   return(match);
 562 }
 563 \f
 564 /*
 565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 566 %                                                                             %
 567 %                                                                             %
 568 %                                                                             %
 569 +     I s G l o b                                                             %
 570 %                                                                             %
 571 %                                                                             %
 572 %                                                                             %
 573 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 574 %
 575 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 576 %  pattern.
 577 %
 578 %  The format of the IsGlob method is:
 579 %
 580 %      MagickBooleanType IsGlob(const char *path)
 581 %
 582 %  A description of each parameter follows:
 583 %
 584 %    o path: the path.
 585 %
 586 */
 587 MagickExport MagickBooleanType IsGlob(const char *path)
 588 {
 589   MagickBooleanType
 590     status;
 591
 592   if (IsPathAccessible(path) != MagickFalse)
 593     return(MagickFalse);
 594   status=(strchr(path,'*') != (char *) NULL) ||
 595     (strchr(path,'?') != (char *) NULL) ||
 596     (strchr(path,'{') != (char *) NULL) ||
 597     (strchr(path,'}') != (char *) NULL) ||
 598     (strchr(path,'[') != (char *) NULL) ||
 599     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
 600   return(status);
 601 }
 602 \f
 603 /*
 604 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 605 %                                                                             %
 606 %                                                                             %
 607 %                                                                             %
 608 %   T o k e n i z e r                                                         %
 609 %                                                                             %
 610 %                                                                             %
 611 %                                                                             %
 612 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 613 %
 614 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 615 %  one at a time from a string of characters.  The characters used for white
 616 %  space, for break characters, and for quotes can be specified.  Also,
 617 %  characters in the string can be preceded by a specifiable escape character
 618 %  which removes any special meaning the character may have.
 619 %
 620 %  Here is some terminology:
 621 %
 622 %    o token: A single unit of information in the form of a group of
 623 %      characters.
 624 %
 625 %    o white space: Space that gets ignored (except within quotes or when
 626 %      escaped), like blanks and tabs. In addition, white space terminates a
 627 %      non-quoted token.
 628 %
 629 %    o break set: One or more characters that separates non-quoted tokens.
 630 %      Commas are a common break character. The usage of break characters to
 631 %      signal the end of a token is the same as that of white space, except
 632 %      multiple break characters with nothing or only white space between
 633 %      generate a null token for each two break characters together.
 634 %
 635 %      For example, if blank is set to be the white space and comma is set to
 636 %      be the break character, the line
 637 %
 638 %        A, B, C ,  , DEF
 639 %
 640 %        ... consists of 5 tokens:
 641 %
 642 %        1)  "A"
 643 %        2)  "B"
 644 %        3)  "C"
 645 %        4)  "" (the null string)
 646 %        5)  "DEF"
 647 %
 648 %    o Quote character: A character that, when surrounding a group of other
 649 %      characters, causes the group of characters to be treated as a single
 650 %      token, no matter how many white spaces or break characters exist in
 651 %      the group. Also, a token always terminates after the closing quote.
 652 %      For example, if ' is the quote character, blank is white space, and
 653 %      comma is the break character, the following string
 654 %
 655 %        A, ' B, CD'EF GHI
 656 %
 657 %        ... consists of 4 tokens:
 658 %
 659 %        1)  "A"
 660 %        2)  " B, CD" (note the blanks & comma)
 661 %        3)  "EF"
 662 %        4)  "GHI"
 663 %
 664 %      The quote characters themselves do not appear in the resultant
 665 %      tokens.  The double quotes are delimiters i use here for
 666 %      documentation purposes only.
 667 %
 668 %    o Escape character: A character which itself is ignored but which
 669 %      causes the next character to be used as is.  ^ and \ are often used
 670 %      as escape characters. An escape in the last position of the string
 671 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 672 %      and non-escape) character. For example, assume white space, break
 673 %      character, and quote are the same as in the above examples, and
 674 %      further, assume that ^ is the escape character. Then, in the string
 675 %
 676 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 677 %
 678 %        ... there are 7 tokens:
 679 %
 680 %        1)  "ABC"
 681 %        2)  " DEF ' GH"
 682 %        3)  "I"
 683 %        4)  " "     (a lone blank)
 684 %        5)  "J"
 685 %        6)  "K L"
 686 %        7)  "^"     (passed as is at end of line)
 687 %
 688 %  The format of the Tokenizer method is:
 689 %
 690 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 691 %        const size_t max_token_length,const char *line,const char *white,
 692 %        const char *break_set,const char *quote,const char escape,
 693 %        char *breaker,int *next,char *quoted)
 694 %
 695 %  A description of each parameter follows:
 696 %
 697 %    o flag: right now, only the low order 2 bits are used.
 698 %
 699 %        1 => convert non-quoted tokens to upper case
 700 %        2 => convert non-quoted tokens to lower case
 701 %        0 => do not convert non-quoted tokens
 702 %
 703 %    o token: a character string containing the returned next token
 704 %
 705 %    o max_token_length: the maximum size of "token".  Characters beyond
 706 %      "max_token_length" are truncated.
 707 %
 708 %    o line: the string to be parsed.
 709 %
 710 %    o white: a string of the valid white spaces.  example:
 711 %
 712 %        char whitesp[]={" \t"};
 713 %
 714 %      blank and tab will be valid white space.
 715 %
 716 %    o break: a string of the valid break characters. example:
 717 %
 718 %        char breakch[]={";,"};
 719 %
 720 %      semicolon and comma will be valid break characters.
 721 %
 722 %    o quote: a string of the valid quote characters. An example would be
 723 %
 724 %        char quote[]={"'\""};
 725 %
 726 %      (this causes single and double quotes to be valid) Note that a
 727 %      token starting with one of these characters needs the same quote
 728 %      character to terminate it.
 729 %
 730 %      for example:
 731 %
 732 %        "ABC '
 733 %
 734 %      is unterminated, but
 735 %
 736 %        "DEF" and 'GHI'
 737 %
 738 %      are properly terminated.  Note that different quote characters
 739 %      can appear on the same line; only for a given token do the quote
 740 %      characters have to be the same.
 741 %
 742 %    o escape: the escape character (NOT a string ... only one
 743 %      allowed). Use zero if none is desired.
 744 %
 745 %    o breaker: the break character used to terminate the current
 746 %      token.  If the token was quoted, this will be the quote used.  If
 747 %      the token is the last one on the line, this will be zero.
 748 %
 749 %    o next: this variable points to the first character of the
 750 %      next token.  it gets reset by "tokenizer" as it steps through the
 751 %      string.  Set it to 0 upon initialization, and leave it alone
 752 %      after that.  You can change it if you want to jump around in the
 753 %      string or re-parse from the beginning, but be careful.
 754 %
 755 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
 756 %      if not.  You may need this information (for example:  in C, a
 757 %      string with quotes around it is a character string, while one
 758 %      without is an identifier).
 759 %
 760 %    o result: 0 if we haven't reached EOS (end of string), and 1
 761 %      if we have.
 762 %
 763 */
 764
 765 #define IN_WHITE 0
 766 #define IN_TOKEN 1
 767 #define IN_QUOTE 2
 768 #define IN_OZONE 3
 769
 770 static long sindex(int c,const char *string)
 771 {
 772   register const char
 773     *p;
 774
 775   for (p=string; *p != '\0'; p++)
 776     if (c == (int) (*p))
 777       return(p-string);
 778   return(-1);
 779 }
 780
 781 static void StoreToken(TokenInfo *token_info,char *string,
 782   size_t max_token_length,int c)
 783 {
 784   register long
 785     i;
 786
 787   if ((token_info->offset < 0) ||
 788       ((size_t) token_info->offset >= (max_token_length-1)))
 789     return;
 790   i=token_info->offset++;
 791   string[i]=(char) c;
 792   if (token_info->state == IN_QUOTE)
 793     return;
 794   switch (token_info->flag & 0x03)
 795   {
 796     case 1:
 797     {
 798       string[i]=(char) toupper(c);
 799       break;
 800     }
 801     case 2:
 802     {
 803       string[i]=(char) tolower(c);
 804       break;
 805     }
 806     default:
 807       break;
 808   }
 809 }
 810
 811 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
 812   char *token,const size_t max_token_length,const char *line,const char *white,
 813   const char *break_set,const char *quote,const char escape,char *breaker,
 814   int *next,char *quoted)
 815 {
 816   int
 817     c;
 818
 819   register long
 820     i;
 821
 822   *breaker='\0';
 823   *quoted='\0';
 824   if (line[*next] == '\0')
 825     return(1);
 826   token_info->state=IN_WHITE;
 827   token_info->quote=(char) MagickFalse;
 828   token_info->flag=flag;
 829   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
 830   {
 831     c=(int) line[*next];
 832     i=sindex(c,break_set);
 833     if (i >= 0)
 834       {
 835         switch (token_info->state)
 836         {
 837           case IN_WHITE:
 838           case IN_TOKEN:
 839           case IN_OZONE:
 840           {
 841             (*next)++;
 842             *breaker=break_set[i];
 843             token[token_info->offset]='\0';
 844             return(0);
 845           }
 846           case IN_QUOTE:
 847           {
 848             StoreToken(token_info,token,max_token_length,c);
 849             break;
 850           }
 851         }
 852         continue;
 853       }
 854     i=sindex(c,quote);
 855     if (i >= 0)
 856       {
 857         switch (token_info->state)
 858         {
 859           case IN_WHITE:
 860           {
 861             token_info->state=IN_QUOTE;
 862             token_info->quote=quote[i];
 863             *quoted=(char) MagickTrue;
 864             break;
 865           }
 866           case IN_QUOTE:
 867           {
 868             if (quote[i] != token_info->quote)
 869               StoreToken(token_info,token,max_token_length,c);
 870             else
 871               {
 872                 token_info->state=IN_OZONE;
 873                 token_info->quote='\0';
 874               }
 875             break;
 876           }
 877           case IN_TOKEN:
 878           case IN_OZONE:
 879           {
 880             *breaker=(char) c;
 881             token[token_info->offset]='\0';
 882             return(0);
 883           }
 884         }
 885         continue;
 886       }
 887     i=sindex(c,white);
 888     if (i >= 0)
 889       {
 890         switch (token_info->state)
 891         {
 892           case IN_WHITE:
 893           case IN_OZONE:
 894             break;
 895           case IN_TOKEN:
 896           {
 897             token_info->state=IN_OZONE;
 898             break;
 899           }
 900           case IN_QUOTE:
 901           {
 902             StoreToken(token_info,token,max_token_length,c);
 903             break;
 904           }
 905         }
 906         continue;
 907       }
 908     if (c == (int) escape)
 909       {
 910         if (line[(*next)+1] == '\0')
 911           {
 912             *breaker='\0';
 913             StoreToken(token_info,token,max_token_length,c);
 914             (*next)++;
 915             token[token_info->offset]='\0';
 916             return(0);
 917           }
 918         switch (token_info->state)
 919         {
 920           case IN_WHITE:
 921           {
 922             (*next)--;
 923             token_info->state=IN_TOKEN;
 924             break;
 925           }
 926           case IN_TOKEN:
 927           case IN_QUOTE:
 928           {
 929             (*next)++;
 930             c=(int) line[*next];
 931             StoreToken(token_info,token,max_token_length,c);
 932             break;
 933           }
 934           case IN_OZONE:
 935           {
 936             token[token_info->offset]='\0';
 937             return(0);
 938           }
 939         }
 940         continue;
 941       }
 942     switch (token_info->state)
 943     {
 944       case IN_WHITE:
 945         token_info->state=IN_TOKEN;
 946       case IN_TOKEN:
 947       case IN_QUOTE:
 948       {
 949         StoreToken(token_info,token,max_token_length,c);
 950         break;
 951       }
 952       case IN_OZONE:
 953       {
 954         token[token_info->offset]='\0';
 955         return(0);
 956       }
 957     }
 958   }
 959   token[token_info->offset]='\0';
 960   return(0);
 961 }