granicus.if.org Git - imagemagick/blob - magick/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                               John Cristy                                   %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    http://www.imagemagick.org/script/license.php                            %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "magick/studio.h"
  44 #include "magick/exception.h"
  45 #include "magick/exception-private.h"
  46 #include "magick/image.h"
  47 #include "magick/memory_.h"
  48 #include "magick/string_.h"
  49 #include "magick/token.h"
  50 #include "magick/token-private.h"
  51 #include "magick/utility.h"
  52 \f
  53 /*
  Typedef declarations.
  55 */
  56 struct _TokenInfo
  57 {
  58   int
  59     state;
  60
  61   MagickStatusType
  62     flag;
  63
  64   ssize_t
  65     offset;
  66
  67   char
  68     quote;
  69
  70   size_t
  71     signature;
  72 };
  73 \f
  74 /*
  75 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  76 %                                                                             %
  77 %                                                                             %
  78 %                                                                             %
  79 %   A c q u i r e T o k e n I n f o                                           %
  80 %                                                                             %
  81 %                                                                             %
  82 %                                                                             %
  83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  84 %
  85 %  AcquireTokenInfo() allocates the TokenInfo structure.
  86 %
  87 %  The format of the AcquireTokenInfo method is:
  88 %
  89 %      TokenInfo *AcquireTokenInfo()
  90 %
  91 */
  92 MagickExport TokenInfo *AcquireTokenInfo(void)
  93 {
  94   TokenInfo
  95     *token_info;
  96
  97   token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
  98   if (token_info == (TokenInfo *) NULL)
  99     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
 100   token_info->signature=MagickSignature;
 101   return(token_info);
 102 }
 103 \f
 104 /*
 105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 106 %                                                                             %
 107 %                                                                             %
 108 %                                                                             %
 109 %   D e s t r o y T o k e n I n f o                                           %
 110 %                                                                             %
 111 %                                                                             %
 112 %                                                                             %
 113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 114 %
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
 117 %
 118 %  The format of the DestroyTokenInfo method is:
 119 %
 120 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 121 %
 122 %  A description of each parameter follows:
 123 %
%    o token_info: Specifies a pointer to a TokenInfo structure.
 125 %
 126 */
 127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 128 {
 129   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 130   assert(token_info != (TokenInfo *) NULL);
 131   assert(token_info->signature == MagickSignature);
 132   token_info->signature=(~MagickSignature);
 133   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 134   return(token_info);
 135 }
 136 \f
 137 /*
 138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 139 %                                                                             %
 140 %                                                                             %
 141 %                                                                             %
 142 +   G e t M a g i c k T o k e n                                               %
 143 %                                                                             %
 144 %                                                                             %
 145 %                                                                             %
 146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 147 %
 148 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
 149 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
%  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 151 %  parenthesis (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
 152 %  separator characters: ':', '=', ',', and ';'.
 153 %
 154 %  The format of the GetMagickToken method is:
 155 %
 156 %      void GetMagickToken(const char *start,const char **end,char *token)
 157 %
 158 %  A description of each parameter follows:
 159 %
 160 %    o start: the start of the token sequence.
 161 %
 162 %    o end: point to the end of the token sequence.
 163 %
 164 %    o token: copy the token to this buffer.
 165 %
 166 */
 167 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
 168 {
 169   double
 170     value;
 171
 172   register const char
 173     *p;
 174
 175   register ssize_t
 176     i;
 177
 178   i=0;
 179   for (p=start; *p != '\0'; )
 180   {
 181     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
 182       p++;
 183     if (*p == '\0')
 184       break;
 185     switch (*p)
 186     {
 187       case '"':
 188       case '\'':
 189       case '`':
 190       case '{':
 191       {
 192         register char
 193           escape;
 194
 195         switch (*p)
 196         {
 197           case '"': escape='"'; break;
 198           case '\'': escape='\''; break;
 199           case '`': escape='\''; break;
 200           case '{': escape='}'; break;
 201           default: escape=(*p); break;
 202         }
 203         for (p++; *p != '\0'; p++)
 204         {
 205           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
 206             p++;
 207           else
 208             if (*p == escape)
 209               {
 210                 p++;
 211                 break;
 212               }
 213           token[i++]=(*p);
 214         }
 215         break;
 216       }
 217       case '/':
 218       {
 219         token[i++]=(*p++);
 220         if ((*p == '>') || (*p == '/'))
 221           token[i++]=(*p++);
 222         break;
 223       }
 224       default:
 225       {
 226         char
 227           *q;
 228
 229         value=strtod(p,&q);
 230         if ((p != q) && (*p != ','))
 231           {
 232             for ( ; (p < q) && (*p != ','); p++)
 233               token[i++]=(*p);
 234             if (*p == '%')
 235               token[i++]=(*p++);
 236             break;
 237           }
 238         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
 239             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
 240           {
 241             token[i++]=(*p++);
 242             break;
 243           }
 244         for ( ; *p != '\0'; p++)
 245         {
 246           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
 247               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
 248             break;
 249           if ((i > 0) && (*p == '<'))
 250             break;
 251           token[i++]=(*p);
 252           if (*p == '>')
 253             break;
 254           if (*p == '(')
 255             for (p++; *p != '\0'; p++)
 256             {
 257               token[i++]=(*p);
 258               if ((*p == ')') && (*(p-1) != '\\'))
 259                 break;
 260             }
 261         }
 262         break;
 263       }
 264     }
 265     break;
 266   }
 267   token[i]='\0';
 268   if (LocaleNCompare(token,"url(",4) == 0)
 269     {
 270       ssize_t
 271         offset;
 272
 273       offset=4;
 274       if (token[offset] == '#')
 275         offset++;
 276       i=(ssize_t) strlen(token);
 277       (void) CopyMagickString(token,token+offset,MaxTextExtent);
 278       token[i-offset-1]='\0';
 279     }
 280   while (isspace((int) ((unsigned char) *p)) != 0)
 281     p++;
 282   if (end != (const char **) NULL)
 283     *end=(const char *) p;
 284 }
 285 \f
 286 /*
 287 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 288 %                                                                             %
 289 %                                                                             %
 290 %                                                                             %
 291 %   G l o b E x p r e s s i o n                                               %
 292 %                                                                             %
 293 %                                                                             %
 294 %                                                                             %
 295 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 296 %
 297 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 298 %
 299 %  The format of the GlobExpression function is:
 300 %
 301 %      MagickBooleanType GlobExpression(const char *expression,
 302 %        const char *pattern,const MagickBooleanType case_insensitive)
 303 %
 304 %  A description of each parameter follows:
 305 %
 306 %    o expression: Specifies a pointer to a text string containing a file name.
 307 %
 308 %    o pattern: Specifies a pointer to a text string containing a pattern.
 309 %
 310 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 311 %      an expression.
 312 %
 313 */
 314 MagickExport MagickBooleanType GlobExpression(const char *expression,
 315   const char *pattern,const MagickBooleanType case_insensitive)
 316 {
 317   MagickBooleanType
 318     done,
 319     match;
 320
 321   register const char
 322     *p;
 323
 324   /*
 325     Return on empty pattern or '*'.
 326   */
 327   if (pattern == (char *) NULL)
 328     return(MagickTrue);
 329   if (GetUTFCode(pattern) == 0)
 330     return(MagickTrue);
 331   if (LocaleCompare(pattern,"*") == 0)
 332     return(MagickTrue);
 333   p=pattern+strlen(pattern)-1;
 334   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 335     {
 336       ExceptionInfo
 337         *exception;
 338
 339       ImageInfo
 340         *image_info;
 341
 342       /*
 343         Determine if pattern is a scene, i.e. img0001.pcd[2].
 344       */
 345       image_info=AcquireImageInfo();
 346       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
 347       exception=AcquireExceptionInfo();
 348       (void) SetImageInfo(image_info,0,exception);
 349       exception=DestroyExceptionInfo(exception);
 350       if (LocaleCompare(image_info->filename,pattern) != 0)
 351         {
 352           image_info=DestroyImageInfo(image_info);
 353           return(MagickFalse);
 354         }
 355       image_info=DestroyImageInfo(image_info);
 356     }
 357   /*
 358     Evaluate glob expression.
 359   */
 360   done=MagickFalse;
 361   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 362   {
 363     if (GetUTFCode(expression) == 0)
 364       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 365         break;
 366     switch (GetUTFCode(pattern))
 367     {
 368       case '*':
 369       {
 370         MagickBooleanType
 371           status;
 372
 373         status=MagickFalse;
 374         pattern+=GetUTFOctets(pattern);
 375         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 376         {
 377           status=GlobExpression(expression,pattern,case_insensitive);
 378           expression+=GetUTFOctets(expression);
 379         }
 380         if (status != MagickFalse)
 381           {
 382             while (GetUTFCode(expression) != 0)
 383               expression+=GetUTFOctets(expression);
 384             while (GetUTFCode(pattern) != 0)
 385               pattern+=GetUTFOctets(pattern);
 386           }
 387         break;
 388       }
 389       case '[':
 390       {
 391         ssize_t
 392           c;
 393
 394         pattern+=GetUTFOctets(pattern);
 395         for ( ; ; )
 396         {
 397           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 398             {
 399               done=MagickTrue;
 400               break;
 401             }
 402           if (GetUTFCode(pattern) == '\\')
 403             {
 404               pattern+=GetUTFOctets(pattern);
 405               if (GetUTFCode(pattern) == 0)
 406                 {
 407                   done=MagickTrue;
 408                   break;
 409                 }
 410              }
 411           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 412             {
 413               c=GetUTFCode(pattern);
 414               pattern+=GetUTFOctets(pattern);
 415               pattern+=GetUTFOctets(pattern);
 416               if (GetUTFCode(pattern) == ']')
 417                 {
 418                   done=MagickTrue;
 419                   break;
 420                 }
 421               if (GetUTFCode(pattern) == '\\')
 422                 {
 423                   pattern+=GetUTFOctets(pattern);
 424                   if (GetUTFCode(pattern) == 0)
 425                     {
 426                       done=MagickTrue;
 427                       break;
 428                     }
 429                 }
 430               if ((GetUTFCode(expression) < c) ||
 431                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 432                 {
 433                   pattern+=GetUTFOctets(pattern);
 434                   continue;
 435                 }
 436             }
 437           else
 438             if (GetUTFCode(pattern) != GetUTFCode(expression))
 439               {
 440                 pattern+=GetUTFOctets(pattern);
 441                 continue;
 442               }
 443           pattern+=GetUTFOctets(pattern);
 444           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 445           {
 446             if ((GetUTFCode(pattern) == '\\') &&
 447                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 448               pattern+=GetUTFOctets(pattern);
 449             pattern+=GetUTFOctets(pattern);
 450           }
 451           if (GetUTFCode(pattern) != 0)
 452             {
 453               pattern+=GetUTFOctets(pattern);
 454               expression+=GetUTFOctets(expression);
 455             }
 456           break;
 457         }
 458         break;
 459       }
 460       case '?':
 461       {
 462         pattern+=GetUTFOctets(pattern);
 463         expression+=GetUTFOctets(expression);
 464         break;
 465       }
 466       case '{':
 467       {
 468         register const char
 469           *p;
 470
 471         pattern+=GetUTFOctets(pattern);
 472         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 473         {
 474           p=expression;
 475           match=MagickTrue;
 476           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
 477                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
 478                  (match != MagickFalse))
 479           {
 480             if (GetUTFCode(pattern) == '\\')
 481               pattern+=GetUTFOctets(pattern);
 482             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
 483               MagickFalse;
 484             p+=GetUTFOctets(p);
 485             pattern+=GetUTFOctets(pattern);
 486           }
 487           if (GetUTFCode(pattern) == 0)
 488             {
 489               match=MagickFalse;
 490               done=MagickTrue;
 491               break;
 492             }
 493           else
 494             if (match != MagickFalse)
 495               {
 496                 expression=p;
 497                 while ((GetUTFCode(pattern) != '}') &&
 498                        (GetUTFCode(pattern) != 0))
 499                 {
 500                   pattern+=GetUTFOctets(pattern);
 501                   if (GetUTFCode(pattern) == '\\')
 502                     {
 503                       pattern+=GetUTFOctets(pattern);
 504                       if (GetUTFCode(pattern) == '}')
 505                         pattern+=GetUTFOctets(pattern);
 506                     }
 507                 }
 508               }
 509             else
 510               {
 511                 while ((GetUTFCode(pattern) != '}') &&
 512                        (GetUTFCode(pattern) != ',') &&
 513                        (GetUTFCode(pattern) != 0))
 514                 {
 515                   pattern+=GetUTFOctets(pattern);
 516                   if (GetUTFCode(pattern) == '\\')
 517                     {
 518                       pattern+=GetUTFOctets(pattern);
 519                       if ((GetUTFCode(pattern) == '}') ||
 520                           (GetUTFCode(pattern) == ','))
 521                         pattern+=GetUTFOctets(pattern);
 522                     }
 523                 }
 524               }
 525             if (GetUTFCode(pattern) != 0)
 526               pattern+=GetUTFOctets(pattern);
 527           }
 528         break;
 529       }
 530       case '\\':
 531       {
 532         pattern+=GetUTFOctets(pattern);
 533         if (GetUTFCode(pattern) == 0)
 534           break;
 535       }
 536       default:
 537       {
 538         if (case_insensitive != MagickFalse)
 539           {
 540             if (tolower((int) GetUTFCode(expression)) !=
 541                 tolower((int) GetUTFCode(pattern)))
 542               {
 543                 done=MagickTrue;
 544                 break;
 545               }
 546           }
 547         else
 548           if (GetUTFCode(expression) != GetUTFCode(pattern))
 549             {
 550               done=MagickTrue;
 551               break;
 552             }
 553         expression+=GetUTFOctets(expression);
 554         pattern+=GetUTFOctets(pattern);
 555       }
 556     }
 557   }
 558   while (GetUTFCode(pattern) == '*')
 559     pattern+=GetUTFOctets(pattern);
 560   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 561     MagickTrue : MagickFalse;
 562   return(match);
 563 }
 564 \f
 565 /*
 566 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 567 %                                                                             %
 568 %                                                                             %
 569 %                                                                             %
 570 +     I s G l o b                                                             %
 571 %                                                                             %
 572 %                                                                             %
 573 %                                                                             %
 574 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 575 %
 576 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 577 %  pattern.
 578 %
 579 %  The format of the IsGlob method is:
 580 %
%      MagickBooleanType IsGlob(const char *path)
 582 %
 583 %  A description of each parameter follows:
 584 %
 585 %    o path: the path.
 586 %
 587 */
 588 MagickExport MagickBooleanType IsGlob(const char *path)
 589 {
 590   MagickBooleanType
 591     status;
 592
 593   if (IsPathAccessible(path) != MagickFalse)
 594     return(MagickFalse);
 595   status=(strchr(path,'*') != (char *) NULL) ||
 596     (strchr(path,'?') != (char *) NULL) ||
 597     (strchr(path,'{') != (char *) NULL) ||
 598     (strchr(path,'}') != (char *) NULL) ||
 599     (strchr(path,'[') != (char *) NULL) ||
 600     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
 601   return(status);
 602 }
 603 \f
 604 /*
 605 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 606 %                                                                             %
 607 %                                                                             %
 608 %                                                                             %
 609 %   T o k e n i z e r                                                         %
 610 %                                                                             %
 611 %                                                                             %
 612 %                                                                             %
 613 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 614 %
 615 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 616 %  one at a time from a string of characters.  The characters used for white
 617 %  space, for break characters, and for quotes can be specified.  Also,
 618 %  characters in the string can be preceded by a specifiable escape character
 619 %  which removes any special meaning the character may have.
 620 %
 621 %  Here is some terminology:
 622 %
 623 %    o token: A single unit of information in the form of a group of
 624 %      characters.
 625 %
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs. In addition, white space terminates a
%      non-quoted token.
 629 %
 630 %    o break set: One or more characters that separates non-quoted tokens.
 631 %      Commas are a common break character. The usage of break characters to
 632 %      signal the end of a token is the same as that of white space, except
 633 %      multiple break characters with nothing or only white space between
 634 %      generate a null token for each two break characters together.
 635 %
 636 %      For example, if blank is set to be the white space and comma is set to
 637 %      be the break character, the line
 638 %
 639 %        A, B, C ,  , DEF
 640 %
 641 %        ... consists of 5 tokens:
 642 %
 643 %        1)  "A"
 644 %        2)  "B"
 645 %        3)  "C"
 646 %        4)  "" (the null string)
 647 %        5)  "DEF"
 648 %
 649 %    o Quote character: A character that, when surrounding a group of other
 650 %      characters, causes the group of characters to be treated as a single
 651 %      token, no matter how many white spaces or break characters exist in
 652 %      the group. Also, a token always terminates after the closing quote.
 653 %      For example, if ' is the quote character, blank is white space, and
 654 %      comma is the break character, the following string
 655 %
 656 %        A, ' B, CD'EF GHI
 657 %
 658 %        ... consists of 4 tokens:
 659 %
 660 %        1)  "A"
 661 %        2)  " B, CD" (note the blanks & comma)
 662 %        3)  "EF"
 663 %        4)  "GHI"
 664 %
 665 %      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters I use here for
 667 %      documentation purposes only.
 668 %
 669 %    o Escape character: A character which itself is ignored but which
 670 %      causes the next character to be used as is.  ^ and \ are often used
 671 %      as escape characters. An escape in the last position of the string
 672 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 673 %      and non-escape) character. For example, assume white space, break
 674 %      character, and quote are the same as in the above examples, and
 675 %      further, assume that ^ is the escape character. Then, in the string
 676 %
 677 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 678 %
 679 %        ... there are 7 tokens:
 680 %
 681 %        1)  "ABC"
 682 %        2)  " DEF ' GH"
 683 %        3)  "I"
 684 %        4)  " "     (a lone blank)
 685 %        5)  "J"
 686 %        6)  "K L"
 687 %        7)  "^"     (passed as is at end of line)
 688 %
 689 %  The format of the Tokenizer method is:
 690 %
 691 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 692 %        const size_t max_token_length,const char *line,const char *white,
 693 %        const char *break_set,const char *quote,const char escape,
 694 %        char *breaker,int *next,char *quoted)
 695 %
 696 %  A description of each parameter follows:
 697 %
%    o flag: right now, only the low order 2 bits are used.
 699 %
 700 %        1 => convert non-quoted tokens to upper case
 701 %        2 => convert non-quoted tokens to lower case
 702 %        0 => do not convert non-quoted tokens
 703 %
 704 %    o token: a character string containing the returned next token
 705 %
 706 %    o max_token_length: the maximum size of "token".  Characters beyond
 707 %      "max_token_length" are truncated.
 708 %
%    o line: the string to be parsed.
 710 %
 711 %    o white: a string of the valid white spaces.  example:
 712 %
 713 %        char whitesp[]={" \t"};
 714 %
 715 %      blank and tab will be valid white space.
 716 %
%    o break_set: a string of the valid break characters. example:
 718 %
 719 %        char breakch[]={";,"};
 720 %
 721 %      semicolon and comma will be valid break characters.
 722 %
 723 %    o quote: a string of the valid quote characters. An example would be
 724 %
%        char quote[]={"'\""};
 726 %
 727 %      (this causes single and double quotes to be valid) Note that a
 728 %      token starting with one of these characters needs the same quote
 729 %      character to terminate it.
 730 %
 731 %      for example:
 732 %
 733 %        "ABC '
 734 %
 735 %      is unterminated, but
 736 %
 737 %        "DEF" and 'GHI'
 738 %
 739 %      are properly terminated.  Note that different quote characters
 740 %      can appear on the same line; only for a given token do the quote
 741 %      characters have to be the same.
 742 %
 743 %    o escape: the escape character (NOT a string ... only one
 744 %      allowed). Use zero if none is desired.
 745 %
 746 %    o breaker: the break character used to terminate the current
 747 %      token.  If the token was quoted, this will be the quote used.  If
 748 %      the token is the last one on the line, this will be zero.
 749 %
 750 %    o next: this variable points to the first character of the
 751 %      next token.  it gets reset by "tokenizer" as it steps through the
 752 %      string.  Set it to 0 upon initialization, and leave it alone
 753 %      after that.  You can change it if you want to jump around in the
 754 %      string or re-parse from the beginning, but be careful.
 755 %
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
 757 %      if not.  You may need this information (for example:  in C, a
 758 %      string with quotes around it is a character string, while one
 759 %      without is an identifier).
 760 %
 761 %    o result: 0 if we haven't reached EOS (end of string), and 1
 762 %      if we have.
 763 %
 764 */
 765
/*
  Tokenizer() parser states, stored in TokenInfo.state.
*/
#define IN_WHITE 0  /* start of a call: no token character has been seen yet */
#define IN_TOKEN 1  /* collecting the characters of an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token; break and white characters are stored */
#define IN_OZONE 3  /* token text is complete (closing quote, or white space after a token) */
 770
 771 static ssize_t sindex(int c,const char *string)
 772 {
 773   register const char
 774     *p;
 775
 776   for (p=string; *p != '\0'; p++)
 777     if (c == (int) (*p))
 778       return((ssize_t) (p-string));
 779   return(-1);
 780 }
 781
 782 static void StoreToken(TokenInfo *token_info,char *string,
 783   size_t max_token_length,int c)
 784 {
 785   register ssize_t
 786     i;
 787
 788   if ((token_info->offset < 0) ||
 789       ((size_t) token_info->offset >= (max_token_length-1)))
 790     return;
 791   i=token_info->offset++;
 792   string[i]=(char) c;
 793   if (token_info->state == IN_QUOTE)
 794     return;
 795   switch (token_info->flag & 0x03)
 796   {
 797     case 1:
 798     {
 799       string[i]=(char) toupper(c);
 800       break;
 801     }
 802     case 2:
 803     {
 804       string[i]=(char) tolower(c);
 805       break;
 806     }
 807     default:
 808       break;
 809   }
 810 }
 811
 812 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
 813   char *token,const size_t max_token_length,const char *line,const char *white,
 814   const char *break_set,const char *quote,const char escape,char *breaker,
 815   int *next,char *quoted)
 816 {
 817   int
 818     c;
 819
 820   register ssize_t
 821     i;
 822
 823   *breaker='\0';
 824   *quoted='\0';
 825   if (line[*next] == '\0')
 826     return(1);
 827   token_info->state=IN_WHITE;
 828   token_info->quote=(char) MagickFalse;
 829   token_info->flag=flag;
 830   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
 831   {
 832     c=(int) line[*next];
 833     i=sindex(c,break_set);
 834     if (i >= 0)
 835       {
 836         switch (token_info->state)
 837         {
 838           case IN_WHITE:
 839           case IN_TOKEN:
 840           case IN_OZONE:
 841           {
 842             (*next)++;
 843             *breaker=break_set[i];
 844             token[token_info->offset]='\0';
 845             return(0);
 846           }
 847           case IN_QUOTE:
 848           {
 849             StoreToken(token_info,token,max_token_length,c);
 850             break;
 851           }
 852         }
 853         continue;
 854       }
 855     i=sindex(c,quote);
 856     if (i >= 0)
 857       {
 858         switch (token_info->state)
 859         {
 860           case IN_WHITE:
 861           {
 862             token_info->state=IN_QUOTE;
 863             token_info->quote=quote[i];
 864             *quoted=(char) MagickTrue;
 865             break;
 866           }
 867           case IN_QUOTE:
 868           {
 869             if (quote[i] != token_info->quote)
 870               StoreToken(token_info,token,max_token_length,c);
 871             else
 872               {
 873                 token_info->state=IN_OZONE;
 874                 token_info->quote='\0';
 875               }
 876             break;
 877           }
 878           case IN_TOKEN:
 879           case IN_OZONE:
 880           {
 881             *breaker=(char) c;
 882             token[token_info->offset]='\0';
 883             return(0);
 884           }
 885         }
 886         continue;
 887       }
 888     i=sindex(c,white);
 889     if (i >= 0)
 890       {
 891         switch (token_info->state)
 892         {
 893           case IN_WHITE:
 894           case IN_OZONE:
 895             break;
 896           case IN_TOKEN:
 897           {
 898             token_info->state=IN_OZONE;
 899             break;
 900           }
 901           case IN_QUOTE:
 902           {
 903             StoreToken(token_info,token,max_token_length,c);
 904             break;
 905           }
 906         }
 907         continue;
 908       }
 909     if (c == (int) escape)
 910       {
 911         if (line[(*next)+1] == '\0')
 912           {
 913             *breaker='\0';
 914             StoreToken(token_info,token,max_token_length,c);
 915             (*next)++;
 916             token[token_info->offset]='\0';
 917             return(0);
 918           }
 919         switch (token_info->state)
 920         {
 921           case IN_WHITE:
 922           {
 923             (*next)--;
 924             token_info->state=IN_TOKEN;
 925             break;
 926           }
 927           case IN_TOKEN:
 928           case IN_QUOTE:
 929           {
 930             (*next)++;
 931             c=(int) line[*next];
 932             StoreToken(token_info,token,max_token_length,c);
 933             break;
 934           }
 935           case IN_OZONE:
 936           {
 937             token[token_info->offset]='\0';
 938             return(0);
 939           }
 940         }
 941         continue;
 942       }
 943     switch (token_info->state)
 944     {
 945       case IN_WHITE:
 946         token_info->state=IN_TOKEN;
 947       case IN_TOKEN:
 948       case IN_QUOTE:
 949       {
 950         StoreToken(token_info,token,max_token_length,c);
 951         break;
 952       }
 953       case IN_OZONE:
 954       {
 955         token[token_info->offset]='\0';
 956         return(0);
 957       }
 958     }
 959   }
 960   token[token_info->offset]='\0';
 961   return(0);
 962 }