granicus.if.org Git - imagemagick/blob - MagickCore/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                                  Cristy                                     %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2018 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    https://www.imagemagick.org/script/license.php                           %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "MagickCore/studio.h"
  44 #include "MagickCore/exception.h"
  45 #include "MagickCore/exception-private.h"
  46 #include "MagickCore/image.h"
  47 #include "MagickCore/image-private.h"
  48 #include "MagickCore/memory_.h"
  49 #include "MagickCore/memory-private.h"
  50 #include "MagickCore/string_.h"
  51 #include "MagickCore/string-private.h"
  52 #include "MagickCore/token.h"
  53 #include "MagickCore/token-private.h"
  54 #include "MagickCore/utility.h"
  55 #include "MagickCore/utility-private.h"
  56 \f
  57 /*
  58   Typedef declarations.
  59 */
  60 struct _TokenInfo
  61 {
  62   int
  63     state;
  64
  65   MagickStatusType
  66     flag;
  67
  68   ssize_t
  69     offset;
  70
  71   char
  72     quote;
  73
  74   size_t
  75     signature;
  76 };
  77 \f
  78 /*
  79 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  80 %                                                                             %
  81 %                                                                             %
  82 %                                                                             %
  83 %   A c q u i r e T o k e n I n f o                                           %
  84 %                                                                             %
  85 %                                                                             %
  86 %                                                                             %
  87 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  88 %
  89 %  AcquireTokenInfo() allocates the TokenInfo structure.
  90 %
  91 %  The format of the AcquireTokenInfo method is:
  92 %
  93 %      TokenInfo *AcquireTokenInfo()
  94 %
  95 */
  96 MagickExport TokenInfo *AcquireTokenInfo(void)
  97 {
  98   TokenInfo
  99     *token_info;
 100
 101   token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
 102   token_info->signature=MagickCoreSignature;
 103   return(token_info);
 104 }
 105 \f
 106 /*
 107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 108 %                                                                             %
 109 %                                                                             %
 110 %                                                                             %
 111 %   D e s t r o y T o k e n I n f o                                           %
 112 %                                                                             %
 113 %                                                                             %
 114 %                                                                             %
 115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 116 %
 117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
 118 %  structure.
 119 %
 120 %  The format of the DestroyTokenInfo method is:
 121 %
 122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 123 %
 124 %  A description of each parameter follows:
 125 %
 126 %    o token_info: Specifies a pointer to a TokenInfo structure.
 127 %
 128 */
 129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 130 {
 131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 132   assert(token_info != (TokenInfo *) NULL);
 133   assert(token_info->signature == MagickCoreSignature);
 134   token_info->signature=(~MagickCoreSignature);
 135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 136   return(token_info);
 137 }
 138 \f
 139 /*
 140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 141 %                                                                             %
 142 %                                                                             %
 143 %                                                                             %
 144 +   G e t N e x t T o k e n                                                   %
 145 %                                                                             %
 146 %                                                                             %
 147 %                                                                             %
 148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 149 %
 150 %  GetNextToken() gets a token from the token stream.  A token is defined as
 151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
 152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 153 %  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
 154 %  separator characters: ':', '=', ',', and ';'.
 155 %
 156 %  The format of the GetNextToken method is:
 157 %
 158 %      void GetNextToken(const char *start,const char **end,
 159 %        const size_t extent,char *token)
 160 %
 161 %  A description of each parameter follows:
 162 %
 163 %    o start: the start of the token sequence.
 164 %
 165 %    o end: point to the end of the token sequence.
 166 %
 167 %    o extent: maximum extent of the token.
 168 %
 169 %    o token: copy the token to this buffer.
 170 %
 171 */
MagickExport void GetNextToken(const char *start,const char **end,
  const size_t extent,char *token)
{
  double
    value;

  register const char
    *p;

  register ssize_t
    i;

  size_t
    length;

  assert(start != (const char *) NULL);
  assert(token != (char *) NULL);
  /*
    i counts the characters stored in token; every store is guarded by
    i < extent-1 so that room is left for the terminating NUL.
  */
  i=0;
  length=strlen(start);
  p=start;
  /*
    Skip leading whitespace.
  */
  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
    p++;
  switch (*p)
  {
    case '\0':
      break;
    case '"':
    case '\'':
    case '`':
    case '{':
    {
      register char
        escape;

      /*
        Quoted token: pick the closing delimiter that matches the opener.
        A back-quote is closed by a single quote and '{' by '}'.
      */
      switch (*p)
      {
        case '"': escape='"'; break;
        case '\'': escape='\''; break;
        case '`': escape='\''; break;
        case '{': escape='}'; break;
        default: escape=(*p); break;
      }
      for (p++; *p != '\0'; p++)
      {
        /*
          A backslash in front of the closing delimiter or another backslash
          is dropped and the following character is stored literally; an
          unescaped closing delimiter ends the token and is not stored.
        */
        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
          p++;
        else
          if (*p == escape)
            {
              p++;
              break;
            }
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        /*
          Never advance beyond the terminating NUL of start.
        */
        if ((size_t) (p-start) >= length)
          break;
      }
      break;
    }
    case '/':
    {
      /*
        The token is "/" on its own, or "/>" or "//" when followed by '>' or
        a second '/'.
      */
      if (i < (ssize_t) (extent-1))
        token[i++]=(*p);
      p++;
      if ((*p == '>') || (*p == '/'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
        }
      break;
    }
    default:
    {
      char
        *q;

      /*
        First try to read a number; only the end pointer q is of interest,
        the parsed value itself is discarded.
      */
      value=StringToDouble(p,&q);
      (void) value;
      if ((p != q) && (*p != ','))
        {
          /*
            Copy the numeric text, stopping early at a comma, and keep a
            '%' that directly follows it as part of the token.
          */
          for ( ; (p < q) && (*p != ','); p++)
          {
            if (i < (ssize_t) (extent-1))
              token[i++]=(*p);
            if ((size_t) (p-start) >= length)
              break;
          }
          if (*p == '%')
            {
              if (i < (ssize_t) (extent-1))
                token[i++]=(*p);
              p++;
            }
          break;
        }
      /*
        A non-alphabetic character other than the directory separator, '#',
        or '<' forms a one character token.
      */
      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
          break;
        }
      /*
        General word: collect characters up to an unescaped separator.
      */
      for ( ; *p != '\0'; p++)
      {
        /*
          Whitespace, '=', ',', ':' and ';' end the word unless preceded by
          a backslash.
        */
        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
          break;
        /*
          A '<' ends the word once at least one character has been stored.
        */
        if ((i > 0) && (*p == '<'))
          break;
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        /*
          A '>' is stored and then ends the word; p is left on the '>'.
        */
        if (*p == '>')
          break;
        /*
          An opening parenthesis pulls in everything through the first
          unescaped ')', separators included.
        */
        if (*p == '(')
          for (p++; *p != '\0'; p++)
          {
            if (i < (ssize_t) (extent-1))
              token[i++]=(*p);
            if ((*p == ')') && (*(p-1) != '\\'))
              break;
            if ((size_t) (p-start) >= length)
              break;
          }
        /*
          Stop here if the inner loop ran to the end of the string, so the
          loop increment cannot step past the terminating NUL.
        */
        if ((size_t) (p-start) >= length)
          break;
      }
      break;
    }
  }
  token[i]='\0';
  /*
    Reduce "url(name)" or "url(#name)" to "name": drop the "url(" prefix, an
    optional '#', and the final character of the token.
  */
  if ((LocaleNCompare(token,"url(",4) == 0) && (strlen(token) > 5))
    {
      ssize_t
        offset;

      offset=4;
      if (token[offset] == '#')
        offset++;
      i=(ssize_t) strlen(token);
      if (i > offset)
        {
          /*
            NOTE(review): the copy is bounded by MagickPathExtent rather
            than extent, and source and destination overlap -- confirm that
            CopyMagickString() supports this usage.
          */
          (void) CopyMagickString(token,token+offset,MagickPathExtent);
          token[i-offset-1]='\0';
        }
    }
  /*
    Skip whitespace after the token and report where parsing stopped.
  */
  while (isspace((int) ((unsigned char) *p)) != 0)
    p++;
  if (end != (const char **) NULL)
    *end=(const char *) p;
}
 324 \f
 325 /*
 326 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 327 %                                                                             %
 328 %                                                                             %
 329 %                                                                             %
 330 %   G l o b E x p r e s s i o n                                               %
 331 %                                                                             %
 332 %                                                                             %
 333 %                                                                             %
 334 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 335 %
 336 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 337 %
 338 %  The format of the GlobExpression function is:
 339 %
 340 %      MagickBooleanType GlobExpression(const char *expression,
 341 %        const char *pattern,const MagickBooleanType case_insensitive)
 342 %
 343 %  A description of each parameter follows:
 344 %
 345 %    o expression: Specifies a pointer to a text string containing a file name.
 346 %
 347 %    o pattern: Specifies a pointer to a text string containing a pattern.
 348 %
 349 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 350 %      an expression.
 351 %
 352 */
 353 MagickExport MagickBooleanType GlobExpression(const char *expression,
 354   const char *pattern,const MagickBooleanType case_insensitive)
 355 {
 356   MagickBooleanType
 357     done,
 358     match;
 359
 360   register const char
 361     *p;
 362
 363   /*
 364     Return on empty pattern or '*'.
 365   */
 366   if (pattern == (char *) NULL)
 367     return(MagickTrue);
 368   if (GetUTFCode(pattern) == 0)
 369     return(MagickTrue);
 370   if (LocaleCompare(pattern,"*") == 0)
 371     return(MagickTrue);
 372   p=pattern+strlen(pattern)-1;
 373   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 374     {
 375       ExceptionInfo
 376         *exception;
 377
 378       ImageInfo
 379         *image_info;
 380
 381       /*
 382         Determine if pattern is a scene, i.e. img0001.pcd[2].
 383       */
 384       image_info=AcquireImageInfo();
 385       (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
 386       exception=AcquireExceptionInfo();
 387       (void) SetImageInfo(image_info,0,exception);
 388       exception=DestroyExceptionInfo(exception);
 389       if (LocaleCompare(image_info->filename,pattern) != 0)
 390         {
 391           image_info=DestroyImageInfo(image_info);
 392           return(MagickFalse);
 393         }
 394       image_info=DestroyImageInfo(image_info);
 395     }
 396   /*
 397     Evaluate glob expression.
 398   */
 399   done=MagickFalse;
 400   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 401   {
 402     if (GetUTFCode(expression) == 0)
 403       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 404         break;
 405     switch (GetUTFCode(pattern))
 406     {
 407       case '*':
 408       {
 409         MagickBooleanType
 410           status;
 411
 412         status=MagickFalse;
 413         while (GetUTFCode(pattern) == '*')
 414           pattern+=GetUTFOctets(pattern);
 415         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 416         {
 417           status=GlobExpression(expression,pattern,case_insensitive);
 418           expression+=GetUTFOctets(expression);
 419         }
 420         if (status != MagickFalse)
 421           {
 422             while (GetUTFCode(expression) != 0)
 423               expression+=GetUTFOctets(expression);
 424             while (GetUTFCode(pattern) != 0)
 425               pattern+=GetUTFOctets(pattern);
 426           }
 427         break;
 428       }
 429       case '[':
 430       {
 431         int
 432           c;
 433
 434         pattern+=GetUTFOctets(pattern);
 435         for ( ; ; )
 436         {
 437           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 438             {
 439               done=MagickTrue;
 440               break;
 441             }
 442           if (GetUTFCode(pattern) == '\\')
 443             {
 444               pattern+=GetUTFOctets(pattern);
 445               if (GetUTFCode(pattern) == 0)
 446                 {
 447                   done=MagickTrue;
 448                   break;
 449                 }
 450              }
 451           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 452             {
 453               c=GetUTFCode(pattern);
 454               pattern+=GetUTFOctets(pattern);
 455               pattern+=GetUTFOctets(pattern);
 456               if (GetUTFCode(pattern) == ']')
 457                 {
 458                   done=MagickTrue;
 459                   break;
 460                 }
 461               if (GetUTFCode(pattern) == '\\')
 462                 {
 463                   pattern+=GetUTFOctets(pattern);
 464                   if (GetUTFCode(pattern) == 0)
 465                     {
 466                       done=MagickTrue;
 467                       break;
 468                     }
 469                 }
 470               if ((GetUTFCode(expression) < c) ||
 471                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 472                 {
 473                   pattern+=GetUTFOctets(pattern);
 474                   continue;
 475                 }
 476             }
 477           else
 478             if (GetUTFCode(pattern) != GetUTFCode(expression))
 479               {
 480                 pattern+=GetUTFOctets(pattern);
 481                 continue;
 482               }
 483           pattern+=GetUTFOctets(pattern);
 484           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 485           {
 486             if ((GetUTFCode(pattern) == '\\') &&
 487                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 488               pattern+=GetUTFOctets(pattern);
 489             pattern+=GetUTFOctets(pattern);
 490           }
 491           if (GetUTFCode(pattern) != 0)
 492             {
 493               pattern+=GetUTFOctets(pattern);
 494               expression+=GetUTFOctets(expression);
 495             }
 496           break;
 497         }
 498         break;
 499       }
 500       case '?':
 501       {
 502         pattern+=GetUTFOctets(pattern);
 503         expression+=GetUTFOctets(expression);
 504         break;
 505       }
 506       case '{':
 507       {
 508         char
 509           *target;
 510
 511         register char
 512           *p;
 513
 514         target=AcquireString(pattern);
 515         p=target;
 516         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 517         {
 518           *p++=(*pattern++);
 519           if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
 520             {
 521               *p='\0';
 522               match=GlobExpression(expression,target,case_insensitive);
 523               if (match != MagickFalse)
 524                 {
 525                   expression+=MagickMin(strlen(expression),strlen(target));
 526                   break;
 527                 }
 528               p=target;
 529               pattern+=GetUTFOctets(pattern);
 530             }
 531         }
 532         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 533           pattern+=GetUTFOctets(pattern);
 534         if (GetUTFCode(pattern) != 0)
 535           pattern+=GetUTFOctets(pattern);
 536         target=DestroyString(target);
 537         break;
 538       }
 539       case '\\':
 540       {
 541         pattern+=GetUTFOctets(pattern);
 542         if (GetUTFCode(pattern) == 0)
 543           break;
 544       }
 545       default:
 546       {
 547         if (case_insensitive != MagickFalse)
 548           {
 549             if (tolower((int) GetUTFCode(expression)) !=
 550                 tolower((int) GetUTFCode(pattern)))
 551               {
 552                 done=MagickTrue;
 553                 break;
 554               }
 555           }
 556         else
 557           if (GetUTFCode(expression) != GetUTFCode(pattern))
 558             {
 559               done=MagickTrue;
 560               break;
 561             }
 562         expression+=GetUTFOctets(expression);
 563         pattern+=GetUTFOctets(pattern);
 564       }
 565     }
 566   }
 567   while (GetUTFCode(pattern) == '*')
 568     pattern+=GetUTFOctets(pattern);
 569   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 570     MagickTrue : MagickFalse;
 571   return(match);
 572 }
 573 \f
 574 /*
 575 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 576 %                                                                             %
 577 %                                                                             %
 578 %                                                                             %
 579 +     I s G l o b                                                             %
 580 %                                                                             %
 581 %                                                                             %
 582 %                                                                             %
 583 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 584 %
 585 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 586 %  pattern.
 587 %
 588 %  The format of the IsGlob method is:
 589 %
 590 %      MagickBooleanType IsGlob(const char *path)
 591 %
 592 %  A description of each parameter follows:
 593 %
 594 %    o path: the path.
 595 %
 596 */
 597 MagickPrivate MagickBooleanType IsGlob(const char *path)
 598 {
 599   MagickBooleanType
 600     status = MagickFalse;
 601
 602   register const char
 603     *p;
 604
 605   if (IsPathAccessible(path) != MagickFalse)
 606     return(MagickFalse);
 607   for (p=path; *p != '\0'; p++)
 608   {
 609     switch (*p)
 610     {
 611       case '*':
 612       case '?':
 613       case '{':
 614       case '}':
 615       case '[':
 616       case ']':
 617       {
 618         status=MagickTrue;
 619         break;
 620       }
 621       default:
 622         break;
 623     }
 624   }
 625   return(status);
 626 }
 627 \f
 628 /*
 629 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 630 %                                                                             %
 631 %                                                                             %
 632 %                                                                             %
 633 %   T o k e n i z e r                                                         %
 634 %                                                                             %
 635 %                                                                             %
 636 %                                                                             %
 637 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 638 %
 639 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 640 %  one at a time from a string of characters.  The characters used for white
 641 %  space, for break characters, and for quotes can be specified.  Also,
 642 %  characters in the string can be preceded by a specifiable escape character
 643 %  which removes any special meaning the character may have.
 644 %
 645 %  Here is some terminology:
 646 %
 647 %    o token: A single unit of information in the form of a group of
 648 %      characters.
 649 %
 650 %    o white space: Space that gets ignored (except within quotes or when
 651 %      escaped), like blanks and tabs. in addition, white space terminates a
 652 %      non-quoted token.
 653 %
 654 %    o break set: One or more characters that separates non-quoted tokens.
 655 %      Commas are a common break character. The usage of break characters to
 656 %      signal the end of a token is the same as that of white space, except
 657 %      multiple break characters with nothing or only white space between
 658 %      generate a null token for each two break characters together.
 659 %
 660 %      For example, if blank is set to be the white space and comma is set to
 661 %      be the break character, the line
 662 %
 663 %        A, B, C ,  , DEF
 664 %
 665 %        ... consists of 5 tokens:
 666 %
 667 %        1)  "A"
 668 %        2)  "B"
 669 %        3)  "C"
 670 %        4)  "" (the null string)
 671 %        5)  "DEF"
 672 %
 673 %    o Quote character: A character that, when surrounding a group of other
 674 %      characters, causes the group of characters to be treated as a single
 675 %      token, no matter how many white spaces or break characters exist in
 676 %      the group. Also, a token always terminates after the closing quote.
 677 %      For example, if ' is the quote character, blank is white space, and
 678 %      comma is the break character, the following string
 679 %
 680 %        A, ' B, CD'EF GHI
 681 %
 682 %        ... consists of 4 tokens:
 683 %
 684 %        1)  "A"
 685 %        2)  " B, CD" (note the blanks & comma)
 686 %        3)  "EF"
 687 %        4)  "GHI"
 688 %
 689 %      The quote characters themselves do not appear in the resultant
 690 %      tokens.  The double quotes are delimiters i use here for
 691 %      documentation purposes only.
 692 %
 693 %    o Escape character: A character which itself is ignored but which
 694 %      causes the next character to be used as is.  ^ and \ are often used
 695 %      as escape characters. An escape in the last position of the string
 696 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 697 %      and non-escape) character. For example, assume white space, break
 698 %      character, and quote are the same as in the above examples, and
 699 %      further, assume that ^ is the escape character. Then, in the string
 700 %
 701 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 702 %
 703 %        ... there are 7 tokens:
 704 %
 705 %        1)  "ABC"
 706 %        2)  " DEF ' GH"
 707 %        3)  "I"
 708 %        4)  " "     (a lone blank)
 709 %        5)  "J"
 710 %        6)  "K L"
 711 %        7)  "^"     (passed as is at end of line)
 712 %
 713 %  The format of the Tokenizer method is:
 714 %
 715 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 716 %        const size_t max_token_length,const char *line,const char *white,
 717 %        const char *break_set,const char *quote,const char escape,
 718 %        char *breaker,int *next,char *quoted)
 719 %
 720 %  A description of each parameter follows:
 721 %
 722 %    o flag: right now, only the low order 2 bits are used.
 723 %
 724 %        1 => convert non-quoted tokens to upper case
 725 %        2 => convert non-quoted tokens to lower case
 726 %        0 => do not convert non-quoted tokens
 727 %
 728 %    o token: a character string containing the returned next token
 729 %
 730 %    o max_token_length: the maximum size of "token".  Characters beyond
 731 %      "max_token_length" are truncated.
 732 %
 733 %    o line: the string to be parsed.
 734 %
 735 %    o white: a string of the valid white spaces.  example:
 736 %
 737 %        char whitesp[]={" \t"};
 738 %
 739 %      blank and tab will be valid white space.
 740 %
 741 %    o break: a string of the valid break characters. example:
 742 %
 743 %        char breakch[]={";,"};
 744 %
 745 %      semicolon and comma will be valid break characters.
 746 %
 747 %    o quote: a string of the valid quote characters. An example would be
 748 %
 749 %        char quote[]={"'\""};
 750 %
 751 %      (this causes single and double quotes to be valid) Note that a
 752 %      token starting with one of these characters needs the same quote
 753 %      character to terminate it.
 754 %
 755 %      for example:
 756 %
 757 %        "ABC '
 758 %
 759 %      is unterminated, but
 760 %
 761 %        "DEF" and 'GHI'
 762 %
 763 %      are properly terminated.  Note that different quote characters
 764 %      can appear on the same line; only for a given token do the quote
 765 %      characters have to be the same.
 766 %
 767 %    o escape: the escape character (NOT a string ... only one
 768 %      allowed). Use zero if none is desired.
 769 %
 770 %    o breaker: the break character used to terminate the current
 771 %      token.  If the token was quoted, this will be the quote used.  If
 772 %      the token is the last one on the line, this will be zero.
 773 %
 774 %    o next: this variable points to the first character of the
 775 %      next token.  it gets reset by "tokenizer" as it steps through the
 776 %      string.  Set it to 0 upon initialization, and leave it alone
 777 %      after that.  You can change it if you want to jump around in the
 778 %      string or re-parse from the beginning, but be careful.
 779 %
 780 %    o quoted: set to True if the token was quoted and MagickFalse
 781 %      if not.  You may need this information (for example:  in C, a
 782 %      string with quotes around it is a character string, while one
 783 %      without is an identifier).
 784 %
 785 %    o result: 0 if we haven't reached EOS (end of string), and 1
 786 %      if we have.
 787 %
 788 */
 789
/*
  Tokenizer() parser states, stored in TokenInfo.state:

    IN_WHITE: no token character has been seen yet.
    IN_TOKEN: inside an unquoted token.
    IN_QUOTE: inside a quoted token.
    IN_OZONE: the token has ended (closing quote, or white space after an
      unquoted token); the next significant character terminates the scan.
*/
#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_OZONE 3
 794
 795 static ssize_t sindex(int c,const char *string)
 796 {
 797   register const char
 798     *p;
 799
 800   for (p=string; *p != '\0'; p++)
 801     if (c == (int) (*p))
 802       return((ssize_t) (p-string));
 803   return(-1);
 804 }
 805
 806 static void StoreToken(TokenInfo *token_info,char *string,
 807   size_t max_token_length,int c)
 808 {
 809   register ssize_t
 810     i;
 811
 812   if ((token_info->offset < 0) ||
 813       ((size_t) token_info->offset >= (max_token_length-1)))
 814     return;
 815   i=token_info->offset++;
 816   string[i]=(char) c;
 817   if (token_info->state == IN_QUOTE)
 818     return;
 819   switch (token_info->flag & 0x03)
 820   {
 821     case 1:
 822     {
 823       string[i]=(char) toupper(c);
 824       break;
 825     }
 826     case 2:
 827     {
 828       string[i]=(char) tolower(c);
 829       break;
 830     }
 831     default:
 832       break;
 833   }
 834 }
 835
MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
  char *token,const size_t max_token_length,const char *line,const char *white,
  const char *break_set,const char *quote,const char escape,char *breaker,
  int *next,char *quoted)
{
  int
    c;

  register ssize_t
    i;

  *breaker='\0';
  *quoted='\0';
  /*
    Nothing left to parse: report end of string.
  */
  if (line[*next] == '\0')
    return(1);
  /*
    Every call starts a fresh token in the IN_WHITE state.
  */
  token_info->state=IN_WHITE;
  token_info->quote=(char) MagickFalse;
  token_info->flag=flag;
  /*
    Classify each character in priority order: break character, quote
    character, white space, escape character, then ordinary character.
  */
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
  {
    c=(int) line[*next];
    i=sindex(c,break_set);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_TOKEN:
          case IN_OZONE:
          {
            /*
              A break character outside quotes ends the token (possibly an
              empty one) and is consumed.
            */
            (*next)++;
            *breaker=break_set[i];
            token[token_info->offset]='\0';
            return(0);
          }
          case IN_QUOTE:
          {
            /*
              Inside quotes a break character is ordinary text.
            */
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    i=sindex(c,quote);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Opening quote: remember which quote character must close it.
            */
            token_info->state=IN_QUOTE;
            token_info->quote=quote[i];
            *quoted=(char) MagickTrue;
            break;
          }
          case IN_QUOTE:
          {
            /*
              A different quote character is literal text; the matching one
              closes the quoted token.
            */
            if (quote[i] != token_info->quote)
              StoreToken(token_info,token,max_token_length,c);
            else
              {
                token_info->state=IN_OZONE;
                token_info->quote='\0';
              }
            break;
          }
          case IN_TOKEN:
          case IN_OZONE:
          {
            /*
              A quote after a token ends that token.  *next is not advanced,
              so the quote character is seen again on the next call.
            */
            *breaker=(char) c;
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    i=sindex(c,white);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_OZONE:
            break;
          case IN_TOKEN:
          {
            /*
              White space after an unquoted token: the token is complete,
              keep scanning for whatever terminates it.
            */
            token_info->state=IN_OZONE;
            break;
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    if (c == (int) escape)
      {
        /*
          An escape character in the last position of the line is stored as
          an ordinary character.
        */
        if (line[(*next)+1] == '\0')
          {
            *breaker='\0';
            StoreToken(token_info,token,max_token_length,c);
            (*next)++;
            token[token_info->offset]='\0';
            return(0);
          }
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            /*
              Step back so the loop increment revisits the escape character,
              this time in the IN_TOKEN state.
            */
            (*next)--;
            token_info->state=IN_TOKEN;
            break;
          }
          case IN_TOKEN:
          case IN_QUOTE:
          {
            /*
              Drop the escape character and store the one that follows it
              without interpreting it.
            */
            (*next)++;
            c=(int) line[*next];
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
          case IN_OZONE:
          {
            /*
              The escape belongs to the next token; leave *next on it.
            */
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    /*
      Ordinary character.
    */
    switch (token_info->state)
    {
      case IN_WHITE:
      {
        token_info->state=IN_TOKEN;
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_TOKEN:
      case IN_QUOTE:
      {
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_OZONE:
      {
        /*
          First character of the next token; leave *next on it.
        */
        token[token_info->offset]='\0';
        return(0);
      }
    }
  }
  /*
    End of line reached while collecting: return what was gathered.  The
    following call returns 1 because line[*next] is now the terminating NUL.
  */
  token[token_info->offset]='\0';
  return(0);
}
 989   return(0);
 990 }