granicus.if.org Git - imagemagick/blob - MagickCore/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                               John Cristy                                   %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    http://www.imagemagick.org/script/license.php                            %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "MagickCore/studio.h"
  44 #include "MagickCore/exception.h"
  45 #include "MagickCore/exception-private.h"
  46 #include "MagickCore/image.h"
  47 #include "MagickCore/memory_.h"
  48 #include "MagickCore/string_.h"
  49 #include "MagickCore/string-private.h"
  50 #include "MagickCore/token.h"
  51 #include "MagickCore/token-private.h"
  52 #include "MagickCore/utility.h"
  53 #include "MagickCore/utility-private.h"
  54 \f
  55 /*
  56   Typedef declarations.
  57 */
  58 struct _TokenInfo
  59 {
  60   int
  61     state;
  62
  63   MagickStatusType
  64     flag;
  65
  66   ssize_t
  67     offset;
  68
  69   char
  70     quote;
  71
  72   size_t
  73     signature;
  74 };
  75 \f
  76 /*
  77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  78 %                                                                             %
  79 %                                                                             %
  80 %                                                                             %
  81 %   A c q u i r e T o k e n I n f o                                           %
  82 %                                                                             %
  83 %                                                                             %
  84 %                                                                             %
  85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  86 %
  87 %  AcquireTokenInfo() allocates the TokenInfo structure.
  88 %
  89 %  The format of the AcquireTokenInfo method is:
  90 %
  91 %      TokenInfo *AcquireTokenInfo()
  92 %
  93 */
  94 MagickExport TokenInfo *AcquireTokenInfo(void)
  95 {
  96   TokenInfo
  97     *token_info;
  98
  99   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
 100   if (token_info == (TokenInfo *) NULL)
 101     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
 102   token_info->signature=MagickSignature;
 103   return(token_info);
 104 }
 105 \f
 106 /*
 107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 108 %                                                                             %
 109 %                                                                             %
 110 %                                                                             %
 111 %   D e s t r o y T o k e n I n f o                                           %
 112 %                                                                             %
 113 %                                                                             %
 114 %                                                                             %
 115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 116 %
 117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
 118 %  structure.
 119 %
 120 %  The format of the DestroyTokenInfo method is:
 121 %
 122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 123 %
 124 %  A description of each parameter follows:
 125 %
 126 %    o token_info: Specifies a pointer to a TokenInfo structure.
 127 %
 128 */
 129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 130 {
 131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 132   assert(token_info != (TokenInfo *) NULL);
 133   assert(token_info->signature == MagickSignature);
 134   token_info->signature=(~MagickSignature);
 135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 136   return(token_info);
 137 }
 138 \f
 139 /*
 140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 141 %                                                                             %
 142 %                                                                             %
 143 %                                                                             %
 144 +   G e t M a g i c k T o k e n                                               %
 145 %                                                                             %
 146 %                                                                             %
 147 %                                                                             %
 148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 149 %
 150 %  GetMagickToken() gets a token from the token stream.  A token is defined as
 151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
 152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 153 %  parenthesis (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
 154 %  separator characters: ':', '=', ',', and ';'.
 155 %
 156 %  The format of the GetMagickToken method is:
 157 %
 158 %      void GetMagickToken(const char *start,const char **end,char *token)
 159 %
 160 %  A description of each parameter follows:
 161 %
 162 %    o start: the start of the token sequence.
 163 %
 164 %    o end: point to the end of the token sequence.
 165 %
 166 %    o token: copy the token to this buffer.
 167 %
 168 */
 169 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
 170 {
 171   double
 172     value;
 173
 174   register const char
 175     *p;
 176
 177   register ssize_t
 178     i;
 179
 180   assert(start != (const char *) NULL);
 181   assert(token != (char *) NULL);
 182   i=0;
 183   for (p=start; *p != '\0'; )
 184   {
 185     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
 186       p++;
 187     if (*p == '\0')
 188       break;
 189     switch (*p)
 190     {
 191       case '"':
 192       case '\'':
 193       case '`':
 194       case '{':
 195       {
 196         register char
 197           escape;
 198
 199         switch (*p)
 200         {
 201           case '"': escape='"'; break;
 202           case '\'': escape='\''; break;
 203           case '`': escape='\''; break;
 204           case '{': escape='}'; break;
 205           default: escape=(*p); break;
 206         }
 207         for (p++; *p != '\0'; p++)
 208         {
 209           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
 210             p++;
 211           else
 212             if (*p == escape)
 213               {
 214                 p++;
 215                 break;
 216               }
 217           token[i++]=(*p);
 218         }
 219         break;
 220       }
 221       case '/':
 222       {
 223         token[i++]=(*p++);
 224         if ((*p == '>') || (*p == '/'))
 225           token[i++]=(*p++);
 226         break;
 227       }
 228       default:
 229       {
 230         char
 231           *q;
 232
 233         value=StringToDouble(p,&q);
 234         (void) value;
 235         if ((p != q) && (*p != ','))
 236           {
 237             for ( ; (p < q) && (*p != ','); p++)
 238               token[i++]=(*p);
 239             if (*p == '%')
 240               token[i++]=(*p++);
 241             break;
 242           }
 243         if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
 244             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
 245           {
 246             token[i++]=(*p++);
 247             break;
 248           }
 249         for ( ; *p != '\0'; p++)
 250         {
 251           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
 252               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
 253             break;
 254           if ((i > 0) && (*p == '<'))
 255             break;
 256           token[i++]=(*p);
 257           if (*p == '>')
 258             break;
 259           if (*p == '(')
 260             for (p++; *p != '\0'; p++)
 261             {
 262               token[i++]=(*p);
 263               if ((*p == ')') && (*(p-1) != '\\'))
 264                 break;
 265             }
 266         }
 267         break;
 268       }
 269     }
 270     break;
 271   }
 272   token[i]='\0';
 273   if (LocaleNCompare(token,"url(",4) == 0)
 274     {
 275       ssize_t
 276         offset;
 277
 278       offset=4;
 279       if (token[offset] == '#')
 280         offset++;
 281       i=(ssize_t) strlen(token);
 282       (void) CopyMagickString(token,token+offset,MaxTextExtent);
 283       token[i-offset-1]='\0';
 284     }
 285   while (isspace((int) ((unsigned char) *p)) != 0)
 286     p++;
 287   if (end != (const char **) NULL)
 288     *end=(const char *) p;
 289 }
 290 \f
 291 /*
 292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 293 %                                                                             %
 294 %                                                                             %
 295 %                                                                             %
 296 %   G l o b E x p r e s s i o n                                               %
 297 %                                                                             %
 298 %                                                                             %
 299 %                                                                             %
 300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 301 %
 302 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 303 %
 304 %  The format of the GlobExpression function is:
 305 %
 306 %      MagickBooleanType GlobExpression(const char *expression,
 307 %        const char *pattern,const MagickBooleanType case_insensitive)
 308 %
 309 %  A description of each parameter follows:
 310 %
 311 %    o expression: Specifies a pointer to a text string containing a file name.
 312 %
 313 %    o pattern: Specifies a pointer to a text string containing a pattern.
 314 %
 315 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 316 %      an expression.
 317 %
 318 */
 319 MagickExport MagickBooleanType GlobExpression(const char *expression,
 320   const char *pattern,const MagickBooleanType case_insensitive)
 321 {
 322   MagickBooleanType
 323     done,
 324     match;
 325
 326   register const char
 327     *p;
 328
 329   /*
 330     Return on empty pattern or '*'.
 331   */
 332   if (pattern == (char *) NULL)
 333     return(MagickTrue);
 334   if (GetUTFCode(pattern) == 0)
 335     return(MagickTrue);
 336   if (LocaleCompare(pattern,"*") == 0)
 337     return(MagickTrue);
 338   p=pattern+strlen(pattern)-1;
 339   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 340     {
 341       ExceptionInfo
 342         *exception;
 343
 344       ImageInfo
 345         *image_info;
 346
 347       /*
 348         Determine if pattern is a scene, i.e. img0001.pcd[2].
 349       */
 350       image_info=AcquireImageInfo();
 351       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
 352       exception=AcquireExceptionInfo();
 353       (void) SetImageInfo(image_info,0,exception);
 354       exception=DestroyExceptionInfo(exception);
 355       if (LocaleCompare(image_info->filename,pattern) != 0)
 356         {
 357           image_info=DestroyImageInfo(image_info);
 358           return(MagickFalse);
 359         }
 360       image_info=DestroyImageInfo(image_info);
 361     }
 362   /*
 363     Evaluate glob expression.
 364   */
 365   done=MagickFalse;
 366   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 367   {
 368     if (GetUTFCode(expression) == 0)
 369       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 370         break;
 371     switch (GetUTFCode(pattern))
 372     {
 373       case '*':
 374       {
 375         MagickBooleanType
 376           status;
 377
 378         status=MagickFalse;
 379         pattern+=GetUTFOctets(pattern);
 380         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 381         {
 382           status=GlobExpression(expression,pattern,case_insensitive);
 383           expression+=GetUTFOctets(expression);
 384         }
 385         if (status != MagickFalse)
 386           {
 387             while (GetUTFCode(expression) != 0)
 388               expression+=GetUTFOctets(expression);
 389             while (GetUTFCode(pattern) != 0)
 390               pattern+=GetUTFOctets(pattern);
 391           }
 392         break;
 393       }
 394       case '[':
 395       {
 396         int
 397           c;
 398
 399         pattern+=GetUTFOctets(pattern);
 400         for ( ; ; )
 401         {
 402           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 403             {
 404               done=MagickTrue;
 405               break;
 406             }
 407           if (GetUTFCode(pattern) == '\\')
 408             {
 409               pattern+=GetUTFOctets(pattern);
 410               if (GetUTFCode(pattern) == 0)
 411                 {
 412                   done=MagickTrue;
 413                   break;
 414                 }
 415              }
 416           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 417             {
 418               c=GetUTFCode(pattern);
 419               pattern+=GetUTFOctets(pattern);
 420               pattern+=GetUTFOctets(pattern);
 421               if (GetUTFCode(pattern) == ']')
 422                 {
 423                   done=MagickTrue;
 424                   break;
 425                 }
 426               if (GetUTFCode(pattern) == '\\')
 427                 {
 428                   pattern+=GetUTFOctets(pattern);
 429                   if (GetUTFCode(pattern) == 0)
 430                     {
 431                       done=MagickTrue;
 432                       break;
 433                     }
 434                 }
 435               if ((GetUTFCode(expression) < c) ||
 436                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 437                 {
 438                   pattern+=GetUTFOctets(pattern);
 439                   continue;
 440                 }
 441             }
 442           else
 443             if (GetUTFCode(pattern) != GetUTFCode(expression))
 444               {
 445                 pattern+=GetUTFOctets(pattern);
 446                 continue;
 447               }
 448           pattern+=GetUTFOctets(pattern);
 449           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 450           {
 451             if ((GetUTFCode(pattern) == '\\') &&
 452                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 453               pattern+=GetUTFOctets(pattern);
 454             pattern+=GetUTFOctets(pattern);
 455           }
 456           if (GetUTFCode(pattern) != 0)
 457             {
 458               pattern+=GetUTFOctets(pattern);
 459               expression+=GetUTFOctets(expression);
 460             }
 461           break;
 462         }
 463         break;
 464       }
 465       case '?':
 466       {
 467         pattern+=GetUTFOctets(pattern);
 468         expression+=GetUTFOctets(expression);
 469         break;
 470       }
 471       case '{':
 472       {
 473         register const char
 474           *p;
 475
 476         pattern+=GetUTFOctets(pattern);
 477         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 478         {
 479           p=expression;
 480           match=MagickTrue;
 481           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
 482                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
 483                  (match != MagickFalse))
 484           {
 485             if (GetUTFCode(pattern) == '\\')
 486               pattern+=GetUTFOctets(pattern);
 487             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
 488               MagickFalse;
 489             p+=GetUTFOctets(p);
 490             pattern+=GetUTFOctets(pattern);
 491           }
 492           if (GetUTFCode(pattern) == 0)
 493             {
 494               match=MagickFalse;
 495               done=MagickTrue;
 496               break;
 497             }
 498           else
 499             if (match != MagickFalse)
 500               {
 501                 expression=p;
 502                 while ((GetUTFCode(pattern) != '}') &&
 503                        (GetUTFCode(pattern) != 0))
 504                 {
 505                   pattern+=GetUTFOctets(pattern);
 506                   if (GetUTFCode(pattern) == '\\')
 507                     {
 508                       pattern+=GetUTFOctets(pattern);
 509                       if (GetUTFCode(pattern) == '}')
 510                         pattern+=GetUTFOctets(pattern);
 511                     }
 512                 }
 513               }
 514             else
 515               {
 516                 while ((GetUTFCode(pattern) != '}') &&
 517                        (GetUTFCode(pattern) != ',') &&
 518                        (GetUTFCode(pattern) != 0))
 519                 {
 520                   pattern+=GetUTFOctets(pattern);
 521                   if (GetUTFCode(pattern) == '\\')
 522                     {
 523                       pattern+=GetUTFOctets(pattern);
 524                       if ((GetUTFCode(pattern) == '}') ||
 525                           (GetUTFCode(pattern) == ','))
 526                         pattern+=GetUTFOctets(pattern);
 527                     }
 528                 }
 529               }
 530             if (GetUTFCode(pattern) != 0)
 531               pattern+=GetUTFOctets(pattern);
 532           }
 533         break;
 534       }
 535       case '\\':
 536       {
 537         pattern+=GetUTFOctets(pattern);
 538         if (GetUTFCode(pattern) == 0)
 539           break;
 540       }
 541       default:
 542       {
 543         if (case_insensitive != MagickFalse)
 544           {
 545             if (tolower((int) GetUTFCode(expression)) !=
 546                 tolower((int) GetUTFCode(pattern)))
 547               {
 548                 done=MagickTrue;
 549                 break;
 550               }
 551           }
 552         else
 553           if (GetUTFCode(expression) != GetUTFCode(pattern))
 554             {
 555               done=MagickTrue;
 556               break;
 557             }
 558         expression+=GetUTFOctets(expression);
 559         pattern+=GetUTFOctets(pattern);
 560       }
 561     }
 562   }
 563   while (GetUTFCode(pattern) == '*')
 564     pattern+=GetUTFOctets(pattern);
 565   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 566     MagickTrue : MagickFalse;
 567   return(match);
 568 }
 569 \f
 570 /*
 571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 572 %                                                                             %
 573 %                                                                             %
 574 %                                                                             %
 575 +     I s G l o b                                                             %
 576 %                                                                             %
 577 %                                                                             %
 578 %                                                                             %
 579 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 580 %
 581 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 582 %  pattern.
 583 %
 584 %  The format of the IsGlob method is:
 585 %
 586 %      MagickBooleanType IsGlob(const char *path)
 587 %
 588 %  A description of each parameter follows:
 589 %
 590 %    o path: the path.
 591 %
 592 */
 593 MagickPrivate MagickBooleanType IsGlob(const char *path)
 594 {
 595   MagickBooleanType
 596     status;
 597
 598   if (IsPathAccessible(path) != MagickFalse)
 599     return(MagickFalse);
 600   status=(strchr(path,'*') != (char *) NULL) ||
 601     (strchr(path,'?') != (char *) NULL) ||
 602     (strchr(path,'{') != (char *) NULL) ||
 603     (strchr(path,'}') != (char *) NULL) ||
 604     (strchr(path,'[') != (char *) NULL) ||
 605     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
 606   return(status);
 607 }
 608 \f
 609 /*
 610 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 611 %                                                                             %
 612 %                                                                             %
 613 %                                                                             %
 614 %   T o k e n i z e r                                                         %
 615 %                                                                             %
 616 %                                                                             %
 617 %                                                                             %
 618 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 619 %
 620 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 621 %  one at a time from a string of characters.  The characters used for white
 622 %  space, for break characters, and for quotes can be specified.  Also,
 623 %  characters in the string can be preceded by a specifiable escape character
 624 %  which removes any special meaning the character may have.
 625 %
 626 %  Here is some terminology:
 627 %
 628 %    o token: A single unit of information in the form of a group of
 629 %      characters.
 630 %
 631 %    o white space: Space that gets ignored (except within quotes or when
 632 %      escaped), like blanks and tabs. in addition, white space terminates a
 633 %      non-quoted token.
 634 %
 635 %    o break set: One or more characters that separates non-quoted tokens.
 636 %      Commas are a common break character. The usage of break characters to
 637 %      signal the end of a token is the same as that of white space, except
 638 %      multiple break characters with nothing or only white space between
 639 %      generate a null token for each two break characters together.
 640 %
 641 %      For example, if blank is set to be the white space and comma is set to
 642 %      be the break character, the line
 643 %
 644 %        A, B, C ,  , DEF
 645 %
 646 %        ... consists of 5 tokens:
 647 %
 648 %        1)  "A"
 649 %        2)  "B"
 650 %        3)  "C"
 651 %        4)  "" (the null string)
 652 %        5)  "DEF"
 653 %
 654 %    o Quote character: A character that, when surrounding a group of other
 655 %      characters, causes the group of characters to be treated as a single
 656 %      token, no matter how many white spaces or break characters exist in
 657 %      the group. Also, a token always terminates after the closing quote.
 658 %      For example, if ' is the quote character, blank is white space, and
 659 %      comma is the break character, the following string
 660 %
 661 %        A, ' B, CD'EF GHI
 662 %
 663 %        ... consists of 4 tokens:
 664 %
 665 %        1)  "A"
 666 %        2)  " B, CD" (note the blanks & comma)
 667 %        3)  "EF"
 668 %        4)  "GHI"
 669 %
 670 %      The quote characters themselves do not appear in the resultant
 671 %      tokens.  The double quotes are delimiters i use here for
 672 %      documentation purposes only.
 673 %
 674 %    o Escape character: A character which itself is ignored but which
 675 %      causes the next character to be used as is.  ^ and \ are often used
 676 %      as escape characters. An escape in the last position of the string
 677 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 678 %      and non-escape) character. For example, assume white space, break
 679 %      character, and quote are the same as in the above examples, and
 680 %      further, assume that ^ is the escape character. Then, in the string
 681 %
 682 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 683 %
 684 %        ... there are 7 tokens:
 685 %
 686 %        1)  "ABC"
 687 %        2)  " DEF ' GH"
 688 %        3)  "I"
 689 %        4)  " "     (a lone blank)
 690 %        5)  "J"
 691 %        6)  "K L"
 692 %        7)  "^"     (passed as is at end of line)
 693 %
 694 %  The format of the Tokenizer method is:
 695 %
 696 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 697 %        const size_t max_token_length,const char *line,const char *white,
 698 %        const char *break_set,const char *quote,const char escape,
 699 %        char *breaker,int *next,char *quoted)
 700 %
 701 %  A description of each parameter follows:
 702 %
 703 %    o flag: right now, only the low order 2 bits are used.
 704 %
 705 %        1 => convert non-quoted tokens to upper case
 706 %        2 => convert non-quoted tokens to lower case
 707 %        0 => do not convert non-quoted tokens
 708 %
 709 %    o token: a character string containing the returned next token
 710 %
 711 %    o max_token_length: the maximum size of "token".  Characters beyond
 712 %      "max_token_length" are truncated.
 713 %
 714 %    o line: the string to be parsed.
 715 %
 716 %    o white: a string of the valid white spaces.  example:
 717 %
 718 %        char whitesp[]={" \t"};
 719 %
 720 %      blank and tab will be valid white space.
 721 %
 722 %    o break_set: a string of the valid break characters. example:
 723 %
 724 %        char breakch[]={";,"};
 725 %
 726 %      semicolon and comma will be valid break characters.
 727 %
 728 %    o quote: a string of the valid quote characters. An example would be
 729 %
 730 %        char quotech[]={"'\""};
 731 %
 732 %      (this causes single and double quotes to be valid) Note that a
 733 %      token starting with one of these characters needs the same quote
 734 %      character to terminate it.
 735 %
 736 %      for example:
 737 %
 738 %        "ABC '
 739 %
 740 %      is unterminated, but
 741 %
 742 %        "DEF" and 'GHI'
 743 %
 744 %      are properly terminated.  Note that different quote characters
 745 %      can appear on the same line; only for a given token do the quote
 746 %      characters have to be the same.
 747 %
 748 %    o escape: the escape character (NOT a string ... only one
 749 %      allowed). Use zero if none is desired.
 750 %
 751 %    o breaker: the break character used to terminate the current
 752 %      token.  If the token was quoted, this will be the quote used.  If
 753 %      the token is the last one on the line, this will be zero.
 754 %
 755 %    o next: this variable points to the first character of the
 756 %      next token.  it gets reset by "tokenizer" as it steps through the
 757 %      string.  Set it to 0 upon initialization, and leave it alone
 758 %      after that.  You can change it if you want to jump around in the
 759 %      string or re-parse from the beginning, but be careful.
 760 %
 761 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
 762 %      if not.  You may need this information (for example:  in C, a
 763 %      string with quotes around it is a character string, while one
 764 %      without is an identifier).
 765 %
 766 %    o result: 0 if we haven't reached EOS (end of string), and 1
 767 %      if we have.
 768 %
 769 */
 770
 771 #define IN_WHITE 0
 772 #define IN_TOKEN 1
 773 #define IN_QUOTE 2
 774 #define IN_OZONE 3
 775
 776 static ssize_t sindex(int c,const char *string)
 777 {
 778   register const char
 779     *p;
 780
 781   for (p=string; *p != '\0'; p++)
 782     if (c == (int) (*p))
 783       return((ssize_t) (p-string));
 784   return(-1);
 785 }
 786
 787 static void StoreToken(TokenInfo *token_info,char *string,
 788   size_t max_token_length,int c)
 789 {
 790   register ssize_t
 791     i;
 792
 793   if ((token_info->offset < 0) ||
 794       ((size_t) token_info->offset >= (max_token_length-1)))
 795     return;
 796   i=token_info->offset++;
 797   string[i]=(char) c;
 798   if (token_info->state == IN_QUOTE)
 799     return;
 800   switch (token_info->flag & 0x03)
 801   {
 802     case 1:
 803     {
 804       string[i]=(char) toupper(c);
 805       break;
 806     }
 807     case 2:
 808     {
 809       string[i]=(char) tolower(c);
 810       break;
 811     }
 812     default:
 813       break;
 814   }
 815 }
 816
 817 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
 818   char *token,const size_t max_token_length,const char *line,const char *white,
 819   const char *break_set,const char *quote,const char escape,char *breaker,
 820   int *next,char *quoted)
 821 {
 822   int
 823     c;
 824
 825   register ssize_t
 826     i;
 827
 828   *breaker='\0';
 829   *quoted='\0';
 830   if (line[*next] == '\0')
 831     return(1);
 832   token_info->state=IN_WHITE;
 833   token_info->quote=(char) MagickFalse;
 834   token_info->flag=flag;
 835   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
 836   {
 837     c=(int) line[*next];
 838     i=sindex(c,break_set);
 839     if (i >= 0)
 840       {
 841         switch (token_info->state)
 842         {
 843           case IN_WHITE:
 844           case IN_TOKEN:
 845           case IN_OZONE:
 846           {
 847             (*next)++;
 848             *breaker=break_set[i];
 849             token[token_info->offset]='\0';
 850             return(0);
 851           }
 852           case IN_QUOTE:
 853           {
 854             StoreToken(token_info,token,max_token_length,c);
 855             break;
 856           }
 857         }
 858         continue;
 859       }
 860     i=sindex(c,quote);
 861     if (i >= 0)
 862       {
 863         switch (token_info->state)
 864         {
 865           case IN_WHITE:
 866           {
 867             token_info->state=IN_QUOTE;
 868             token_info->quote=quote[i];
 869             *quoted=(char) MagickTrue;
 870             break;
 871           }
 872           case IN_QUOTE:
 873           {
 874             if (quote[i] != token_info->quote)
 875               StoreToken(token_info,token,max_token_length,c);
 876             else
 877               {
 878                 token_info->state=IN_OZONE;
 879                 token_info->quote='\0';
 880               }
 881             break;
 882           }
 883           case IN_TOKEN:
 884           case IN_OZONE:
 885           {
 886             *breaker=(char) c;
 887             token[token_info->offset]='\0';
 888             return(0);
 889           }
 890         }
 891         continue;
 892       }
 893     i=sindex(c,white);
 894     if (i >= 0)
 895       {
 896         switch (token_info->state)
 897         {
 898           case IN_WHITE:
 899           case IN_OZONE:
 900             break;
 901           case IN_TOKEN:
 902           {
 903             token_info->state=IN_OZONE;
 904             break;
 905           }
 906           case IN_QUOTE:
 907           {
 908             StoreToken(token_info,token,max_token_length,c);
 909             break;
 910           }
 911         }
 912         continue;
 913       }
 914     if (c == (int) escape)
 915       {
 916         if (line[(*next)+1] == '\0')
 917           {
 918             *breaker='\0';
 919             StoreToken(token_info,token,max_token_length,c);
 920             (*next)++;
 921             token[token_info->offset]='\0';
 922             return(0);
 923           }
 924         switch (token_info->state)
 925         {
 926           case IN_WHITE:
 927           {
 928             (*next)--;
 929             token_info->state=IN_TOKEN;
 930             break;
 931           }
 932           case IN_TOKEN:
 933           case IN_QUOTE:
 934           {
 935             (*next)++;
 936             c=(int) line[*next];
 937             StoreToken(token_info,token,max_token_length,c);
 938             break;
 939           }
 940           case IN_OZONE:
 941           {
 942             token[token_info->offset]='\0';
 943             return(0);
 944           }
 945         }
 946         continue;
 947       }
 948     switch (token_info->state)
 949     {
 950       case IN_WHITE:
 951         token_info->state=IN_TOKEN;
 952       case IN_TOKEN:
 953       case IN_QUOTE:
 954       {
 955         StoreToken(token_info,token,max_token_length,c);
 956         break;
 957       }
 958       case IN_OZONE:
 959       {
 960         token[token_info->offset]='\0';
 961         return(0);
 962       }
 963     }
 964   }
 965   token[token_info->offset]='\0';
 966   return(0);
 967 }