granicus.if.org Git - imagemagick/blob - MagickCore/token.c

   1 /*
   2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   3 %                                                                             %
   4 %                                                                             %
   5 %                                                                             %
   6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
   7 %                      T    O   O  K  K   E      NN  N                        %
   8 %                      T    O   O  KKK    EEE    N N N                        %
   9 %                      T    O   O  K  K   E      N  NN                        %
  10 %                      T     OOO   K   K  EEEEE  N   N                        %
  11 %                                                                             %
  12 %                                                                             %
  13 %                         MagickCore Token Methods                            %
  14 %                                                                             %
  15 %                             Software Design                                 %
  16 %                               John Cristy                                   %
  17 %                              January 1993                                   %
  18 %                                                                             %
  19 %                                                                             %
  20 %  Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization      %
  21 %  dedicated to making software imaging solutions freely available.           %
  22 %                                                                             %
  23 %  You may not use this file except in compliance with the License.  You may  %
  24 %  obtain a copy of the License at                                            %
  25 %                                                                             %
  26 %    http://www.imagemagick.org/script/license.php                            %
  27 %                                                                             %
  28 %  Unless required by applicable law or agreed to in writing, software        %
  29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
  30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
  31 %  See the License for the specific language governing permissions and        %
  32 %  limitations under the License.                                             %
  33 %                                                                             %
  34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  35 %
  36 %
  37 %
  38 */
  39 \f
  40 /*
  41   Include declarations.
  42 */
  43 #include "MagickCore/studio.h"
  44 #include "MagickCore/exception.h"
  45 #include "MagickCore/exception-private.h"
  46 #include "MagickCore/image.h"
  47 #include "MagickCore/memory_.h"
  48 #include "MagickCore/string_.h"
  49 #include "MagickCore/string-private.h"
  50 #include "MagickCore/token.h"
  51 #include "MagickCore/token-private.h"
  52 #include "MagickCore/utility.h"
  53 \f
  54 /*
  55   Typedef declarations.
  56 */
  57 struct _TokenInfo
  58 {
  59   int
  60     state;
  61
  62   MagickStatusType
  63     flag;
  64
  65   ssize_t
  66     offset;
  67
  68   char
  69     quote;
  70
  71   size_t
  72     signature;
  73 };
  74 \f
  75 /*
  76 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  77 %                                                                             %
  78 %                                                                             %
  79 %                                                                             %
  80 %   A c q u i r e T o k e n I n f o                                           %
  81 %                                                                             %
  82 %                                                                             %
  83 %                                                                             %
  84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  85 %
  86 %  AcquireTokenInfo() allocates the TokenInfo structure.
  87 %
  88 %  The format of the AcquireTokenInfo method is:
  89 %
  90 %      TokenInfo *AcquireTokenInfo()
  91 %
  92 */
  93 MagickExport TokenInfo *AcquireTokenInfo(void)
  94 {
  95   TokenInfo
  96     *token_info;
  97
  98   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
  99   if (token_info == (TokenInfo *) NULL)
 100     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
 101   token_info->signature=MagickSignature;
 102   return(token_info);
 103 }
 104 \f
 105 /*
 106 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 107 %                                                                             %
 108 %                                                                             %
 109 %                                                                             %
 110 %   D e s t r o y T o k e n I n f o                                           %
 111 %                                                                             %
 112 %                                                                             %
 113 %                                                                             %
 114 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 115 %
 116 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
 117 %  structure.
 118 %
 119 %  The format of the DestroyTokenInfo method is:
 120 %
 121 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 122 %
 123 %  A description of each parameter follows:
 124 %
 125 %    o token_info: Specifies a pointer to a TokenInfo structure.
 126 %
 127 */
 128 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
 129 {
 130   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
 131   assert(token_info != (TokenInfo *) NULL);
 132   assert(token_info->signature == MagickSignature);
 133   token_info->signature=(~MagickSignature);
 134   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
 135   return(token_info);
 136 }
 137 \f
 138 /*
 139 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 140 %                                                                             %
 141 %                                                                             %
 142 %                                                                             %
 143 +   G e t M a g i c k T o k e n                                               %
 144 %                                                                             %
 145 %                                                                             %
 146 %                                                                             %
 147 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 148 %
 149 %  GetMagickToken() gets a token from the token stream.  A token is defined as
 150 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
 151 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
 152 %  parenthesis (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
 153 %  separator characters: ':', '=', ',', and ';'.
 154 %
 155 %  The format of the GetMagickToken method is:
 156 %
 157 %      void GetMagickToken(const char *start,const char **end,char *token)
 158 %
 159 %  A description of each parameter follows:
 160 %
 161 %    o start: the start of the token sequence.
 162 %
 163 %    o end: point to the end of the token sequence.
 164 %
 165 %    o token: copy the token to this buffer.
 166 %
 167 */
 168 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
 169 {
 170   double
 171     value;
 172
 173   register const char
 174     *p;
 175
 176   register ssize_t
 177     i;
 178
 179   assert(start != (const char *) NULL);
 180   assert(token != (char *) NULL);
 181   i=0;
 182   for (p=start; *p != '\0'; )
 183   {
 184     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
 185       p++;
 186     if (*p == '\0')
 187       break;
 188     switch (*p)
 189     {
 190       case '"':
 191       case '\'':
 192       case '`':
 193       case '{':
 194       {
 195         register char
 196           escape;
 197
 198         switch (*p)
 199         {
 200           case '"': escape='"'; break;
 201           case '\'': escape='\''; break;
 202           case '`': escape='\''; break;
 203           case '{': escape='}'; break;
 204           default: escape=(*p); break;
 205         }
 206         for (p++; *p != '\0'; p++)
 207         {
 208           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
 209             p++;
 210           else
 211             if (*p == escape)
 212               {
 213                 p++;
 214                 break;
 215               }
 216           token[i++]=(*p);
 217         }
 218         break;
 219       }
 220       case '/':
 221       {
 222         token[i++]=(*p++);
 223         if ((*p == '>') || (*p == '/'))
 224           token[i++]=(*p++);
 225         break;
 226       }
 227       default:
 228       {
 229         char
 230           *q;
 231
 232         value=InterpretLocaleValue(p,&q);
 233         (void) value;
 234         if ((p != q) && (*p != ','))
 235           {
 236             for ( ; (p < q) && (*p != ','); p++)
 237               token[i++]=(*p);
 238             if (*p == '%')
 239               token[i++]=(*p++);
 240             break;
 241           }
 242         if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
 243             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
 244           {
 245             token[i++]=(*p++);
 246             break;
 247           }
 248         for ( ; *p != '\0'; p++)
 249         {
 250           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
 251               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
 252             break;
 253           if ((i > 0) && (*p == '<'))
 254             break;
 255           token[i++]=(*p);
 256           if (*p == '>')
 257             break;
 258           if (*p == '(')
 259             for (p++; *p != '\0'; p++)
 260             {
 261               token[i++]=(*p);
 262               if ((*p == ')') && (*(p-1) != '\\'))
 263                 break;
 264             }
 265         }
 266         break;
 267       }
 268     }
 269     break;
 270   }
 271   token[i]='\0';
 272   if (LocaleNCompare(token,"url(",4) == 0)
 273     {
 274       ssize_t
 275         offset;
 276
 277       offset=4;
 278       if (token[offset] == '#')
 279         offset++;
 280       i=(ssize_t) strlen(token);
 281       (void) CopyMagickString(token,token+offset,MaxTextExtent);
 282       token[i-offset-1]='\0';
 283     }
 284   while (isspace((int) ((unsigned char) *p)) != 0)
 285     p++;
 286   if (end != (const char **) NULL)
 287     *end=(const char *) p;
 288 }
 289 \f
 290 /*
 291 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 292 %                                                                             %
 293 %                                                                             %
 294 %                                                                             %
 295 %   G l o b E x p r e s s i o n                                               %
 296 %                                                                             %
 297 %                                                                             %
 298 %                                                                             %
 299 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 300 %
 301 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
 302 %
 303 %  The format of the GlobExpression function is:
 304 %
 305 %      MagickBooleanType GlobExpression(const char *expression,
 306 %        const char *pattern,const MagickBooleanType case_insensitive)
 307 %
 308 %  A description of each parameter follows:
 309 %
 310 %    o expression: Specifies a pointer to a text string containing a file name.
 311 %
 312 %    o pattern: Specifies a pointer to a text string containing a pattern.
 313 %
 314 %    o case_insensitive: set to MagickTrue to ignore the case when matching
 315 %      an expression.
 316 %
 317 */
 318 MagickExport MagickBooleanType GlobExpression(const char *expression,
 319   const char *pattern,const MagickBooleanType case_insensitive)
 320 {
 321   MagickBooleanType
 322     done,
 323     match;
 324
 325   register const char
 326     *p;
 327
 328   /*
 329     Return on empty pattern or '*'.
 330   */
 331   if (pattern == (char *) NULL)
 332     return(MagickTrue);
 333   if (GetUTFCode(pattern) == 0)
 334     return(MagickTrue);
 335   if (LocaleCompare(pattern,"*") == 0)
 336     return(MagickTrue);
 337   p=pattern+strlen(pattern)-1;
 338   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
 339     {
 340       ExceptionInfo
 341         *exception;
 342
 343       ImageInfo
 344         *image_info;
 345
 346       /*
 347         Determine if pattern is a scene, i.e. img0001.pcd[2].
 348       */
 349       image_info=AcquireImageInfo();
 350       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
 351       exception=AcquireExceptionInfo();
 352       (void) SetImageInfo(image_info,0,exception);
 353       exception=DestroyExceptionInfo(exception);
 354       if (LocaleCompare(image_info->filename,pattern) != 0)
 355         {
 356           image_info=DestroyImageInfo(image_info);
 357           return(MagickFalse);
 358         }
 359       image_info=DestroyImageInfo(image_info);
 360     }
 361   /*
 362     Evaluate glob expression.
 363   */
 364   done=MagickFalse;
 365   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
 366   {
 367     if (GetUTFCode(expression) == 0)
 368       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
 369         break;
 370     switch (GetUTFCode(pattern))
 371     {
 372       case '*':
 373       {
 374         MagickBooleanType
 375           status;
 376
 377         status=MagickFalse;
 378         pattern+=GetUTFOctets(pattern);
 379         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
 380         {
 381           status=GlobExpression(expression,pattern,case_insensitive);
 382           expression+=GetUTFOctets(expression);
 383         }
 384         if (status != MagickFalse)
 385           {
 386             while (GetUTFCode(expression) != 0)
 387               expression+=GetUTFOctets(expression);
 388             while (GetUTFCode(pattern) != 0)
 389               pattern+=GetUTFOctets(pattern);
 390           }
 391         break;
 392       }
 393       case '[':
 394       {
 395         int
 396           c;
 397
 398         pattern+=GetUTFOctets(pattern);
 399         for ( ; ; )
 400         {
 401           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
 402             {
 403               done=MagickTrue;
 404               break;
 405             }
 406           if (GetUTFCode(pattern) == '\\')
 407             {
 408               pattern+=GetUTFOctets(pattern);
 409               if (GetUTFCode(pattern) == 0)
 410                 {
 411                   done=MagickTrue;
 412                   break;
 413                 }
 414              }
 415           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
 416             {
 417               c=GetUTFCode(pattern);
 418               pattern+=GetUTFOctets(pattern);
 419               pattern+=GetUTFOctets(pattern);
 420               if (GetUTFCode(pattern) == ']')
 421                 {
 422                   done=MagickTrue;
 423                   break;
 424                 }
 425               if (GetUTFCode(pattern) == '\\')
 426                 {
 427                   pattern+=GetUTFOctets(pattern);
 428                   if (GetUTFCode(pattern) == 0)
 429                     {
 430                       done=MagickTrue;
 431                       break;
 432                     }
 433                 }
 434               if ((GetUTFCode(expression) < c) ||
 435                   (GetUTFCode(expression) > GetUTFCode(pattern)))
 436                 {
 437                   pattern+=GetUTFOctets(pattern);
 438                   continue;
 439                 }
 440             }
 441           else
 442             if (GetUTFCode(pattern) != GetUTFCode(expression))
 443               {
 444                 pattern+=GetUTFOctets(pattern);
 445                 continue;
 446               }
 447           pattern+=GetUTFOctets(pattern);
 448           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
 449           {
 450             if ((GetUTFCode(pattern) == '\\') &&
 451                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
 452               pattern+=GetUTFOctets(pattern);
 453             pattern+=GetUTFOctets(pattern);
 454           }
 455           if (GetUTFCode(pattern) != 0)
 456             {
 457               pattern+=GetUTFOctets(pattern);
 458               expression+=GetUTFOctets(expression);
 459             }
 460           break;
 461         }
 462         break;
 463       }
 464       case '?':
 465       {
 466         pattern+=GetUTFOctets(pattern);
 467         expression+=GetUTFOctets(expression);
 468         break;
 469       }
 470       case '{':
 471       {
 472         register const char
 473           *p;
 474
 475         pattern+=GetUTFOctets(pattern);
 476         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
 477         {
 478           p=expression;
 479           match=MagickTrue;
 480           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
 481                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
 482                  (match != MagickFalse))
 483           {
 484             if (GetUTFCode(pattern) == '\\')
 485               pattern+=GetUTFOctets(pattern);
 486             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
 487               MagickFalse;
 488             p+=GetUTFOctets(p);
 489             pattern+=GetUTFOctets(pattern);
 490           }
 491           if (GetUTFCode(pattern) == 0)
 492             {
 493               match=MagickFalse;
 494               done=MagickTrue;
 495               break;
 496             }
 497           else
 498             if (match != MagickFalse)
 499               {
 500                 expression=p;
 501                 while ((GetUTFCode(pattern) != '}') &&
 502                        (GetUTFCode(pattern) != 0))
 503                 {
 504                   pattern+=GetUTFOctets(pattern);
 505                   if (GetUTFCode(pattern) == '\\')
 506                     {
 507                       pattern+=GetUTFOctets(pattern);
 508                       if (GetUTFCode(pattern) == '}')
 509                         pattern+=GetUTFOctets(pattern);
 510                     }
 511                 }
 512               }
 513             else
 514               {
 515                 while ((GetUTFCode(pattern) != '}') &&
 516                        (GetUTFCode(pattern) != ',') &&
 517                        (GetUTFCode(pattern) != 0))
 518                 {
 519                   pattern+=GetUTFOctets(pattern);
 520                   if (GetUTFCode(pattern) == '\\')
 521                     {
 522                       pattern+=GetUTFOctets(pattern);
 523                       if ((GetUTFCode(pattern) == '}') ||
 524                           (GetUTFCode(pattern) == ','))
 525                         pattern+=GetUTFOctets(pattern);
 526                     }
 527                 }
 528               }
 529             if (GetUTFCode(pattern) != 0)
 530               pattern+=GetUTFOctets(pattern);
 531           }
 532         break;
 533       }
 534       case '\\':
 535       {
 536         pattern+=GetUTFOctets(pattern);
 537         if (GetUTFCode(pattern) == 0)
 538           break;
 539       }
 540       default:
 541       {
 542         if (case_insensitive != MagickFalse)
 543           {
 544             if (tolower((int) GetUTFCode(expression)) !=
 545                 tolower((int) GetUTFCode(pattern)))
 546               {
 547                 done=MagickTrue;
 548                 break;
 549               }
 550           }
 551         else
 552           if (GetUTFCode(expression) != GetUTFCode(pattern))
 553             {
 554               done=MagickTrue;
 555               break;
 556             }
 557         expression+=GetUTFOctets(expression);
 558         pattern+=GetUTFOctets(pattern);
 559       }
 560     }
 561   }
 562   while (GetUTFCode(pattern) == '*')
 563     pattern+=GetUTFOctets(pattern);
 564   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
 565     MagickTrue : MagickFalse;
 566   return(match);
 567 }
 568 \f
 569 /*
 570 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 571 %                                                                             %
 572 %                                                                             %
 573 %                                                                             %
 574 +     I s G l o b                                                             %
 575 %                                                                             %
 576 %                                                                             %
 577 %                                                                             %
 578 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 579 %
 580 %  IsGlob() returns MagickTrue if the path specification contains a globbing
 581 %  pattern.
 582 %
 583 %  The format of the IsGlob method is:
 584 %
 585 %      MagickBooleanType IsGlob(const char *path)
 586 %
 587 %  A description of each parameter follows:
 588 %
 589 %    o path: the path.
 590 %
 591 */
 592 MagickExport MagickBooleanType IsGlob(const char *path)
 593 {
 594   MagickBooleanType
 595     status;
 596
 597   if (IsPathAccessible(path) != MagickFalse)
 598     return(MagickFalse);
 599   status=(strchr(path,'*') != (char *) NULL) ||
 600     (strchr(path,'?') != (char *) NULL) ||
 601     (strchr(path,'{') != (char *) NULL) ||
 602     (strchr(path,'}') != (char *) NULL) ||
 603     (strchr(path,'[') != (char *) NULL) ||
 604     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
 605   return(status);
 606 }
 607 \f
 608 /*
 609 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 610 %                                                                             %
 611 %                                                                             %
 612 %                                                                             %
 613 %   T o k e n i z e r                                                         %
 614 %                                                                             %
 615 %                                                                             %
 616 %                                                                             %
 617 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 618 %
 619 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
 620 %  one at a time from a string of characters.  The characters used for white
 621 %  space, for break characters, and for quotes can be specified.  Also,
 622 %  characters in the string can be preceded by a specifiable escape character
 623 %  which removes any special meaning the character may have.
 624 %
 625 %  Here is some terminology:
 626 %
 627 %    o token: A single unit of information in the form of a group of
 628 %      characters.
 629 %
 630 %    o white space: Space that gets ignored (except within quotes or when
 631 %      escaped), like blanks and tabs. in addition, white space terminates a
 632 %      non-quoted token.
 633 %
 634 %    o break set: One or more characters that separates non-quoted tokens.
 635 %      Commas are a common break character. The usage of break characters to
 636 %      signal the end of a token is the same as that of white space, except
 637 %      multiple break characters with nothing or only white space between
 638 %      generate a null token for each two break characters together.
 639 %
 640 %      For example, if blank is set to be the white space and comma is set to
 641 %      be the break character, the line
 642 %
 643 %        A, B, C ,  , DEF
 644 %
 645 %        ... consists of 5 tokens:
 646 %
 647 %        1)  "A"
 648 %        2)  "B"
 649 %        3)  "C"
 650 %        4)  "" (the null string)
 651 %        5)  "DEF"
 652 %
 653 %    o Quote character: A character that, when surrounding a group of other
 654 %      characters, causes the group of characters to be treated as a single
 655 %      token, no matter how many white spaces or break characters exist in
 656 %      the group. Also, a token always terminates after the closing quote.
 657 %      For example, if ' is the quote character, blank is white space, and
 658 %      comma is the break character, the following string
 659 %
 660 %        A, ' B, CD'EF GHI
 661 %
 662 %        ... consists of 4 tokens:
 663 %
 664 %        1)  "A"
 665 %        2)  " B, CD" (note the blanks & comma)
 666 %        3)  "EF"
 667 %        4)  "GHI"
 668 %
 669 %      The quote characters themselves do not appear in the resultant
 670 %      tokens.  The double quotes are delimiters i use here for
 671 %      documentation purposes only.
 672 %
 673 %    o Escape character: A character which itself is ignored but which
 674 %      causes the next character to be used as is.  ^ and \ are often used
 675 %      as escape characters. An escape in the last position of the string
 676 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
 677 %      and non-escape) character. For example, assume white space, break
 678 %      character, and quote are the same as in the above examples, and
 679 %      further, assume that ^ is the escape character. Then, in the string
 680 %
 681 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
 682 %
 683 %        ... there are 7 tokens:
 684 %
 685 %        1)  "ABC"
 686 %        2)  " DEF ' GH"
 687 %        3)  "I"
 688 %        4)  " "     (a lone blank)
 689 %        5)  "J"
 690 %        6)  "K L"
 691 %        7)  "^"     (passed as is at end of line)
 692 %
 693 %  The format of the Tokenizer method is:
 694 %
 695 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
 696 %        const size_t max_token_length,const char *line,const char *white,
 697 %        const char *break_set,const char *quote,const char escape,
 698 %        char *breaker,int *next,char *quoted)
 699 %
 700 %  A description of each parameter follows:
 701 %
 702 %    o flag: right now, only the low order 2 bits are used.
 703 %
 704 %        1 => convert non-quoted tokens to upper case
 705 %        2 => convert non-quoted tokens to lower case
 706 %        0 => do not convert non-quoted tokens
 707 %
 708 %    o token: a character string containing the returned next token
 709 %
 710 %    o max_token_length: the maximum size of "token".  Characters beyond
 711 %      "max_token_length" are truncated.
 712 %
 713 %    o line: the string to be parsed.
 714 %
 715 %    o white: a string of the valid white spaces.  example:
 716 %
 717 %        char whitesp[]={" \t"};
 718 %
 719 %      blank and tab will be valid white space.
 720 %
 721 %    o break_set: a string of the valid break characters. example:
 722 %
 723 %        char breakch[]={";,"};
 724 %
 725 %      semicolon and comma will be valid break characters.
 726 %
 727 %    o quote: a string of the valid quote characters. An example would be
 728 %
 729 %        char quotech[]={"'\""};
 730 %
 731 %      (this causes single and double quotes to be valid) Note that a
 732 %      token starting with one of these characters needs the same quote
 733 %      character to terminate it.
 734 %
 735 %      for example:
 736 %
 737 %        "ABC '
 738 %
 739 %      is unterminated, but
 740 %
 741 %        "DEF" and 'GHI'
 742 %
 743 %      are properly terminated.  Note that different quote characters
 744 %      can appear on the same line; only for a given token do the quote
 745 %      characters have to be the same.
 746 %
 747 %    o escape: the escape character (NOT a string ... only one
 748 %      allowed). Use zero if none is desired.
 749 %
 750 %    o breaker: the break character used to terminate the current
 751 %      token.  If the token was quoted, this will be the quote used.  If
 752 %      the token is the last one on the line, this will be zero.
 753 %
 754 %    o next: this variable points to the first character of the
 755 %      next token.  it gets reset by "tokenizer" as it steps through the
 756 %      string.  Set it to 0 upon initialization, and leave it alone
 757 %      after that.  You can change it if you want to jump around in the
 758 %      string or re-parse from the beginning, but be careful.
 759 %
 760 %    o quoted: set to True if the token was quoted and MagickFalse
 761 %      if not.  You may need this information (for example:  in C, a
 762 %      string with quotes around it is a character string, while one
 763 %      without is an identifier).
 764 %
 765 %    o result: 0 if we haven't reached EOS (end of string), and 1
 766 %      if we have.
 767 %
 768 */
 769
 /*
   Parser states stored in TokenInfo.state by Tokenizer().
 */
 enum
 {
   IN_WHITE = 0,  /* initial state: no token character has been stored yet */
   IN_TOKEN = 1,  /* inside an unquoted token */
   IN_QUOTE = 2,  /* inside a quoted token */
   IN_OZONE = 3   /* token finished (closing quote or white space seen) */
 };
 774
 /*
   sindex() returns the zero-based position of the first character in
   `string' that equals `c', or -1 when there is none.  The terminating NUL
   is never considered a match, so a `c' of zero always yields -1.
 */
 static ssize_t sindex(int c,const char *string)
 {
   ssize_t
     position;

   position=0;
   while (string[position] != '\0')
   {
     if ((int) string[position] == c)
       return(position);
     position++;
   }
   return(-1);
 }
 785
 786 static void StoreToken(TokenInfo *token_info,char *string,
 787   size_t max_token_length,int c)
 788 {
 789   register ssize_t
 790     i;
 791
 792   if ((token_info->offset < 0) ||
 793       ((size_t) token_info->offset >= (max_token_length-1)))
 794     return;
 795   i=token_info->offset++;
 796   string[i]=(char) c;
 797   if (token_info->state == IN_QUOTE)
 798     return;
 799   switch (token_info->flag & 0x03)
 800   {
 801     case 1:
 802     {
 803       string[i]=(char) toupper(c);
 804       break;
 805     }
 806     case 2:
 807     {
 808       string[i]=(char) tolower(c);
 809       break;
 810     }
 811     default:
 812       break;
 813   }
 814 }
 815
 816 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
 817   char *token,const size_t max_token_length,const char *line,const char *white,
 818   const char *break_set,const char *quote,const char escape,char *breaker,
 819   int *next,char *quoted)
 820 {
 821   int
 822     c;
 823
 824   register ssize_t
 825     i;
 826
 827   *breaker='\0';
 828   *quoted='\0';
 829   if (line[*next] == '\0')
 830     return(1);
 831   token_info->state=IN_WHITE;
 832   token_info->quote=(char) MagickFalse;
 833   token_info->flag=flag;
 834   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
 835   {
 836     c=(int) line[*next];
 837     i=sindex(c,break_set);
 838     if (i >= 0)
 839       {
 840         switch (token_info->state)
 841         {
 842           case IN_WHITE:
 843           case IN_TOKEN:
 844           case IN_OZONE:
 845           {
 846             (*next)++;
 847             *breaker=break_set[i];
 848             token[token_info->offset]='\0';
 849             return(0);
 850           }
 851           case IN_QUOTE:
 852           {
 853             StoreToken(token_info,token,max_token_length,c);
 854             break;
 855           }
 856         }
 857         continue;
 858       }
 859     i=sindex(c,quote);
 860     if (i >= 0)
 861       {
 862         switch (token_info->state)
 863         {
 864           case IN_WHITE:
 865           {
 866             token_info->state=IN_QUOTE;
 867             token_info->quote=quote[i];
 868             *quoted=(char) MagickTrue;
 869             break;
 870           }
 871           case IN_QUOTE:
 872           {
 873             if (quote[i] != token_info->quote)
 874               StoreToken(token_info,token,max_token_length,c);
 875             else
 876               {
 877                 token_info->state=IN_OZONE;
 878                 token_info->quote='\0';
 879               }
 880             break;
 881           }
 882           case IN_TOKEN:
 883           case IN_OZONE:
 884           {
 885             *breaker=(char) c;
 886             token[token_info->offset]='\0';
 887             return(0);
 888           }
 889         }
 890         continue;
 891       }
 892     i=sindex(c,white);
 893     if (i >= 0)
 894       {
 895         switch (token_info->state)
 896         {
 897           case IN_WHITE:
 898           case IN_OZONE:
 899             break;
 900           case IN_TOKEN:
 901           {
 902             token_info->state=IN_OZONE;
 903             break;
 904           }
 905           case IN_QUOTE:
 906           {
 907             StoreToken(token_info,token,max_token_length,c);
 908             break;
 909           }
 910         }
 911         continue;
 912       }
 913     if (c == (int) escape)
 914       {
 915         if (line[(*next)+1] == '\0')
 916           {
 917             *breaker='\0';
 918             StoreToken(token_info,token,max_token_length,c);
 919             (*next)++;
 920             token[token_info->offset]='\0';
 921             return(0);
 922           }
 923         switch (token_info->state)
 924         {
 925           case IN_WHITE:
 926           {
 927             (*next)--;
 928             token_info->state=IN_TOKEN;
 929             break;
 930           }
 931           case IN_TOKEN:
 932           case IN_QUOTE:
 933           {
 934             (*next)++;
 935             c=(int) line[*next];
 936             StoreToken(token_info,token,max_token_length,c);
 937             break;
 938           }
 939           case IN_OZONE:
 940           {
 941             token[token_info->offset]='\0';
 942             return(0);
 943           }
 944         }
 945         continue;
 946       }
 947     switch (token_info->state)
 948     {
 949       case IN_WHITE:
 950         token_info->state=IN_TOKEN;
 951       case IN_TOKEN:
 952       case IN_QUOTE:
 953       {
 954         StoreToken(token_info,token,max_token_length,c);
 955         break;
 956       }
 957       case IN_OZONE:
 958       {
 959         token[token_info->offset]='\0';
 960         return(0);
 961       }
 962     }
 963   }
 964   token[token_info->offset]='\0';
 965   return(0);
 966 }