]> granicus.if.org Git - imagemagick/blob - magick/token.c
(no commit message)
[imagemagick] / magick / token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                               John Cristy                                   %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    http://www.imagemagick.org/script/license.php                            %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 \f
40 /*
41   Include declarations.
42 */
43 #include "magick/studio.h"
44 #include "magick/exception.h"
45 #include "magick/exception-private.h"
46 #include "magick/image.h"
47 #include "magick/memory_.h"
48 #include "magick/string_.h"
49 #include "magick/string-private.h"
50 #include "magick/token.h"
51 #include "magick/token-private.h"
52 #include "magick/utility.h"
53 \f
54 /*
55   Typedef declarations.
56 */
57 struct _TokenInfo
58 {
59   int
60     state;
61
62   MagickStatusType
63     flag;
64
65   ssize_t
66     offset;
67
68   char
69     quote;
70
71   size_t
72     signature;
73 };
74 \f
75 /*
76 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
77 %                                                                             %
78 %                                                                             %
79 %                                                                             %
80 %   A c q u i r e T o k e n I n f o                                           %
81 %                                                                             %
82 %                                                                             %
83 %                                                                             %
84 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
85 %
86 %  AcquireTokenInfo() allocates the TokenInfo structure.
87 %
88 %  The format of the AcquireTokenInfo method is:
89 %
90 %      TokenInfo *AcquireTokenInfo()
91 %
92 */
93 MagickExport TokenInfo *AcquireTokenInfo(void)
94 {
95   TokenInfo
96     *token_info;
97
98   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
99   if (token_info == (TokenInfo *) NULL)
100     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
101   token_info->signature=MagickSignature;
102   return(token_info);
103 }
104 \f
105 /*
106 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
107 %                                                                             %
108 %                                                                             %
109 %                                                                             %
110 %   D e s t r o y T o k e n I n f o                                           %
111 %                                                                             %
112 %                                                                             %
113 %                                                                             %
114 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115 %
116 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
117 %  structure.
118 %
119 %  The format of the DestroyTokenInfo method is:
120 %
121 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
122 %
123 %  A description of each parameter follows:
124 %
125 %    o token_info: Specifies a pointer to a TokenInfo structure.
126 %
127 */
128 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
129 {
130   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
131   assert(token_info != (TokenInfo *) NULL);
132   assert(token_info->signature == MagickSignature);
133   token_info->signature=(~MagickSignature);
134   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
135   return(token_info);
136 }
137 \f
138 /*
139 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
140 %                                                                             %
141 %                                                                             %
142 %                                                                             %
143 +   G e t M a g i c k T o k e n                                               %
144 %                                                                             %
145 %                                                                             %
146 %                                                                             %
147 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148 %
149 %  GetMagickToken() gets a token from the token stream.  A token is defined as
150 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
151 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
152 %  parentheses (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
153 %  separator characters: ':', '=', ',', and ';'.
154 %
155 %  The format of the GetMagickToken method is:
156 %
157 %      void GetMagickToken(const char *start,const char **end,char *token)
158 %
159 %  A description of each parameter follows:
160 %
161 %    o start: the start of the token sequence.
162 %
163 %    o end: point to the end of the token sequence.
164 %
165 %    o token: copy the token to this buffer.
166 %
167 */
168 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
169 {
170   double
171     value;
172
173   register const char
174     *p;
175
176   register ssize_t
177     i;
178
179   assert(start != (const char *) NULL);
180   assert(token != (char *) NULL);
181   i=0;
182   for (p=start; *p != '\0'; )
183   {
184     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
185       p++;
186     if (*p == '\0')
187       break;
188     switch (*p)
189     {
190       case '"':
191       case '\'':
192       case '`':
193       case '{':
194       {
195         register char
196           escape;
197
198         switch (*p)
199         {
200           case '"': escape='"'; break;
201           case '\'': escape='\''; break;
202           case '`': escape='\''; break;
203           case '{': escape='}'; break;
204           default: escape=(*p); break;
205         }
206         for (p++; *p != '\0'; p++)
207         {
208           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
209             p++;
210           else
211             if (*p == escape)
212               {
213                 p++;
214                 break;
215               }
216           token[i++]=(*p);
217         }
218         break;
219       }
220       case '/':
221       {
222         token[i++]=(*p++);
223         if ((*p == '>') || (*p == '/'))
224           token[i++]=(*p++);
225         break;
226       }
227       default:
228       {
229         char
230           *q;
231
232         value=InterpretLocaleValue(p,&q);
233         (void) value;
234         if ((p != q) && (*p != ','))
235           {
236             for ( ; (p < q) && (*p != ','); p++)
237               token[i++]=(*p);
238             if (*p == '%')
239               token[i++]=(*p++);
240             break;
241           }
242         if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
243             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
244           {
245             token[i++]=(*p++);
246             break;
247           }
248         for ( ; *p != '\0'; p++)
249         {
250           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
251               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
252             break;
253           if ((i > 0) && (*p == '<'))
254             break;
255           token[i++]=(*p);
256           if (*p == '>')
257             break;
258           if (*p == '(')
259             for (p++; *p != '\0'; p++)
260             {
261               token[i++]=(*p);
262               if ((*p == ')') && (*(p-1) != '\\'))
263                 break;
264             }
265         }
266         break;
267       }
268     }
269     break;
270   }
271   token[i]='\0';
272   if (LocaleNCompare(token,"url(",4) == 0)
273     {
274       ssize_t
275         offset;
276
277       offset=4;
278       if (token[offset] == '#')
279         offset++;
280       i=(ssize_t) strlen(token);
281       (void) CopyMagickString(token,token+offset,MaxTextExtent);
282       token[i-offset-1]='\0';
283     }
284   while (isspace((int) ((unsigned char) *p)) != 0)
285     p++;
286   if (end != (const char **) NULL)
287     *end=(const char *) p;
288 }
289 \f
290 /*
291 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
292 %                                                                             %
293 %                                                                             %
294 %                                                                             %
295 %   G l o b E x p r e s s i o n                                               %
296 %                                                                             %
297 %                                                                             %
298 %                                                                             %
299 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
300 %
301 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
302 %
303 %  The format of the GlobExpression function is:
304 %
305 %      MagickBooleanType GlobExpression(const char *expression,
306 %        const char *pattern,const MagickBooleanType case_insensitive)
307 %
308 %  A description of each parameter follows:
309 %
310 %    o expression: Specifies a pointer to a text string containing a file name.
311 %
312 %    o pattern: Specifies a pointer to a text string containing a pattern.
313 %
314 %    o case_insensitive: set to MagickTrue to ignore the case when matching
315 %      an expression.
316 %
317 */
318 MagickExport MagickBooleanType GlobExpression(const char *expression,
319   const char *pattern,const MagickBooleanType case_insensitive)
320 {
321   MagickBooleanType
322     done,
323     match;
324
325   register const char
326     *p;
327
328   /*
329     Return on empty pattern or '*'.
330   */
331   if (pattern == (char *) NULL)
332     return(MagickTrue);
333   if (GetUTFCode(pattern) == 0)
334     return(MagickTrue);
335   if (LocaleCompare(pattern,"*") == 0)
336     return(MagickTrue);
337   p=pattern+strlen(pattern)-1;
338   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
339     {
340       ExceptionInfo
341         *exception;
342
343       ImageInfo
344         *image_info;
345
346       /*
347         Determine if pattern is a scene, i.e. img0001.pcd[2].
348       */
349       image_info=AcquireImageInfo();
350       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
351       exception=AcquireExceptionInfo();
352       (void) SetImageInfo(image_info,0,exception);
353       exception=DestroyExceptionInfo(exception);
354       if (LocaleCompare(image_info->filename,pattern) != 0)
355         {
356           image_info=DestroyImageInfo(image_info);
357           return(MagickFalse);
358         }
359       image_info=DestroyImageInfo(image_info);
360     }
361   /*
362     Evaluate glob expression.
363   */
364   done=MagickFalse;
365   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
366   {
367     if (GetUTFCode(expression) == 0)
368       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
369         break;
370     switch (GetUTFCode(pattern))
371     {
372       case '*':
373       {
374         MagickBooleanType
375           status;
376
377         status=MagickFalse;
378         pattern+=GetUTFOctets(pattern);
379         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
380         {
381           status=GlobExpression(expression,pattern,case_insensitive);
382           expression+=GetUTFOctets(expression);
383         }
384         if (status != MagickFalse)
385           {
386             while (GetUTFCode(expression) != 0)
387               expression+=GetUTFOctets(expression);
388             while (GetUTFCode(pattern) != 0)
389               pattern+=GetUTFOctets(pattern);
390           }
391         break;
392       }
393       case '[':
394       {
395         int
396           c;
397
398         pattern+=GetUTFOctets(pattern);
399         for ( ; ; )
400         {
401           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
402             {
403               done=MagickTrue;
404               break;
405             }
406           if (GetUTFCode(pattern) == '\\')
407             {
408               pattern+=GetUTFOctets(pattern);
409               if (GetUTFCode(pattern) == 0)
410                 {
411                   done=MagickTrue;
412                   break;
413                 }
414              }
415           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
416             {
417               c=GetUTFCode(pattern);
418               pattern+=GetUTFOctets(pattern);
419               pattern+=GetUTFOctets(pattern);
420               if (GetUTFCode(pattern) == ']')
421                 {
422                   done=MagickTrue;
423                   break;
424                 }
425               if (GetUTFCode(pattern) == '\\')
426                 {
427                   pattern+=GetUTFOctets(pattern);
428                   if (GetUTFCode(pattern) == 0)
429                     {
430                       done=MagickTrue;
431                       break;
432                     }
433                 }
434               if ((GetUTFCode(expression) < c) ||
435                   (GetUTFCode(expression) > GetUTFCode(pattern)))
436                 {
437                   pattern+=GetUTFOctets(pattern);
438                   continue;
439                 }
440             }
441           else
442             if (GetUTFCode(pattern) != GetUTFCode(expression))
443               {
444                 pattern+=GetUTFOctets(pattern);
445                 continue;
446               }
447           pattern+=GetUTFOctets(pattern);
448           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
449           {
450             if ((GetUTFCode(pattern) == '\\') &&
451                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
452               pattern+=GetUTFOctets(pattern);
453             pattern+=GetUTFOctets(pattern);
454           }
455           if (GetUTFCode(pattern) != 0)
456             {
457               pattern+=GetUTFOctets(pattern);
458               expression+=GetUTFOctets(expression);
459             }
460           break;
461         }
462         break;
463       }
464       case '?':
465       {
466         pattern+=GetUTFOctets(pattern);
467         expression+=GetUTFOctets(expression);
468         break;
469       }
470       case '{':
471       {
472         register const char
473           *p;
474
475         pattern+=GetUTFOctets(pattern);
476         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
477         {
478           p=expression;
479           match=MagickTrue;
480           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
481                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
482                  (match != MagickFalse))
483           {
484             if (GetUTFCode(pattern) == '\\')
485               pattern+=GetUTFOctets(pattern);
486             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
487               MagickFalse;
488             p+=GetUTFOctets(p);
489             pattern+=GetUTFOctets(pattern);
490           }
491           if (GetUTFCode(pattern) == 0)
492             {
493               match=MagickFalse;
494               done=MagickTrue;
495               break;
496             }
497           else
498             if (match != MagickFalse)
499               {
500                 expression=p;
501                 while ((GetUTFCode(pattern) != '}') &&
502                        (GetUTFCode(pattern) != 0))
503                 {
504                   pattern+=GetUTFOctets(pattern);
505                   if (GetUTFCode(pattern) == '\\')
506                     {
507                       pattern+=GetUTFOctets(pattern);
508                       if (GetUTFCode(pattern) == '}')
509                         pattern+=GetUTFOctets(pattern);
510                     }
511                 }
512               }
513             else
514               {
515                 while ((GetUTFCode(pattern) != '}') &&
516                        (GetUTFCode(pattern) != ',') &&
517                        (GetUTFCode(pattern) != 0))
518                 {
519                   pattern+=GetUTFOctets(pattern);
520                   if (GetUTFCode(pattern) == '\\')
521                     {
522                       pattern+=GetUTFOctets(pattern);
523                       if ((GetUTFCode(pattern) == '}') ||
524                           (GetUTFCode(pattern) == ','))
525                         pattern+=GetUTFOctets(pattern);
526                     }
527                 }
528               }
529             if (GetUTFCode(pattern) != 0)
530               pattern+=GetUTFOctets(pattern);
531           }
532         break;
533       }
534       case '\\':
535       {
536         pattern+=GetUTFOctets(pattern);
537         if (GetUTFCode(pattern) == 0)
538           break;
539       }
540       default:
541       {
542         if (case_insensitive != MagickFalse)
543           {
544             if (tolower((int) GetUTFCode(expression)) !=
545                 tolower((int) GetUTFCode(pattern)))
546               {
547                 done=MagickTrue;
548                 break;
549               }
550           }
551         else
552           if (GetUTFCode(expression) != GetUTFCode(pattern))
553             {
554               done=MagickTrue;
555               break;
556             }
557         expression+=GetUTFOctets(expression);
558         pattern+=GetUTFOctets(pattern);
559       }
560     }
561   }
562   while (GetUTFCode(pattern) == '*')
563     pattern+=GetUTFOctets(pattern);
564   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
565     MagickTrue : MagickFalse;
566   return(match);
567 }
568 \f
569 /*
570 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
571 %                                                                             %
572 %                                                                             %
573 %                                                                             %
574 +     I s G l o b                                                             %
575 %                                                                             %
576 %                                                                             %
577 %                                                                             %
578 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
579 %
580 %  IsGlob() returns MagickTrue if the path specification contains a globbing
581 %  pattern.
582 %
583 %  The format of the IsGlob method is:
584 %
585 %      MagickBooleanType IsGlob(const char *path)
586 %
587 %  A description of each parameter follows:
588 %
589 %    o path: the path.
590 %
591 */
592 MagickExport MagickBooleanType IsGlob(const char *path)
593 {
594   MagickBooleanType
595     status;
596
597   if (IsPathAccessible(path) != MagickFalse)
598     return(MagickFalse);
599   status=(strchr(path,'*') != (char *) NULL) ||
600     (strchr(path,'?') != (char *) NULL) ||
601     (strchr(path,'{') != (char *) NULL) ||
602     (strchr(path,'}') != (char *) NULL) ||
603     (strchr(path,'[') != (char *) NULL) ||
604     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
605   return(status);
606 }
607 \f
608 /*
609 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
610 %                                                                             %
611 %                                                                             %
612 %                                                                             %
613 %   T o k e n i z e r                                                         %
614 %                                                                             %
615 %                                                                             %
616 %                                                                             %
617 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
618 %
619 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
620 %  one at a time from a string of characters.  The characters used for white
621 %  space, for break characters, and for quotes can be specified.  Also,
622 %  characters in the string can be preceded by a specifiable escape character
623 %  which removes any special meaning the character may have.
624 %
625 %  Here is some terminology:
626 %
627 %    o token: A single unit of information in the form of a group of
628 %      characters.
629 %
630 %    o white space: Space that gets ignored (except within quotes or when
631 %      escaped), like blanks and tabs.  In addition, white space terminates a
632 %      non-quoted token.
633 %
634 %    o break set: One or more characters that separates non-quoted tokens.
635 %      Commas are a common break character. The usage of break characters to
636 %      signal the end of a token is the same as that of white space, except
637 %      multiple break characters with nothing or only white space between
638 %      generate a null token for each two break characters together.
639 %
640 %      For example, if blank is set to be the white space and comma is set to
641 %      be the break character, the line
642 %
643 %        A, B, C ,  , DEF
644 %
645 %        ... consists of 5 tokens:
646 %
647 %        1)  "A"
648 %        2)  "B"
649 %        3)  "C"
650 %        4)  "" (the null string)
651 %        5)  "DEF"
652 %
653 %    o Quote character: A character that, when surrounding a group of other
654 %      characters, causes the group of characters to be treated as a single
655 %      token, no matter how many white spaces or break characters exist in
656 %      the group. Also, a token always terminates after the closing quote.
657 %      For example, if ' is the quote character, blank is white space, and
658 %      comma is the break character, the following string
659 %
660 %        A, ' B, CD'EF GHI
661 %
662 %        ... consists of 4 tokens:
663 %
664 %        1)  "A"
665 %        2)  " B, CD" (note the blanks & comma)
666 %        3)  "EF"
667 %        4)  "GHI"
668 %
669 %      The quote characters themselves do not appear in the resultant
670 %      tokens.  The double quotes are delimiters I use here for
671 %      documentation purposes only.
672 %
673 %    o Escape character: A character which itself is ignored but which
674 %      causes the next character to be used as is.  ^ and \ are often used
675 %      as escape characters. An escape in the last position of the string
676 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
677 %      and non-escape) character. For example, assume white space, break
678 %      character, and quote are the same as in the above examples, and
679 %      further, assume that ^ is the escape character. Then, in the string
680 %
681 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
682 %
683 %        ... there are 7 tokens:
684 %
685 %        1)  "ABC"
686 %        2)  " DEF ' GH"
687 %        3)  "I"
688 %        4)  " "     (a lone blank)
689 %        5)  "J"
690 %        6)  "K L"
691 %        7)  "^"     (passed as is at end of line)
692 %
693 %  The format of the Tokenizer method is:
694 %
695 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
696 %        const size_t max_token_length,const char *line,const char *white,
697 %        const char *break_set,const char *quote,const char escape,
698 %        char *breaker,int *next,char *quoted)
699 %
700 %  A description of each parameter follows:
701 %
702 %    o flag: right now, only the low order 2 bits are used.
703 %
704 %        1 => convert non-quoted tokens to upper case
705 %        2 => convert non-quoted tokens to lower case
706 %        0 => do not convert non-quoted tokens
707 %
708 %    o token: a character string containing the returned next token
709 %
710 %    o max_token_length: the maximum size of "token".  Characters beyond
711 %      "max_token_length" are truncated.
712 %
713 %    o line: the string to be parsed.
714 %
715 %    o white: a string of the valid white spaces.  example:
716 %
717 %        char whitesp[]={" \t"};
718 %
719 %      blank and tab will be valid white space.
720 %
721 %    o break_set: a string of the valid break characters. example:
722 %
723 %        char breakch[]={";,"};
724 %
725 %      semicolon and comma will be valid break characters.
726 %
727 %    o quote: a string of the valid quote characters. An example would be
728 %
729 %        char quotech[]={"'\""};
730 %
731 %      (this causes single and double quotes to be valid) Note that a
732 %      token starting with one of these characters needs the same quote
733 %      character to terminate it.
734 %
735 %      for example:
736 %
737 %        "ABC '
738 %
739 %      is unterminated, but
740 %
741 %        "DEF" and 'GHI'
742 %
743 %      are properly terminated.  Note that different quote characters
744 %      can appear on the same line; only for a given token do the quote
745 %      characters have to be the same.
746 %
747 %    o escape: the escape character (NOT a string ... only one
748 %      allowed). Use zero if none is desired.
749 %
750 %    o breaker: the break character used to terminate the current
751 %      token.  If the token was quoted, this will be the quote used.  If
752 %      the token is the last one on the line, this will be zero.
753 %
754 %    o next: this variable points to the first character of the
755 %      next token.  it gets reset by "tokenizer" as it steps through the
756 %      string.  Set it to 0 upon initialization, and leave it alone
757 %      after that.  You can change it if you want to jump around in the
758 %      string or re-parse from the beginning, but be careful.
759 %
760 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
761 %      if not.  You may need this information (for example:  in C, a
762 %      string with quotes around it is a character string, while one
763 %      without is an identifier).
764 %
765 %    o result: 0 if we haven't reached EOS (end of string), and 1
766 %      if we have.
767 %
768 */
769
/*
  Parser states stored in TokenInfo.state by Tokenizer().
*/
enum
{
  IN_WHITE = 0,  /* before the token: nothing has been stored yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token finished (closing quote or white space seen) */
};
774
/*
  Return the zero-based position of the first occurrence of character c in
  string, or -1 when c does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
785
786 static void StoreToken(TokenInfo *token_info,char *string,
787   size_t max_token_length,int c)
788 {
789   register ssize_t
790     i;
791
792   if ((token_info->offset < 0) ||
793       ((size_t) token_info->offset >= (max_token_length-1)))
794     return;
795   i=token_info->offset++;
796   string[i]=(char) c;
797   if (token_info->state == IN_QUOTE)
798     return;
799   switch (token_info->flag & 0x03)
800   {
801     case 1:
802     {
803       string[i]=(char) toupper(c);
804       break;
805     }
806     case 2:
807     {
808       string[i]=(char) tolower(c);
809       break;
810     }
811     default:
812       break;
813   }
814 }
815
816 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
817   char *token,const size_t max_token_length,const char *line,const char *white,
818   const char *break_set,const char *quote,const char escape,char *breaker,
819   int *next,char *quoted)
820 {
821   int
822     c;
823
824   register ssize_t
825     i;
826
827   *breaker='\0';
828   *quoted='\0';
829   if (line[*next] == '\0')
830     return(1);
831   token_info->state=IN_WHITE;
832   token_info->quote=(char) MagickFalse;
833   token_info->flag=flag;
834   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
835   {
836     c=(int) line[*next];
837     i=sindex(c,break_set);
838     if (i >= 0)
839       {
840         switch (token_info->state)
841         {
842           case IN_WHITE:
843           case IN_TOKEN:
844           case IN_OZONE:
845           {
846             (*next)++;
847             *breaker=break_set[i];
848             token[token_info->offset]='\0';
849             return(0);
850           }
851           case IN_QUOTE:
852           {
853             StoreToken(token_info,token,max_token_length,c);
854             break;
855           }
856         }
857         continue;
858       }
859     i=sindex(c,quote);
860     if (i >= 0)
861       {
862         switch (token_info->state)
863         {
864           case IN_WHITE:
865           {
866             token_info->state=IN_QUOTE;
867             token_info->quote=quote[i];
868             *quoted=(char) MagickTrue;
869             break;
870           }
871           case IN_QUOTE:
872           {
873             if (quote[i] != token_info->quote)
874               StoreToken(token_info,token,max_token_length,c);
875             else
876               {
877                 token_info->state=IN_OZONE;
878                 token_info->quote='\0';
879               }
880             break;
881           }
882           case IN_TOKEN:
883           case IN_OZONE:
884           {
885             *breaker=(char) c;
886             token[token_info->offset]='\0';
887             return(0);
888           }
889         }
890         continue;
891       }
892     i=sindex(c,white);
893     if (i >= 0)
894       {
895         switch (token_info->state)
896         {
897           case IN_WHITE:
898           case IN_OZONE:
899             break;
900           case IN_TOKEN:
901           {
902             token_info->state=IN_OZONE;
903             break;
904           }
905           case IN_QUOTE:
906           {
907             StoreToken(token_info,token,max_token_length,c);
908             break;
909           }
910         }
911         continue;
912       }
913     if (c == (int) escape)
914       {
915         if (line[(*next)+1] == '\0')
916           {
917             *breaker='\0';
918             StoreToken(token_info,token,max_token_length,c);
919             (*next)++;
920             token[token_info->offset]='\0';
921             return(0);
922           }
923         switch (token_info->state)
924         {
925           case IN_WHITE:
926           {
927             (*next)--;
928             token_info->state=IN_TOKEN;
929             break;
930           }
931           case IN_TOKEN:
932           case IN_QUOTE:
933           {
934             (*next)++;
935             c=(int) line[*next];
936             StoreToken(token_info,token,max_token_length,c);
937             break;
938           }
939           case IN_OZONE:
940           {
941             token[token_info->offset]='\0';
942             return(0);
943           }
944         }
945         continue;
946       }
947     switch (token_info->state)
948     {
949       case IN_WHITE:
950         token_info->state=IN_TOKEN;
951       case IN_TOKEN:
952       case IN_QUOTE:
953       {
954         StoreToken(token_info,token,max_token_length,c);
955         break;
956       }
957       case IN_OZONE:
958       {
959         token[token_info->offset]='\0';
960         return(0);
961       }
962     }
963   }
964   token[token_info->offset]='\0';
965   return(0);
966 }