]> granicus.if.org Git - imagemagick/blob - magick/token.c
(no commit message)
[imagemagick] / magick / token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                               John Cristy                                   %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    http://www.imagemagick.org/script/license.php                            %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 \f
40 /*
41   Include declarations.
42 */
43 #include "magick/studio.h"
44 #include "magick/exception.h"
45 #include "magick/exception-private.h"
46 #include "magick/image.h"
47 #include "magick/memory_.h"
48 #include "magick/string_.h"
49 #include "magick/token.h"
50 #include "magick/token-private.h"
51 #include "magick/utility.h"
52 \f
53 /*
54   Typedef declarations.
55 */
56 struct _TokenInfo
57 {
58   int
59     state;
60
61   MagickStatusType
62     flag;
63
64   long
65     offset;
66
67   char
68     quote;
69
70   unsigned long
71     signature;
72 };
73 \f
74 /*
75 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76 %                                                                             %
77 %                                                                             %
78 %                                                                             %
79 %   A c q u i r e T o k e n I n f o                                           %
80 %                                                                             %
81 %                                                                             %
82 %                                                                             %
83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84 %
85 %  AcquireTokenInfo() allocates the TokenInfo structure.
86 %
87 %  The format of the AcquireTokenInfo method is:
88 %
89 %      TokenInfo *AcquireTokenInfo(void)
90 %
91 */
92 MagickExport TokenInfo *AcquireTokenInfo(void)
93 {
94   TokenInfo
95     *token_info;
96
97   token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
98   if (token_info == (TokenInfo *) NULL)
99     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100   token_info->signature=MagickSignature;
101   return(token_info);
102 }
103 \f
104 /*
105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106 %                                                                             %
107 %                                                                             %
108 %                                                                             %
109 %   D e s t r o y T o k e n I n f o                                           %
110 %                                                                             %
111 %                                                                             %
112 %                                                                             %
113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114 %
115 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
116 %  structure.
117 %
118 %  The format of the DestroyTokenInfo method is:
119 %
120 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121 %
122 %  A description of each parameter follows:
123 %
124 %    o token_info: Specifies a pointer to a TokenInfo structure.
125 %
126 */
127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128 {
129   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130   assert(token_info != (TokenInfo *) NULL);
131   assert(token_info->signature == MagickSignature);
132   token_info->signature=(~MagickSignature);
133   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134   return(token_info);
135 }
136 \f
137 /*
138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139 %                                                                             %
140 %                                                                             %
141 %                                                                             %
142 +   G e t M a g i c k T o k e n                                               %
143 %                                                                             %
144 %                                                                             %
145 %                                                                             %
146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147 %
148 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
149 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
150 %  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
151 %  parentheses (e.g. rgb(0,0,0)).
152 %
153 %  The format of the GetMagickToken method is:
154 %
155 %      void GetMagickToken(const char *start,const char **end,char *token)
156 %
157 %  A description of each parameter follows:
158 %
159 %    o start: the start of the token sequence.
160 %
161 %    o end: points to the end of the token sequence.
162 %
163 %    o token: copy the token to this buffer.
164 %
165 */
166 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
167 {
168   double
169     value;
170
171   register const char
172     *p;
173
174   register long
175     i;
176
177   i=0;
178   for (p=start; *p != '\0'; )
179   {
180     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
181       p++;
182     if (*p == '\0')
183       break;
184     switch (*p)
185     {
186       case '"':
187       case '\'':
188       case '`':
189       case '{':
190       {
191         register char
192           escape;
193
194         switch (*p)
195         {
196           case '"': escape='"'; break;
197           case '\'': escape='\''; break;
198           case '`': escape='\''; break;
199           case '{': escape='}'; break;
200           default: escape=(*p); break;
201         }
202         for (p++; *p != '\0'; p++)
203         {
204           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
205             p++;
206           else
207             if (*p == escape)
208               {
209                 p++;
210                 break;
211               }
212           token[i++]=(*p);
213         }
214         break;
215       }
216       case '/':
217       {
218         token[i++]=(*p++);
219         if ((*p == '>') || (*p == '/'))
220           token[i++]=(*p++);
221         break;
222       }
223       default:
224       {
225         char
226           *q;
227
228         value=strtod(p,&q);
229         if ((p != q) && (*p != ','))
230           {
231             for ( ; (p < q) && (*p != ','); p++)
232               token[i++]=(*p);
233             if (*p == '%')
234               token[i++]=(*p++);
235             break;
236           }
237         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
238             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
239           {
240             token[i++]=(*p++);
241             break;
242           }
243         for ( ; *p != '\0'; p++)
244         {
245           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
246               (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
247             break;
248           if ((i > 0) && (*p == '<'))
249             break;
250           token[i++]=(*p);
251           if (*p == '>')
252             break;
253           if (*p == '(')
254             for (p++; *p != '\0'; p++)
255             {
256               token[i++]=(*p);
257               if ((*p == ')') && (*(p-1) != '\\'))
258                 break;
259             }
260         }
261         break;
262       }
263     }
264     break;
265   }
266   token[i]='\0';
267   if (LocaleNCompare(token,"url(",4) == 0)
268     {
269       ssize_t
270         offset;
271
272       offset=4;
273       if (token[offset] == '#')
274         offset++;
275       i=(long) strlen(token);
276       (void) CopyMagickString(token,token+offset,MaxTextExtent);
277       token[i-offset-1]='\0';
278     }
279   while (isspace((int) ((unsigned char) *p)) != 0)
280     p++;
281   if (end != (const char **) NULL)
282     *end=(const char *) p;
283 }
284 \f
285 /*
286 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
287 %                                                                             %
288 %                                                                             %
289 %                                                                             %
290 %   G l o b E x p r e s s i o n                                               %
291 %                                                                             %
292 %                                                                             %
293 %                                                                             %
294 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
295 %
296 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
297 %
298 %  The format of the GlobExpression function is:
299 %
300 %      MagickBooleanType GlobExpression(const char *expression,
301 %        const char *pattern,const MagickBooleanType case_insensitive)
302 %
303 %  A description of each parameter follows:
304 %
305 %    o expression: Specifies a pointer to a text string containing a file name.
306 %
307 %    o pattern: Specifies a pointer to a text string containing a pattern.
308 %
309 %    o case_insensitive: set to MagickTrue to ignore the case when matching
310 %      an expression.
311 %
312 */
313 MagickExport MagickBooleanType GlobExpression(const char *expression,
314   const char *pattern,const MagickBooleanType case_insensitive)
315 {
316   MagickBooleanType
317     done,
318     match;
319
320   register const char
321     *p;
322
323   /*
324     Return on empty pattern or '*'.
325   */
326   if (pattern == (char *) NULL)
327     return(MagickTrue);
328   if (GetUTFCode(pattern) == 0)
329     return(MagickTrue);
330   if (LocaleCompare(pattern,"*") == 0)
331     return(MagickTrue);
332   p=pattern+strlen(pattern)-1;
333   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
334     {
335       ExceptionInfo
336         *exception;
337
338       ImageInfo
339         *image_info;
340
341       /*
342         Determine if pattern is a scene, i.e. img0001.pcd[2].
343       */
344       image_info=AcquireImageInfo();
345       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
346       exception=AcquireExceptionInfo();
347       (void) SetImageInfo(image_info,0,exception);
348       exception=DestroyExceptionInfo(exception);
349       if (LocaleCompare(image_info->filename,pattern) != 0)
350         {
351           image_info=DestroyImageInfo(image_info);
352           return(MagickFalse);
353         }
354       image_info=DestroyImageInfo(image_info);
355     }
356   /*
357     Evaluate glob expression.
358   */
359   done=MagickFalse;
360   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
361   {
362     if (GetUTFCode(expression) == 0)
363       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
364         break;
365     switch (GetUTFCode(pattern))
366     {
367       case '*':
368       {
369         MagickBooleanType
370           status;
371
372         status=MagickFalse;
373         pattern+=GetUTFOctets(pattern);
374         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
375         {
376           status=GlobExpression(expression,pattern,case_insensitive);
377           expression+=GetUTFOctets(expression);
378         }
379         if (status != MagickFalse)
380           {
381             while (GetUTFCode(expression) != 0)
382               expression+=GetUTFOctets(expression);
383             while (GetUTFCode(pattern) != 0)
384               pattern+=GetUTFOctets(pattern);
385           }
386         break;
387       }
388       case '[':
389       {
390         long
391           c;
392
393         pattern+=GetUTFOctets(pattern);
394         for ( ; ; )
395         {
396           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
397             {
398               done=MagickTrue;
399               break;
400             }
401           if (GetUTFCode(pattern) == '\\')
402             {
403               pattern+=GetUTFOctets(pattern);
404               if (GetUTFCode(pattern) == 0)
405                 {
406                   done=MagickTrue;
407                   break;
408                 }
409              }
410           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
411             {
412               c=GetUTFCode(pattern);
413               pattern+=GetUTFOctets(pattern);
414               pattern+=GetUTFOctets(pattern);
415               if (GetUTFCode(pattern) == ']')
416                 {
417                   done=MagickTrue;
418                   break;
419                 }
420               if (GetUTFCode(pattern) == '\\')
421                 {
422                   pattern+=GetUTFOctets(pattern);
423                   if (GetUTFCode(pattern) == 0)
424                     {
425                       done=MagickTrue;
426                       break;
427                     }
428                 }
429               if ((GetUTFCode(expression) < c) ||
430                   (GetUTFCode(expression) > GetUTFCode(pattern)))
431                 {
432                   pattern+=GetUTFOctets(pattern);
433                   continue;
434                 }
435             }
436           else
437             if (GetUTFCode(pattern) != GetUTFCode(expression))
438               {
439                 pattern+=GetUTFOctets(pattern);
440                 continue;
441               }
442           pattern+=GetUTFOctets(pattern);
443           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
444           {
445             if ((GetUTFCode(pattern) == '\\') &&
446                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
447               pattern+=GetUTFOctets(pattern);
448             pattern+=GetUTFOctets(pattern);
449           }
450           if (GetUTFCode(pattern) != 0)
451             {
452               pattern+=GetUTFOctets(pattern);
453               expression+=GetUTFOctets(expression);
454             }
455           break;
456         }
457         break;
458       }
459       case '?':
460       {
461         pattern+=GetUTFOctets(pattern);
462         expression+=GetUTFOctets(expression);
463         break;
464       }
465       case '{':
466       {
467         register const char
468           *p;
469
470         pattern+=GetUTFOctets(pattern);
471         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
472         {
473           p=expression;
474           match=MagickTrue;
475           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
476                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
477                  (match != MagickFalse))
478           {
479             if (GetUTFCode(pattern) == '\\')
480               pattern+=GetUTFOctets(pattern);
481             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
482               MagickFalse;
483             p+=GetUTFOctets(p);
484             pattern+=GetUTFOctets(pattern);
485           }
486           if (GetUTFCode(pattern) == 0)
487             {
488               match=MagickFalse;
489               done=MagickTrue;
490               break;
491             }
492           else
493             if (match != MagickFalse)
494               {
495                 expression=p;
496                 while ((GetUTFCode(pattern) != '}') &&
497                        (GetUTFCode(pattern) != 0))
498                 {
499                   pattern+=GetUTFOctets(pattern);
500                   if (GetUTFCode(pattern) == '\\')
501                     {
502                       pattern+=GetUTFOctets(pattern);
503                       if (GetUTFCode(pattern) == '}')
504                         pattern+=GetUTFOctets(pattern);
505                     }
506                 }
507               }
508             else
509               {
510                 while ((GetUTFCode(pattern) != '}') &&
511                        (GetUTFCode(pattern) != ',') &&
512                        (GetUTFCode(pattern) != 0))
513                 {
514                   pattern+=GetUTFOctets(pattern);
515                   if (GetUTFCode(pattern) == '\\')
516                     {
517                       pattern+=GetUTFOctets(pattern);
518                       if ((GetUTFCode(pattern) == '}') ||
519                           (GetUTFCode(pattern) == ','))
520                         pattern+=GetUTFOctets(pattern);
521                     }
522                 }
523               }
524             if (GetUTFCode(pattern) != 0)
525               pattern+=GetUTFOctets(pattern);
526           }
527         break;
528       }
529       case '\\':
530       {
531         pattern+=GetUTFOctets(pattern);
532         if (GetUTFCode(pattern) == 0)
533           break;
534       }
535       default:
536       {
537         if (case_insensitive != MagickFalse)
538           {
539             if (tolower((int) GetUTFCode(expression)) !=
540                 tolower((int) GetUTFCode(pattern)))
541               {
542                 done=MagickTrue;
543                 break;
544               }
545           }
546         else
547           if (GetUTFCode(expression) != GetUTFCode(pattern))
548             {
549               done=MagickTrue;
550               break;
551             }
552         expression+=GetUTFOctets(expression);
553         pattern+=GetUTFOctets(pattern);
554       }
555     }
556   }
557   while (GetUTFCode(pattern) == '*')
558     pattern+=GetUTFOctets(pattern);
559   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
560     MagickTrue : MagickFalse;
561   return(match);
562 }
563 \f
564 /*
565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
566 %                                                                             %
567 %                                                                             %
568 %                                                                             %
569 +     I s G l o b                                                             %
570 %                                                                             %
571 %                                                                             %
572 %                                                                             %
573 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
574 %
575 %  IsGlob() returns MagickTrue if the path specification contains a globbing
576 %  pattern.
577 %
578 %  The format of the IsGlob method is:
579 %
580 %      MagickBooleanType IsGlob(const char *path)
581 %
582 %  A description of each parameter follows:
583 %
584 %    o path: the path.
585 %
586 */
587 MagickExport MagickBooleanType IsGlob(const char *path)
588 {
589   MagickBooleanType
590     status;
591
592   if (IsPathAccessible(path) != MagickFalse)
593     return(MagickFalse);
594   status=(strchr(path,'*') != (char *) NULL) ||
595     (strchr(path,'?') != (char *) NULL) ||
596     (strchr(path,'{') != (char *) NULL) ||
597     (strchr(path,'}') != (char *) NULL) ||
598     (strchr(path,'[') != (char *) NULL) ||
599     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
600   return(status);
601 }
602 \f
603 /*
604 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
605 %                                                                             %
606 %                                                                             %
607 %                                                                             %
608 %   T o k e n i z e r                                                         %
609 %                                                                             %
610 %                                                                             %
611 %                                                                             %
612 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
613 %
614 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
615 %  one at a time from a string of characters.  The characters used for white
616 %  space, for break characters, and for quotes can be specified.  Also,
617 %  characters in the string can be preceded by a specifiable escape character
618 %  which removes any special meaning the character may have.
619 %
620 %  Here is some terminology:
621 %
622 %    o token: A single unit of information in the form of a group of
623 %      characters.
624 %
625 %    o white space: Space that gets ignored (except within quotes or when
626 %      escaped), like blanks and tabs. In addition, white space terminates a
627 %      non-quoted token.
628 %
629 %    o break set: One or more characters that separates non-quoted tokens.
630 %      Commas are a common break character. The usage of break characters to
631 %      signal the end of a token is the same as that of white space, except
632 %      multiple break characters with nothing or only white space between
633 %      generate a null token for each two break characters together.
634 %
635 %      For example, if blank is set to be the white space and comma is set to
636 %      be the break character, the line
637 %
638 %        A, B, C ,  , DEF
639 %
640 %        ... consists of 5 tokens:
641 %
642 %        1)  "A"
643 %        2)  "B"
644 %        3)  "C"
645 %        4)  "" (the null string)
646 %        5)  "DEF"
647 %
648 %    o Quote character: A character that, when surrounding a group of other
649 %      characters, causes the group of characters to be treated as a single
650 %      token, no matter how many white spaces or break characters exist in
651 %      the group. Also, a token always terminates after the closing quote.
652 %      For example, if ' is the quote character, blank is white space, and
653 %      comma is the break character, the following string
654 %
655 %        A, ' B, CD'EF GHI
656 %
657 %        ... consists of 4 tokens:
658 %
659 %        1)  "A"
660 %        2)  " B, CD" (note the blanks & comma)
661 %        3)  "EF"
662 %        4)  "GHI"
663 %
664 %      The quote characters themselves do not appear in the resultant
665 %      tokens.  The double quotes are delimiters i use here for
666 %      documentation purposes only.
667 %
668 %    o Escape character: A character which itself is ignored but which
669 %      causes the next character to be used as is.  ^ and \ are often used
670 %      as escape characters. An escape in the last position of the string
671 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
672 %      and non-escape) character. For example, assume white space, break
673 %      character, and quote are the same as in the above examples, and
674 %      further, assume that ^ is the escape character. Then, in the string
675 %
676 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
677 %
678 %        ... there are 7 tokens:
679 %
680 %        1)  "ABC"
681 %        2)  " DEF ' GH"
682 %        3)  "I"
683 %        4)  " "     (a lone blank)
684 %        5)  "J"
685 %        6)  "K L"
686 %        7)  "^"     (passed as is at end of line)
687 %
688 %  The format of the Tokenizer method is:
689 %
690 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
691 %        const size_t max_token_length,const char *line,const char *white,
692 %        const char *break_set,const char *quote,const char escape,
693 %        char *breaker,int *next,char *quoted)
694 %
695 %  A description of each parameter follows:
696 %
697 %    o flag: right now, only the low order 2 bits are used.
698 %
699 %        1 => convert non-quoted tokens to upper case
700 %        2 => convert non-quoted tokens to lower case
701 %        0 => do not convert non-quoted tokens
702 %
703 %    o token: a character string containing the returned next token
704 %
705 %    o max_token_length: the maximum size of "token".  Characters beyond
706 %      "max_token_length" are truncated.
707 %
708 %    o line: the string to be parsed.
709 %
710 %    o white: a string of the valid white spaces.  example:
711 %
712 %        char whitesp[]={" \t"};
713 %
714 %      blank and tab will be valid white space.
715 %
716 %    o break: a string of the valid break characters. example:
717 %
718 %        char breakch[]={";,"};
719 %
720 %      semicolon and comma will be valid break characters.
721 %
722 %    o quote: a string of the valid quote characters. An example would be
723 %
724 %        char quotech[]={"'\""};
725 %
726 %      (this causes single and double quotes to be valid) Note that a
727 %      token starting with one of these characters needs the same quote
728 %      character to terminate it.
729 %
730 %      for example:
731 %
732 %        "ABC '
733 %
734 %      is unterminated, but
735 %
736 %        "DEF" and 'GHI'
737 %
738 %      are properly terminated.  Note that different quote characters
739 %      can appear on the same line; only for a given token do the quote
740 %      characters have to be the same.
741 %
742 %    o escape: the escape character (NOT a string ... only one
743 %      allowed). Use zero if none is desired.
744 %
745 %    o breaker: the break character used to terminate the current
746 %      token.  If the token was quoted, this will be the quote used.  If
747 %      the token is the last one on the line, this will be zero.
748 %
749 %    o next: this variable points to the first character of the
750 %      next token.  it gets reset by "tokenizer" as it steps through the
751 %      string.  Set it to 0 upon initialization, and leave it alone
752 %      after that.  You can change it if you want to jump around in the
753 %      string or re-parse from the beginning, but be careful.
754 %
755 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
756 %      if not.  You may need this information (for example:  in C, a
757 %      string with quotes around it is a character string, while one
758 %      without is an identifier).
759 %
760 %    o result: 0 if we haven't reached EOS (end of string), and 1
761 %      if we have.
762 %
763 */
764
/*
  Tokenizer() parser states.
*/
enum
{
  IN_WHITE = 0,  /* skipping white space ahead of a token */
  IN_TOKEN = 1,  /* collecting an unquoted token */
  IN_QUOTE = 2,  /* collecting a quoted token */
  IN_OZONE = 3   /* token complete; skipping white space that trails it */
};
769
/*
  sindex() returns the zero-based position of the first occurrence of
  character `c` in `string`, or -1 when it does not occur.  The terminating
  NUL is never matched.
*/
static long sindex(int c,const char *string)
{
  long
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
780
781 static void StoreToken(TokenInfo *token_info,char *string,
782   size_t max_token_length,int c)
783 {
784   register long
785     i;
786
787   if ((token_info->offset < 0) ||
788       ((size_t) token_info->offset >= (max_token_length-1)))
789     return;
790   i=token_info->offset++;
791   string[i]=(char) c;
792   if (token_info->state == IN_QUOTE)
793     return;
794   switch (token_info->flag & 0x03)
795   {
796     case 1:
797     {
798       string[i]=(char) toupper(c);
799       break;
800     }
801     case 2:
802     {
803       string[i]=(char) tolower(c);
804       break;
805     }
806     default:
807       break;
808   }
809 }
810
811 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
812   char *token,const size_t max_token_length,const char *line,const char *white,
813   const char *break_set,const char *quote,const char escape,char *breaker,
814   int *next,char *quoted)
815 {
816   int
817     c;
818
819   register long
820     i;
821
822   *breaker='\0';
823   *quoted='\0';
824   if (line[*next] == '\0')
825     return(1);
826   token_info->state=IN_WHITE;
827   token_info->quote=(char) MagickFalse;
828   token_info->flag=flag;
829   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
830   {
831     c=(int) line[*next];
832     i=sindex(c,break_set);
833     if (i >= 0)
834       {
835         switch (token_info->state)
836         {
837           case IN_WHITE:
838           case IN_TOKEN:
839           case IN_OZONE:
840           {
841             (*next)++;
842             *breaker=break_set[i];
843             token[token_info->offset]='\0';
844             return(0);
845           }
846           case IN_QUOTE:
847           {
848             StoreToken(token_info,token,max_token_length,c);
849             break;
850           }
851         }
852         continue;
853       }
854     i=sindex(c,quote);
855     if (i >= 0)
856       {
857         switch (token_info->state)
858         {
859           case IN_WHITE:
860           {
861             token_info->state=IN_QUOTE;
862             token_info->quote=quote[i];
863             *quoted=(char) MagickTrue;
864             break;
865           }
866           case IN_QUOTE:
867           {
868             if (quote[i] != token_info->quote)
869               StoreToken(token_info,token,max_token_length,c);
870             else
871               {
872                 token_info->state=IN_OZONE;
873                 token_info->quote='\0';
874               }
875             break;
876           }
877           case IN_TOKEN:
878           case IN_OZONE:
879           {
880             *breaker=(char) c;
881             token[token_info->offset]='\0';
882             return(0);
883           }
884         }
885         continue;
886       }
887     i=sindex(c,white);
888     if (i >= 0)
889       {
890         switch (token_info->state)
891         {
892           case IN_WHITE:
893           case IN_OZONE:
894             break;
895           case IN_TOKEN:
896           {
897             token_info->state=IN_OZONE;
898             break;
899           }
900           case IN_QUOTE:
901           {
902             StoreToken(token_info,token,max_token_length,c);
903             break;
904           }
905         }
906         continue;
907       }
908     if (c == (int) escape)
909       {
910         if (line[(*next)+1] == '\0')
911           {
912             *breaker='\0';
913             StoreToken(token_info,token,max_token_length,c);
914             (*next)++;
915             token[token_info->offset]='\0';
916             return(0);
917           }
918         switch (token_info->state)
919         {
920           case IN_WHITE:
921           {
922             (*next)--;
923             token_info->state=IN_TOKEN;
924             break;
925           }
926           case IN_TOKEN:
927           case IN_QUOTE:
928           {
929             (*next)++;
930             c=(int) line[*next];
931             StoreToken(token_info,token,max_token_length,c);
932             break;
933           }
934           case IN_OZONE:
935           {
936             token[token_info->offset]='\0';
937             return(0);
938           }
939         }
940         continue;
941       }
942     switch (token_info->state)
943     {
944       case IN_WHITE:
945         token_info->state=IN_TOKEN;
946       case IN_TOKEN:
947       case IN_QUOTE:
948       {
949         StoreToken(token_info,token,max_token_length,c);
950         break;
951       }
952       case IN_OZONE:
953       {
954         token[token_info->offset]='\0';
955         return(0);
956       }
957     }
958   }
959   token[token_info->offset]='\0';
960   return(0);
961 }