]> granicus.if.org Git - imagemagick/blob - MagickCore/token.c
(no commit message)
[imagemagick] / MagickCore / token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                               John Cristy                                   %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    http://www.imagemagick.org/script/license.php                            %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 \f
40 /*
41   Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/memory_.h"
48 #include "MagickCore/string_.h"
49 #include "MagickCore/string-private.h"
50 #include "MagickCore/token.h"
51 #include "MagickCore/token-private.h"
52 #include "MagickCore/utility.h"
53 #include "MagickCore/utility-private.h"
54 \f
55 /*
56   Typedef declarations.
57 */
58 struct _TokenInfo
59 {
60   int
61     state;
62
63   MagickStatusType
64     flag;
65
66   ssize_t
67     offset;
68
69   char
70     quote;
71
72   size_t
73     signature;
74 };
75 \f
76 /*
77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78 %                                                                             %
79 %                                                                             %
80 %                                                                             %
81 %   A c q u i r e T o k e n I n f o                                           %
82 %                                                                             %
83 %                                                                             %
84 %                                                                             %
85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86 %
87 %  AcquireTokenInfo() allocates the TokenInfo structure.
88 %
89 %  The format of the AcquireTokenInfo method is:
90 %
91 %      TokenInfo *AcquireTokenInfo()
92 %
93 */
94 MagickExport TokenInfo *AcquireTokenInfo(void)
95 {
96   TokenInfo
97     *token_info;
98
99   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
100   if (token_info == (TokenInfo *) NULL)
101     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
102   token_info->signature=MagickSignature;
103   return(token_info);
104 }
105 \f
106 /*
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108 %                                                                             %
109 %                                                                             %
110 %                                                                             %
111 %   D e s t r o y T o k e n I n f o                                           %
112 %                                                                             %
113 %                                                                             %
114 %                                                                             %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 %
117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
118 %  structure.
119 %
120 %  The format of the DestroyTokenInfo method is:
121 %
122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123 %
124 %  A description of each parameter follows:
125 %
126 %    o token_info: Specifies a pointer to a TokenInfo structure.
127 %
128 */
129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130 {
131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132   assert(token_info != (TokenInfo *) NULL);
133   assert(token_info->signature == MagickSignature);
134   token_info->signature=(~MagickSignature);
135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136   return(token_info);
137 }
138 \f
139 /*
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141 %                                                                             %
142 %                                                                             %
143 %                                                                             %
144 +   G e t M a g i c k T o k e n                                               %
145 %                                                                             %
146 %                                                                             %
147 %                                                                             %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149 %
150 %  GetMagickToken() gets a token from the token stream.  A token is defined as
151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
153 %  parentheses (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
154 %  separator characters: ':', '=', ',', and ';'.
155 %
156 %  The format of the GetMagickToken method is:
157 %
158 %      void GetMagickToken(const char *start,const char **end,char *token)
159 %
160 %  A description of each parameter follows:
161 %
162 %    o start: the start of the token sequence.
163 %
164 %    o end: point to the end of the token sequence.
165 %
166 %    o token: copy the token to this buffer.
167 %
168 */
169 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
170 {
171   double
172     value;
173
174   register const char
175     *p;
176
177   register ssize_t
178     i;
179
180   assert(start != (const char *) NULL);
181   assert(token != (char *) NULL);
182   i=0;
183   for (p=start; *p != '\0'; )
184   {
185     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
186       p++;
187     if (*p == '\0')
188       break;
189     switch (*p)
190     {
191       case '"':
192       case '\'':
193       case '`':
194       case '{':
195       {
196         register char
197           escape;
198
199         switch (*p)
200         {
201           case '"': escape='"'; break;
202           case '\'': escape='\''; break;
203           case '`': escape='\''; break;
204           case '{': escape='}'; break;
205           default: escape=(*p); break;
206         }
207         for (p++; *p != '\0'; p++)
208         {
209           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
210             p++;
211           else
212             if (*p == escape)
213               {
214                 p++;
215                 break;
216               }
217           token[i++]=(*p);
218         }
219         break;
220       }
221       case '/':
222       {
223         token[i++]=(*p++);
224         if ((*p == '>') || (*p == '/'))
225           token[i++]=(*p++);
226         break;
227       }
228       default:
229       {
230         char
231           *q;
232
233         value=InterpretLocaleValue(p,&q);
234         (void) value;
235         if ((p != q) && (*p != ','))
236           {
237             for ( ; (p < q) && (*p != ','); p++)
238               token[i++]=(*p);
239             if (*p == '%')
240               token[i++]=(*p++);
241             break;
242           }
243         if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
244             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
245           {
246             token[i++]=(*p++);
247             break;
248           }
249         for ( ; *p != '\0'; p++)
250         {
251           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
252               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
253             break;
254           if ((i > 0) && (*p == '<'))
255             break;
256           token[i++]=(*p);
257           if (*p == '>')
258             break;
259           if (*p == '(')
260             for (p++; *p != '\0'; p++)
261             {
262               token[i++]=(*p);
263               if ((*p == ')') && (*(p-1) != '\\'))
264                 break;
265             }
266         }
267         break;
268       }
269     }
270     break;
271   }
272   token[i]='\0';
273   if (LocaleNCompare(token,"url(",4) == 0)
274     {
275       ssize_t
276         offset;
277
278       offset=4;
279       if (token[offset] == '#')
280         offset++;
281       i=(ssize_t) strlen(token);
282       (void) CopyMagickString(token,token+offset,MaxTextExtent);
283       token[i-offset-1]='\0';
284     }
285   while (isspace((int) ((unsigned char) *p)) != 0)
286     p++;
287   if (end != (const char **) NULL)
288     *end=(const char *) p;
289 }
290 \f
291 /*
292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
293 %                                                                             %
294 %                                                                             %
295 %                                                                             %
296 %   G l o b E x p r e s s i o n                                               %
297 %                                                                             %
298 %                                                                             %
299 %                                                                             %
300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
301 %
302 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
303 %
304 %  The format of the GlobExpression function is:
305 %
306 %      MagickBooleanType GlobExpression(const char *expression,
307 %        const char *pattern,const MagickBooleanType case_insensitive)
308 %
309 %  A description of each parameter follows:
310 %
311 %    o expression: Specifies a pointer to a text string containing a file name.
312 %
313 %    o pattern: Specifies a pointer to a text string containing a pattern.
314 %
315 %    o case_insensitive: set to MagickTrue to ignore the case when matching
316 %      an expression.
317 %
318 */
319 MagickExport MagickBooleanType GlobExpression(const char *expression,
320   const char *pattern,const MagickBooleanType case_insensitive)
321 {
322   MagickBooleanType
323     done,
324     match;
325
326   register const char
327     *p;
328
      /*
        Overview: pattern is consumed one element at a time ('*', '[...]',
        '?', '{a,b}', backslash escape, literal) while expression is advanced
        in step.  The result is MagickTrue only when both strings are fully
        consumed at the end.  case_insensitive is honored only for literal
        characters (the default branch); '[...]' and '{...}' compare exact
        code values.  expression is dereferenced without a NULL check.
      */
329   /*
330     Return on empty pattern or '*'.
331   */
332   if (pattern == (char *) NULL)
333     return(MagickTrue);
334   if (GetUTFCode(pattern) == 0)
335     return(MagickTrue);
336   if (LocaleCompare(pattern,"*") == 0)
337     return(MagickTrue);
      /* pattern is non-empty here, so p addresses its last byte. */
338   p=pattern+strlen(pattern)-1;
339   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
340     {
341       ExceptionInfo
342         *exception;
343
344       ImageInfo
345         *image_info;
346
347       /*
348         Determine if pattern is a scene, i.e. img0001.pcd[2].
349       */
          /*
            The pattern is copied into a scratch ImageInfo and passed through
            SetImageInfo(); if the filename comes back different from the
            pattern, the function reports no match.  Presumably SetImageInfo()
            strips a recognized scene specification -- confirm.
          */
350       image_info=AcquireImageInfo();
351       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
352       exception=AcquireExceptionInfo();
353       (void) SetImageInfo(image_info,0,exception);
354       exception=DestroyExceptionInfo(exception);
355       if (LocaleCompare(image_info->filename,pattern) != 0)
356         {
357           image_info=DestroyImageInfo(image_info);
358           return(MagickFalse);
359         }
360       image_info=DestroyImageInfo(image_info);
361     }
362   /*
363     Evaluate glob expression.
364   */
      /* done is raised by a branch to stop the scan early. */
365   done=MagickFalse;
366   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
367   {
        /*
          Once expression is exhausted only '{' and '*' are still processed;
          any other pattern element ends the scan.
        */
368     if (GetUTFCode(expression) == 0)
369       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
370         break;
371     switch (GetUTFCode(pattern))
372     {
373       case '*':
374       {
375         MagickBooleanType
376           status;
377
            /*
              Try to match the remainder of the pattern recursively at each
              successive position of expression until one attempt succeeds.
            */
378         status=MagickFalse;
379         pattern+=GetUTFOctets(pattern);
380         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
381         {
382           status=GlobExpression(expression,pattern,case_insensitive);
383           expression+=GetUTFOctets(expression);
384         }
            /*
              On success run both pointers to their terminators so the final
              test at the bottom of the function reports a match.
            */
385         if (status != MagickFalse)
386           {
387             while (GetUTFCode(expression) != 0)
388               expression+=GetUTFOctets(expression);
389             while (GetUTFCode(pattern) != 0)
390               pattern+=GetUTFOctets(pattern);
391           }
392         break;
393       }
394       case '[':
395       {
396         int
397           c;
398
            /*
              Character class.  Members are examined one by one; c holds the
              lower bound when a member is a range "c-x".  A backslash makes
              the following character literal.
            */
399         pattern+=GetUTFOctets(pattern);
400         for ( ; ; )
401         {
              /* End of class (or of pattern) with no member matched. */
402           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
403             {
404               done=MagickTrue;
405               break;
406             }
407           if (GetUTFCode(pattern) == '\\')
408             {
409               pattern+=GetUTFOctets(pattern);
410               if (GetUTFCode(pattern) == 0)
411                 {
412                   done=MagickTrue;
413                   break;
414                 }
415              }
              /* A '-' after the current member introduces a range. */
416           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
417             {
418               c=GetUTFCode(pattern);
419               pattern+=GetUTFOctets(pattern);
420               pattern+=GetUTFOctets(pattern);
421               if (GetUTFCode(pattern) == ']')
422                 {
423                   done=MagickTrue;
424                   break;
425                 }
426               if (GetUTFCode(pattern) == '\\')
427                 {
428                   pattern+=GetUTFOctets(pattern);
429                   if (GetUTFCode(pattern) == 0)
430                     {
431                       done=MagickTrue;
432                       break;
433                     }
434                 }
                  /* Outside the range: move on to the next member. */
435               if ((GetUTFCode(expression) < c) ||
436                   (GetUTFCode(expression) > GetUTFCode(pattern)))
437                 {
438                   pattern+=GetUTFOctets(pattern);
439                   continue;
440                 }
441             }
442           else
443             if (GetUTFCode(pattern) != GetUTFCode(expression))
444               {
445                 pattern+=GetUTFOctets(pattern);
446                 continue;
447               }
              /*
                A member matched: skip the rest of the class up to the closing
                ']' (stepping over escaped characters), then consume the ']'
                and one character of expression.
              */
448           pattern+=GetUTFOctets(pattern);
449           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
450           {
451             if ((GetUTFCode(pattern) == '\\') &&
452                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
453               pattern+=GetUTFOctets(pattern);
454             pattern+=GetUTFOctets(pattern);
455           }
456           if (GetUTFCode(pattern) != 0)
457             {
458               pattern+=GetUTFOctets(pattern);
459               expression+=GetUTFOctets(expression);
460             }
461           break;
462         }
463         break;
464       }
465       case '?':
466       {
            /* Any single character: advance both strings by one character. */
467         pattern+=GetUTFOctets(pattern);
468         expression+=GetUTFOctets(expression);
469         break;
470       }
471       case '{':
472       {
            /* This p shadows the function-level p declared above. */
473         register const char
474           *p;
475
            /*
              Brace alternatives, separated by ','.  Each alternative is
              compared against expression starting from the current position;
              the first one that matches advances expression and the rest of
              the group is skipped.
            */
476         pattern+=GetUTFOctets(pattern);
477         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
478         {
479           p=expression;
480           match=MagickTrue;
              /*
                NOTE(review): this loop also stops when p reaches the end of
                expression while match is still MagickTrue, so an alternative
                longer than the remaining expression is then handled as a
                match below -- confirm whether that is intended.
              */
481           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
482                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
483                  (match != MagickFalse))
484           {
485             if (GetUTFCode(pattern) == '\\')
486               pattern+=GetUTFOctets(pattern);
487             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
488               MagickFalse;
489             p+=GetUTFOctets(p);
490             pattern+=GetUTFOctets(pattern);
491           }
              /* Pattern ended inside the braces: stop scanning. */
492           if (GetUTFCode(pattern) == 0)
493             {
494               match=MagickFalse;
495               done=MagickTrue;
496               break;
497             }
498           else
499             if (match != MagickFalse)
500               {
                    /* Accept the alternative and skip ahead to the '}'. */
501                 expression=p;
502                 while ((GetUTFCode(pattern) != '}') &&
503                        (GetUTFCode(pattern) != 0))
504                 {
505                   pattern+=GetUTFOctets(pattern);
506                   if (GetUTFCode(pattern) == '\\')
507                     {
508                       pattern+=GetUTFOctets(pattern);
509                       if (GetUTFCode(pattern) == '}')
510                         pattern+=GetUTFOctets(pattern);
511                     }
512                 }
513               }
514             else
515               {
                    /* Rejected: skip to the next ',' or to the '}'. */
516                 while ((GetUTFCode(pattern) != '}') &&
517                        (GetUTFCode(pattern) != ',') &&
518                        (GetUTFCode(pattern) != 0))
519                 {
520                   pattern+=GetUTFOctets(pattern);
521                   if (GetUTFCode(pattern) == '\\')
522                     {
523                       pattern+=GetUTFOctets(pattern);
524                       if ((GetUTFCode(pattern) == '}') ||
525                           (GetUTFCode(pattern) == ','))
526                         pattern+=GetUTFOctets(pattern);
527                     }
528                 }
529               }
                /* Step over the ',' or '}' that stopped the skip. */
530             if (GetUTFCode(pattern) != 0)
531               pattern+=GetUTFOctets(pattern);
532           }
533         break;
534       }
535       case '\\':
536       {
            /*
              Escape: drop the backslash and, unless the pattern ends here,
              fall through so the next character is compared literally.
            */
537         pattern+=GetUTFOctets(pattern);
538         if (GetUTFCode(pattern) == 0)
539           break;
540       }
541       default:
542       {
            /*
              Literal character: a mismatch stops the scan, otherwise both
              strings advance by one character.
            */
543         if (case_insensitive != MagickFalse)
544           {
545             if (tolower((int) GetUTFCode(expression)) !=
546                 tolower((int) GetUTFCode(pattern)))
547               {
548                 done=MagickTrue;
549                 break;
550               }
551           }
552         else
553           if (GetUTFCode(expression) != GetUTFCode(pattern))
554             {
555               done=MagickTrue;
556               break;
557             }
558         expression+=GetUTFOctets(expression);
559         pattern+=GetUTFOctets(pattern);
560       }
561     }
562   }
      /*
        Trailing '*' match the empty string; after skipping them the result is
        a match only when both expression and pattern are exhausted.
      */
563   while (GetUTFCode(pattern) == '*')
564     pattern+=GetUTFOctets(pattern);
565   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
566     MagickTrue : MagickFalse;
567   return(match);
568 }
569 \f
570 /*
571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
572 %                                                                             %
573 %                                                                             %
574 %                                                                             %
575 +     I s G l o b                                                             %
576 %                                                                             %
577 %                                                                             %
578 %                                                                             %
579 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
580 %
581 %  IsGlob() returns MagickTrue if the path specification contains a globbing
582 %  pattern.
583 %
584 %  The format of the IsGlob method is:
585 %
586 %      MagickBooleanType IsGlob(const char *path)
587 %
588 %  A description of each parameter follows:
589 %
590 %    o path: the path.
591 %
592 */
593 MagickPrivate MagickBooleanType IsGlob(const char *path)
594 {
595   MagickBooleanType
596     status;
597
598   if (IsPathAccessible(path) != MagickFalse)
599     return(MagickFalse);
600   status=(strchr(path,'*') != (char *) NULL) ||
601     (strchr(path,'?') != (char *) NULL) ||
602     (strchr(path,'{') != (char *) NULL) ||
603     (strchr(path,'}') != (char *) NULL) ||
604     (strchr(path,'[') != (char *) NULL) ||
605     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
606   return(status);
607 }
608 \f
609 /*
610 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
611 %                                                                             %
612 %                                                                             %
613 %                                                                             %
614 %   T o k e n i z e r                                                         %
615 %                                                                             %
616 %                                                                             %
617 %                                                                             %
618 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
619 %
620 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
621 %  one at a time from a string of characters.  The characters used for white
622 %  space, for break characters, and for quotes can be specified.  Also,
623 %  characters in the string can be preceded by a specifiable escape character
624 %  which removes any special meaning the character may have.
625 %
626 %  Here is some terminology:
627 %
628 %    o token: A single unit of information in the form of a group of
629 %      characters.
630 %
631 %    o white space: Space that gets ignored (except within quotes or when
632 %      escaped), like blanks and tabs. In addition, white space terminates a
633 %      non-quoted token.
634 %
635 %    o break set: One or more characters that separates non-quoted tokens.
636 %      Commas are a common break character. The usage of break characters to
637 %      signal the end of a token is the same as that of white space, except
638 %      multiple break characters with nothing or only white space between
639 %      generate a null token for each two break characters together.
640 %
641 %      For example, if blank is set to be the white space and comma is set to
642 %      be the break character, the line
643 %
644 %        A, B, C ,  , DEF
645 %
646 %        ... consists of 5 tokens:
647 %
648 %        1)  "A"
649 %        2)  "B"
650 %        3)  "C"
651 %        4)  "" (the null string)
652 %        5)  "DEF"
653 %
654 %    o Quote character: A character that, when surrounding a group of other
655 %      characters, causes the group of characters to be treated as a single
656 %      token, no matter how many white spaces or break characters exist in
657 %      the group. Also, a token always terminates after the closing quote.
658 %      For example, if ' is the quote character, blank is white space, and
659 %      comma is the break character, the following string
660 %
661 %        A, ' B, CD'EF GHI
662 %
663 %        ... consists of 4 tokens:
664 %
665 %        1)  "A"
666 %        2)  " B, CD" (note the blanks & comma)
667 %        3)  "EF"
668 %        4)  "GHI"
669 %
670 %      The quote characters themselves do not appear in the resultant
671 %      tokens.  The double quotes are delimiters i use here for
672 %      documentation purposes only.
673 %
674 %    o Escape character: A character which itself is ignored but which
675 %      causes the next character to be used as is.  ^ and \ are often used
676 %      as escape characters. An escape in the last position of the string
677 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
678 %      and non-escape) character. For example, assume white space, break
679 %      character, and quote are the same as in the above examples, and
680 %      further, assume that ^ is the escape character. Then, in the string
681 %
682 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
683 %
684 %        ... there are 7 tokens:
685 %
686 %        1)  "ABC"
687 %        2)  " DEF ' GH"
688 %        3)  "I"
689 %        4)  " "     (a lone blank)
690 %        5)  "J"
691 %        6)  "K L"
692 %        7)  "^"     (passed as is at end of line)
693 %
694 %  The format of the Tokenizer method is:
695 %
696 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
697 %        const size_t max_token_length,const char *line,const char *white,
698 %        const char *break_set,const char *quote,const char escape,
699 %        char *breaker,int *next,char *quoted)
700 %
701 %  A description of each parameter follows:
702 %
703 %    o flag: right now, only the low order 2 bits are used.
704 %
705 %        1 => convert non-quoted tokens to upper case
706 %        2 => convert non-quoted tokens to lower case
707 %        0 => do not convert non-quoted tokens
708 %
709 %    o token: a character string containing the returned next token
710 %
711 %    o max_token_length: the maximum size of "token".  Characters beyond
712 %      "max_token_length" are truncated.
713 %
714 %    o line: the string to be parsed.
715 %
716 %    o white: a string of the valid white spaces.  example:
717 %
718 %        char whitesp[]={" \t"};
719 %
720 %      blank and tab will be valid white space.
721 %
722 %    o break: a string of the valid break characters. example:
723 %
724 %        char breakch[]={";,"};
725 %
726 %      semicolon and comma will be valid break characters.
727 %
728 %    o quote: a string of the valid quote characters. An example would be
729 %
730 %        char quotech[]={"'\""};
731 %
732 %      (this causes single and double quotes to be valid) Note that a
733 %      token starting with one of these characters needs the same quote
734 %      character to terminate it.
735 %
736 %      for example:
737 %
738 %        "ABC '
739 %
740 %      is unterminated, but
741 %
742 %        "DEF" and 'GHI'
743 %
744 %      are properly terminated.  Note that different quote characters
745 %      can appear on the same line; only for a given token do the quote
746 %      characters have to be the same.
747 %
748 %    o escape: the escape character (NOT a string ... only one
749 %      allowed). Use zero if none is desired.
750 %
751 %    o breaker: the break character used to terminate the current
752 %      token.  If the token was quoted, this will be the quote used.  If
753 %      the token is the last one on the line, this will be zero.
754 %
755 %    o next: this variable points to the first character of the
756 %      next token.  it gets reset by "tokenizer" as it steps through the
757 %      string.  Set it to 0 upon initialization, and leave it alone
758 %      after that.  You can change it if you want to jump around in the
759 %      string or re-parse from the beginning, but be careful.
760 %
761 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
762 %      if not.  You may need this information (for example:  in C, a
763 %      string with quotes around it is a character string, while one
764 %      without is an identifier).
765 %
766 %    o result: 0 if we haven't reached EOS (end of string), and 1
767 %      if we have.
768 %
769 */
770
/*
  Tokenizer() parser states, stored in TokenInfo.state.
*/
enum
{
  IN_WHITE = 0,  /* initial state, and while skipping white space */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* after a token's closing quote or trailing white space */
};
775
/*
  Return the zero-based position of the first character in string that equals
  c, or -1 when there is none.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
786
787 static void StoreToken(TokenInfo *token_info,char *string,
788   size_t max_token_length,int c)
789 {
790   register ssize_t
791     i;
792
793   if ((token_info->offset < 0) ||
794       ((size_t) token_info->offset >= (max_token_length-1)))
795     return;
796   i=token_info->offset++;
797   string[i]=(char) c;
798   if (token_info->state == IN_QUOTE)
799     return;
800   switch (token_info->flag & 0x03)
801   {
802     case 1:
803     {
804       string[i]=(char) toupper(c);
805       break;
806     }
807     case 2:
808     {
809       string[i]=(char) tolower(c);
810       break;
811     }
812     default:
813       break;
814   }
815 }
816
817 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
818   char *token,const size_t max_token_length,const char *line,const char *white,
819   const char *break_set,const char *quote,const char escape,char *breaker,
820   int *next,char *quoted)
821 {
822   int
823     c;
824
825   register ssize_t
826     i;
827
828   *breaker='\0';
829   *quoted='\0';
830   if (line[*next] == '\0')
831     return(1);
832   token_info->state=IN_WHITE;
833   token_info->quote=(char) MagickFalse;
834   token_info->flag=flag;
835   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
836   {
837     c=(int) line[*next];
838     i=sindex(c,break_set);
839     if (i >= 0)
840       {
841         switch (token_info->state)
842         {
843           case IN_WHITE:
844           case IN_TOKEN:
845           case IN_OZONE:
846           {
847             (*next)++;
848             *breaker=break_set[i];
849             token[token_info->offset]='\0';
850             return(0);
851           }
852           case IN_QUOTE:
853           {
854             StoreToken(token_info,token,max_token_length,c);
855             break;
856           }
857         }
858         continue;
859       }
860     i=sindex(c,quote);
861     if (i >= 0)
862       {
863         switch (token_info->state)
864         {
865           case IN_WHITE:
866           {
867             token_info->state=IN_QUOTE;
868             token_info->quote=quote[i];
869             *quoted=(char) MagickTrue;
870             break;
871           }
872           case IN_QUOTE:
873           {
874             if (quote[i] != token_info->quote)
875               StoreToken(token_info,token,max_token_length,c);
876             else
877               {
878                 token_info->state=IN_OZONE;
879                 token_info->quote='\0';
880               }
881             break;
882           }
883           case IN_TOKEN:
884           case IN_OZONE:
885           {
886             *breaker=(char) c;
887             token[token_info->offset]='\0';
888             return(0);
889           }
890         }
891         continue;
892       }
893     i=sindex(c,white);
894     if (i >= 0)
895       {
896         switch (token_info->state)
897         {
898           case IN_WHITE:
899           case IN_OZONE:
900             break;
901           case IN_TOKEN:
902           {
903             token_info->state=IN_OZONE;
904             break;
905           }
906           case IN_QUOTE:
907           {
908             StoreToken(token_info,token,max_token_length,c);
909             break;
910           }
911         }
912         continue;
913       }
914     if (c == (int) escape)
915       {
916         if (line[(*next)+1] == '\0')
917           {
918             *breaker='\0';
919             StoreToken(token_info,token,max_token_length,c);
920             (*next)++;
921             token[token_info->offset]='\0';
922             return(0);
923           }
924         switch (token_info->state)
925         {
926           case IN_WHITE:
927           {
928             (*next)--;
929             token_info->state=IN_TOKEN;
930             break;
931           }
932           case IN_TOKEN:
933           case IN_QUOTE:
934           {
935             (*next)++;
936             c=(int) line[*next];
937             StoreToken(token_info,token,max_token_length,c);
938             break;
939           }
940           case IN_OZONE:
941           {
942             token[token_info->offset]='\0';
943             return(0);
944           }
945         }
946         continue;
947       }
948     switch (token_info->state)
949     {
950       case IN_WHITE:
951         token_info->state=IN_TOKEN;
952       case IN_TOKEN:
953       case IN_QUOTE:
954       {
955         StoreToken(token_info,token,max_token_length,c);
956         break;
957       }
958       case IN_OZONE:
959       {
960         token[token_info->offset]='\0';
961         return(0);
962       }
963     }
964   }
965   token[token_info->offset]='\0';
966   return(0);
967 }