]> granicus.if.org Git - imagemagick/blob - magick/token.c
(no commit message)
[imagemagick] / magick / token.c
1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                               John Cristy                                   %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    http://www.imagemagick.org/script/license.php                            %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 \f
40 /*
41   Include declarations.
42 */
43 #include "magick/studio.h"
44 #include "magick/exception.h"
45 #include "magick/exception-private.h"
46 #include "magick/image.h"
47 #include "magick/memory_.h"
48 #include "magick/string_.h"
49 #include "magick/token.h"
50 #include "magick/token-private.h"
51 #include "magick/utility.h"
52 \f
53 /*
54   Typedef declarations.
55 */
56 struct _TokenInfo
57 {
58   int
59     state;
60
61   MagickStatusType
62     flag;
63
64   ssize_t
65     offset;
66
67   char
68     quote;
69
70   size_t
71     signature;
72 };
73 \f
74 /*
75 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76 %                                                                             %
77 %                                                                             %
78 %                                                                             %
79 %   A c q u i r e T o k e n I n f o                                           %
80 %                                                                             %
81 %                                                                             %
82 %                                                                             %
83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84 %
85 %  AcquireTokenInfo() allocates the TokenInfo structure.
86 %
87 %  The format of the AcquireTokenInfo method is:
88 %
89 %      TokenInfo *AcquireTokenInfo()
90 %
91 */
92 MagickExport TokenInfo *AcquireTokenInfo(void)
93 {
94   TokenInfo
95     *token_info;
96
97   token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
98   if (token_info == (TokenInfo *) NULL)
99     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100   token_info->signature=MagickSignature;
101   return(token_info);
102 }
103 \f
104 /*
105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106 %                                                                             %
107 %                                                                             %
108 %                                                                             %
109 %   D e s t r o y T o k e n I n f o                                           %
110 %                                                                             %
111 %                                                                             %
112 %                                                                             %
113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114 %
115 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
116 %  structure.
117 %
118 %  The format of the DestroyTokenInfo method is:
119 %
120 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121 %
122 %  A description of each parameter follows:
123 %
124 %    o token_info: Specifies a pointer to a TokenInfo structure.
125 %
126 */
127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128 {
129   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130   assert(token_info != (TokenInfo *) NULL);
131   assert(token_info->signature == MagickSignature);
132   token_info->signature=(~MagickSignature);
133   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134   return(token_info);
135 }
136 \f
137 /*
138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139 %                                                                             %
140 %                                                                             %
141 %                                                                             %
142 +   G e t M a g i c k T o k e n                                               %
143 %                                                                             %
144 %                                                                             %
145 %                                                                             %
146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147 %
148 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
149 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
150 %  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
151 %  parenthesis (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
152 %  separator characters: ':', '=', ',', and ';'.
153 %
154 %  The format of the GetMagickToken method is:
155 %
156 %      void GetMagickToken(const char *start,const char **end,char *token)
157 %
158 %  A description of each parameter follows:
159 %
160 %    o start: the start of the token sequence.
161 %
162 %    o end: point to the end of the token sequence.
163 %
164 %    o token: copy the token to this buffer.
165 %
166 */
167 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
168 {
169   double
170     value;
171
172   register const char
173     *p;
174
175   register ssize_t
176     i;
177
178   i=0;
179   for (p=start; *p != '\0'; )
180   {
181     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
182       p++;
183     if (*p == '\0')
184       break;
185     switch (*p)
186     {
187       case '"':
188       case '\'':
189       case '`':
190       case '{':
191       {
192         register char
193           escape;
194
195         switch (*p)
196         {
197           case '"': escape='"'; break;
198           case '\'': escape='\''; break;
199           case '`': escape='\''; break;
200           case '{': escape='}'; break;
201           default: escape=(*p); break;
202         }
203         for (p++; *p != '\0'; p++)
204         {
205           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
206             p++;
207           else
208             if (*p == escape)
209               {
210                 p++;
211                 break;
212               }
213           token[i++]=(*p);
214         }
215         break;
216       }
217       case '/':
218       {
219         token[i++]=(*p++);
220         if ((*p == '>') || (*p == '/'))
221           token[i++]=(*p++);
222         break;
223       }
224       default:
225       {
226         char
227           *q;
228
229         value=strtod(p,&q);
230         if ((p != q) && (*p != ','))
231           {
232             for ( ; (p < q) && (*p != ','); p++)
233               token[i++]=(*p);
234             if (*p == '%')
235               token[i++]=(*p++);
236             break;
237           }
238         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
239             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
240           {
241             token[i++]=(*p++);
242             break;
243           }
244         for ( ; *p != '\0'; p++)
245         {
246           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
247               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
248             break;
249           if ((i > 0) && (*p == '<'))
250             break;
251           token[i++]=(*p);
252           if (*p == '>')
253             break;
254           if (*p == '(')
255             for (p++; *p != '\0'; p++)
256             {
257               token[i++]=(*p);
258               if ((*p == ')') && (*(p-1) != '\\'))
259                 break;
260             }
261         }
262         break;
263       }
264     }
265     break;
266   }
267   token[i]='\0';
268   if (LocaleNCompare(token,"url(",4) == 0)
269     {
270       ssize_t
271         offset;
272
273       offset=4;
274       if (token[offset] == '#')
275         offset++;
276       i=(ssize_t) strlen(token);
277       (void) CopyMagickString(token,token+offset,MaxTextExtent);
278       token[i-offset-1]='\0';
279     }
280   while (isspace((int) ((unsigned char) *p)) != 0)
281     p++;
282   if (end != (const char **) NULL)
283     *end=(const char *) p;
284 }
285 \f
286 /*
287 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
288 %                                                                             %
289 %                                                                             %
290 %                                                                             %
291 %   G l o b E x p r e s s i o n                                               %
292 %                                                                             %
293 %                                                                             %
294 %                                                                             %
295 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
296 %
297 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
298 %
299 %  The format of the GlobExpression function is:
300 %
301 %      MagickBooleanType GlobExpression(const char *expression,
302 %        const char *pattern,const MagickBooleanType case_insensitive)
303 %
304 %  A description of each parameter follows:
305 %
306 %    o expression: Specifies a pointer to a text string containing a file name.
307 %
308 %    o pattern: Specifies a pointer to a text string containing a pattern.
309 %
310 %    o case_insensitive: set to MagickTrue to ignore the case when matching
311 %      an expression.
312 %
313 */
314 MagickExport MagickBooleanType GlobExpression(const char *expression,
315   const char *pattern,const MagickBooleanType case_insensitive)
316 {
317   MagickBooleanType
318     done,
319     match;
320
321   register const char
322     *p;
323
324   /*
325     Return on empty pattern or '*'.
326   */
327   if (pattern == (char *) NULL)
328     return(MagickTrue);
329   if (GetUTFCode(pattern) == 0)
330     return(MagickTrue);
331   if (LocaleCompare(pattern,"*") == 0)
332     return(MagickTrue);
333   p=pattern+strlen(pattern)-1;
334   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
335     {
336       ExceptionInfo
337         *exception;
338
339       ImageInfo
340         *image_info;
341
342       /*
343         Determine if pattern is a scene, i.e. img0001.pcd[2].
344       */
345       image_info=AcquireImageInfo();
346       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
347       exception=AcquireExceptionInfo();
348       (void) SetImageInfo(image_info,0,exception);
349       exception=DestroyExceptionInfo(exception);
350       if (LocaleCompare(image_info->filename,pattern) != 0)
351         {
352           image_info=DestroyImageInfo(image_info);
353           return(MagickFalse);
354         }
355       image_info=DestroyImageInfo(image_info);
356     }
357   /*
358     Evaluate glob expression.
359   */
360   done=MagickFalse;
361   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
362   {
363     if (GetUTFCode(expression) == 0)
364       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
365         break;
366     switch (GetUTFCode(pattern))
367     {
368       case '*':
369       {
370         MagickBooleanType
371           status;
372
373         status=MagickFalse;
374         pattern+=GetUTFOctets(pattern);
375         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
376         {
377           status=GlobExpression(expression,pattern,case_insensitive);
378           expression+=GetUTFOctets(expression);
379         }
380         if (status != MagickFalse)
381           {
382             while (GetUTFCode(expression) != 0)
383               expression+=GetUTFOctets(expression);
384             while (GetUTFCode(pattern) != 0)
385               pattern+=GetUTFOctets(pattern);
386           }
387         break;
388       }
389       case '[':
390       {
391         ssize_t
392           c;
393
394         pattern+=GetUTFOctets(pattern);
395         for ( ; ; )
396         {
397           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
398             {
399               done=MagickTrue;
400               break;
401             }
402           if (GetUTFCode(pattern) == '\\')
403             {
404               pattern+=GetUTFOctets(pattern);
405               if (GetUTFCode(pattern) == 0)
406                 {
407                   done=MagickTrue;
408                   break;
409                 }
410              }
411           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
412             {
413               c=GetUTFCode(pattern);
414               pattern+=GetUTFOctets(pattern);
415               pattern+=GetUTFOctets(pattern);
416               if (GetUTFCode(pattern) == ']')
417                 {
418                   done=MagickTrue;
419                   break;
420                 }
421               if (GetUTFCode(pattern) == '\\')
422                 {
423                   pattern+=GetUTFOctets(pattern);
424                   if (GetUTFCode(pattern) == 0)
425                     {
426                       done=MagickTrue;
427                       break;
428                     }
429                 }
430               if ((GetUTFCode(expression) < c) ||
431                   (GetUTFCode(expression) > GetUTFCode(pattern)))
432                 {
433                   pattern+=GetUTFOctets(pattern);
434                   continue;
435                 }
436             }
437           else
438             if (GetUTFCode(pattern) != GetUTFCode(expression))
439               {
440                 pattern+=GetUTFOctets(pattern);
441                 continue;
442               }
443           pattern+=GetUTFOctets(pattern);
444           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
445           {
446             if ((GetUTFCode(pattern) == '\\') &&
447                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
448               pattern+=GetUTFOctets(pattern);
449             pattern+=GetUTFOctets(pattern);
450           }
451           if (GetUTFCode(pattern) != 0)
452             {
453               pattern+=GetUTFOctets(pattern);
454               expression+=GetUTFOctets(expression);
455             }
456           break;
457         }
458         break;
459       }
460       case '?':
461       {
462         pattern+=GetUTFOctets(pattern);
463         expression+=GetUTFOctets(expression);
464         break;
465       }
466       case '{':
467       {
468         register const char
469           *p;
470
471         pattern+=GetUTFOctets(pattern);
472         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
473         {
474           p=expression;
475           match=MagickTrue;
476           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
477                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
478                  (match != MagickFalse))
479           {
480             if (GetUTFCode(pattern) == '\\')
481               pattern+=GetUTFOctets(pattern);
482             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
483               MagickFalse;
484             p+=GetUTFOctets(p);
485             pattern+=GetUTFOctets(pattern);
486           }
487           if (GetUTFCode(pattern) == 0)
488             {
489               match=MagickFalse;
490               done=MagickTrue;
491               break;
492             }
493           else
494             if (match != MagickFalse)
495               {
496                 expression=p;
497                 while ((GetUTFCode(pattern) != '}') &&
498                        (GetUTFCode(pattern) != 0))
499                 {
500                   pattern+=GetUTFOctets(pattern);
501                   if (GetUTFCode(pattern) == '\\')
502                     {
503                       pattern+=GetUTFOctets(pattern);
504                       if (GetUTFCode(pattern) == '}')
505                         pattern+=GetUTFOctets(pattern);
506                     }
507                 }
508               }
509             else
510               {
511                 while ((GetUTFCode(pattern) != '}') &&
512                        (GetUTFCode(pattern) != ',') &&
513                        (GetUTFCode(pattern) != 0))
514                 {
515                   pattern+=GetUTFOctets(pattern);
516                   if (GetUTFCode(pattern) == '\\')
517                     {
518                       pattern+=GetUTFOctets(pattern);
519                       if ((GetUTFCode(pattern) == '}') ||
520                           (GetUTFCode(pattern) == ','))
521                         pattern+=GetUTFOctets(pattern);
522                     }
523                 }
524               }
525             if (GetUTFCode(pattern) != 0)
526               pattern+=GetUTFOctets(pattern);
527           }
528         break;
529       }
530       case '\\':
531       {
532         pattern+=GetUTFOctets(pattern);
533         if (GetUTFCode(pattern) == 0)
534           break;
535       }
536       default:
537       {
538         if (case_insensitive != MagickFalse)
539           {
540             if (tolower((int) GetUTFCode(expression)) !=
541                 tolower((int) GetUTFCode(pattern)))
542               {
543                 done=MagickTrue;
544                 break;
545               }
546           }
547         else
548           if (GetUTFCode(expression) != GetUTFCode(pattern))
549             {
550               done=MagickTrue;
551               break;
552             }
553         expression+=GetUTFOctets(expression);
554         pattern+=GetUTFOctets(pattern);
555       }
556     }
557   }
558   while (GetUTFCode(pattern) == '*')
559     pattern+=GetUTFOctets(pattern);
560   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
561     MagickTrue : MagickFalse;
562   return(match);
563 }
564 \f
565 /*
566 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
567 %                                                                             %
568 %                                                                             %
569 %                                                                             %
570 +     I s G l o b                                                             %
571 %                                                                             %
572 %                                                                             %
573 %                                                                             %
574 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
575 %
576 %  IsGlob() returns MagickTrue if the path specification contains a globbing
577 %  pattern.
578 %
579 %  The format of the IsGlob method is:
580 %
581 %      MagickBooleanType IsGlob(const char *path)
582 %
583 %  A description of each parameter follows:
584 %
585 %    o path: the path.
586 %
587 */
588 MagickExport MagickBooleanType IsGlob(const char *path)
589 {
590   MagickBooleanType
591     status;
592
593   if (IsPathAccessible(path) != MagickFalse)
594     return(MagickFalse);
595   status=(strchr(path,'*') != (char *) NULL) ||
596     (strchr(path,'?') != (char *) NULL) ||
597     (strchr(path,'{') != (char *) NULL) ||
598     (strchr(path,'}') != (char *) NULL) ||
599     (strchr(path,'[') != (char *) NULL) ||
600     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
601   return(status);
602 }
603 \f
604 /*
605 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
606 %                                                                             %
607 %                                                                             %
608 %                                                                             %
609 %   T o k e n i z e r                                                         %
610 %                                                                             %
611 %                                                                             %
612 %                                                                             %
613 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
614 %
615 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
616 %  one at a time from a string of characters.  The characters used for white
617 %  space, for break characters, and for quotes can be specified.  Also,
618 %  characters in the string can be preceded by a specifiable escape character
619 %  which removes any special meaning the character may have.
620 %
621 %  Here is some terminology:
622 %
623 %    o token: A single unit of information in the form of a group of
624 %      characters.
625 %
626 %    o white space: Space that gets ignored (except within quotes or when
627 %      escaped), like blanks and tabs. In addition, white space terminates a
628 %      non-quoted token.
629 %
630 %    o break set: One or more characters that separates non-quoted tokens.
631 %      Commas are a common break character. The usage of break characters to
632 %      signal the end of a token is the same as that of white space, except
633 %      multiple break characters with nothing or only white space between
634 %      generate a null token for each two break characters together.
635 %
636 %      For example, if blank is set to be the white space and comma is set to
637 %      be the break character, the line
638 %
639 %        A, B, C ,  , DEF
640 %
641 %        ... consists of 5 tokens:
642 %
643 %        1)  "A"
644 %        2)  "B"
645 %        3)  "C"
646 %        4)  "" (the null string)
647 %        5)  "DEF"
648 %
649 %    o Quote character: A character that, when surrounding a group of other
650 %      characters, causes the group of characters to be treated as a single
651 %      token, no matter how many white spaces or break characters exist in
652 %      the group. Also, a token always terminates after the closing quote.
653 %      For example, if ' is the quote character, blank is white space, and
654 %      comma is the break character, the following string
655 %
656 %        A, ' B, CD'EF GHI
657 %
658 %        ... consists of 4 tokens:
659 %
660 %        1)  "A"
661 %        2)  " B, CD" (note the blanks & comma)
662 %        3)  "EF"
663 %        4)  "GHI"
664 %
665 %      The quote characters themselves do not appear in the resultant
666 %      tokens.  The double quotes are delimiters i use here for
667 %      documentation purposes only.
668 %
669 %    o Escape character: A character which itself is ignored but which
670 %      causes the next character to be used as is.  ^ and \ are often used
671 %      as escape characters. An escape in the last position of the string
672 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
673 %      and non-escape) character. For example, assume white space, break
674 %      character, and quote are the same as in the above examples, and
675 %      further, assume that ^ is the escape character. Then, in the string
676 %
677 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
678 %
679 %        ... there are 7 tokens:
680 %
681 %        1)  "ABC"
682 %        2)  " DEF ' GH"
683 %        3)  "I"
684 %        4)  " "     (a lone blank)
685 %        5)  "J"
686 %        6)  "K L"
687 %        7)  "^"     (passed as is at end of line)
688 %
689 %  The format of the Tokenizer method is:
690 %
691 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
692 %        const size_t max_token_length,const char *line,const char *white,
693 %        const char *break_set,const char *quote,const char escape,
694 %        char *breaker,int *next,char *quoted)
695 %
696 %  A description of each parameter follows:
697 %
698 %    o flag: right now, only the low order 2 bits are used.
699 %
700 %        1 => convert non-quoted tokens to upper case
701 %        2 => convert non-quoted tokens to lower case
702 %        0 => do not convert non-quoted tokens
703 %
704 %    o token: a character string containing the returned next token
705 %
706 %    o max_token_length: the maximum size of "token".  Characters beyond
707 %      "max_token_length" are truncated.
708 %
709 %    o line: the string to be parsed.
710 %
711 %    o white: a string of the valid white spaces.  example:
712 %
713 %        char whitesp[]={" \t"};
714 %
715 %      blank and tab will be valid white space.
716 %
717 %    o break_set: a string of the valid break characters. example:
718 %
719 %        char breakch[]={";,"};
720 %
721 %      semicolon and comma will be valid break characters.
722 %
723 %    o quote: a string of the valid quote characters. An example would be
724 %
725 %        char quotech[]={"'\""};
726 %
727 %      (this causes single and double quotes to be valid) Note that a
728 %      token starting with one of these characters needs the same quote
729 %      character to terminate it.
730 %
731 %      for example:
732 %
733 %        "ABC '
734 %
735 %      is unterminated, but
736 %
737 %        "DEF" and 'GHI'
738 %
739 %      are properly terminated.  Note that different quote characters
740 %      can appear on the same line; only for a given token do the quote
741 %      characters have to be the same.
742 %
743 %    o escape: the escape character (NOT a string ... only one
744 %      allowed). Use zero if none is desired.
745 %
746 %    o breaker: the break character used to terminate the current
747 %      token.  If the token was quoted, this will be the quote used.  If
748 %      the token is the last one on the line, this will be zero.
749 %
750 %    o next: this variable points to the first character of the
751 %      next token.  it gets reset by "tokenizer" as it steps through the
752 %      string.  Set it to 0 upon initialization, and leave it alone
753 %      after that.  You can change it if you want to jump around in the
754 %      string or re-parse from the beginning, but be careful.
755 %
756 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
757 %      if not.  You may need this information (for example:  in C, a
758 %      string with quotes around it is a character string, while one
759 %      without is an identifier).
760 %
761 %    o result: 0 if we haven't reached EOS (end of string), and 1
762 %      if we have.
763 %
764 */
765
/*
  Tokenizer() scanner states.
*/
enum
{
  IN_WHITE = 0,  /* between tokens; nothing collected yet */
  IN_TOKEN = 1,  /* collecting an unquoted token */
  IN_QUOTE = 2,  /* collecting a quoted token */
  IN_OZONE = 3   /* token ended (closing quote or white space); awaiting a
                    break character or the start of the next token */
};
770
/*
  sindex() returns the zero-based position of the first occurrence of c in
  string, or -1 if c does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
781
782 static void StoreToken(TokenInfo *token_info,char *string,
783   size_t max_token_length,int c)
784 {
785   register ssize_t
786     i;
787
788   if ((token_info->offset < 0) ||
789       ((size_t) token_info->offset >= (max_token_length-1)))
790     return;
791   i=token_info->offset++;
792   string[i]=(char) c;
793   if (token_info->state == IN_QUOTE)
794     return;
795   switch (token_info->flag & 0x03)
796   {
797     case 1:
798     {
799       string[i]=(char) toupper(c);
800       break;
801     }
802     case 2:
803     {
804       string[i]=(char) tolower(c);
805       break;
806     }
807     default:
808       break;
809   }
810 }
811
812 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
813   char *token,const size_t max_token_length,const char *line,const char *white,
814   const char *break_set,const char *quote,const char escape,char *breaker,
815   int *next,char *quoted)
816 {
817   int
818     c;
819
820   register ssize_t
821     i;
822
823   *breaker='\0';
824   *quoted='\0';
825   if (line[*next] == '\0')
826     return(1);
827   token_info->state=IN_WHITE;
828   token_info->quote=(char) MagickFalse;
829   token_info->flag=flag;
830   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
831   {
832     c=(int) line[*next];
833     i=sindex(c,break_set);
834     if (i >= 0)
835       {
836         switch (token_info->state)
837         {
838           case IN_WHITE:
839           case IN_TOKEN:
840           case IN_OZONE:
841           {
842             (*next)++;
843             *breaker=break_set[i];
844             token[token_info->offset]='\0';
845             return(0);
846           }
847           case IN_QUOTE:
848           {
849             StoreToken(token_info,token,max_token_length,c);
850             break;
851           }
852         }
853         continue;
854       }
855     i=sindex(c,quote);
856     if (i >= 0)
857       {
858         switch (token_info->state)
859         {
860           case IN_WHITE:
861           {
862             token_info->state=IN_QUOTE;
863             token_info->quote=quote[i];
864             *quoted=(char) MagickTrue;
865             break;
866           }
867           case IN_QUOTE:
868           {
869             if (quote[i] != token_info->quote)
870               StoreToken(token_info,token,max_token_length,c);
871             else
872               {
873                 token_info->state=IN_OZONE;
874                 token_info->quote='\0';
875               }
876             break;
877           }
878           case IN_TOKEN:
879           case IN_OZONE:
880           {
881             *breaker=(char) c;
882             token[token_info->offset]='\0';
883             return(0);
884           }
885         }
886         continue;
887       }
888     i=sindex(c,white);
889     if (i >= 0)
890       {
891         switch (token_info->state)
892         {
893           case IN_WHITE:
894           case IN_OZONE:
895             break;
896           case IN_TOKEN:
897           {
898             token_info->state=IN_OZONE;
899             break;
900           }
901           case IN_QUOTE:
902           {
903             StoreToken(token_info,token,max_token_length,c);
904             break;
905           }
906         }
907         continue;
908       }
909     if (c == (int) escape)
910       {
911         if (line[(*next)+1] == '\0')
912           {
913             *breaker='\0';
914             StoreToken(token_info,token,max_token_length,c);
915             (*next)++;
916             token[token_info->offset]='\0';
917             return(0);
918           }
919         switch (token_info->state)
920         {
921           case IN_WHITE:
922           {
923             (*next)--;
924             token_info->state=IN_TOKEN;
925             break;
926           }
927           case IN_TOKEN:
928           case IN_QUOTE:
929           {
930             (*next)++;
931             c=(int) line[*next];
932             StoreToken(token_info,token,max_token_length,c);
933             break;
934           }
935           case IN_OZONE:
936           {
937             token[token_info->offset]='\0';
938             return(0);
939           }
940         }
941         continue;
942       }
943     switch (token_info->state)
944     {
945       case IN_WHITE:
946         token_info->state=IN_TOKEN;
947       case IN_TOKEN:
948       case IN_QUOTE:
949       {
950         StoreToken(token_info,token,max_token_length,c);
951         break;
952       }
953       case IN_OZONE:
954       {
955         token[token_info->offset]='\0';
956         return(0);
957       }
958     }
959   }
960   token[token_info->offset]='\0';
961   return(0);
962 }