2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6 % TTTTT OOO K K EEEEE N N %
8 % T O O KKK EEE N N N %
10 % T OOO K K EEEEE N N %
13 % MagickCore Token Methods %
20 % Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
26 % http://www.imagemagick.org/script/license.php %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
43 #include "magick/studio.h"
44 #include "magick/exception.h"
45 #include "magick/exception-private.h"
46 #include "magick/image.h"
47 #include "magick/memory_.h"
48 #include "magick/string_.h"
49 #include "magick/token.h"
50 #include "magick/token-private.h"
51 #include "magick/utility.h"
54 Typedef declarations.
75 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
79 % A c q u i r e T o k e n I n f o %
83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
85 % AcquireTokenInfo() allocates the TokenInfo structure.
87 % The format of the AcquireTokenInfo method is:
89 % TokenInfo *AcquireTokenInfo(void)
92 MagickExport TokenInfo *AcquireTokenInfo(void)
97 token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
98 if (token_info == (TokenInfo *) NULL)
99 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100 token_info->signature=MagickSignature;
105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109 % D e s t r o y T o k e n I n f o %
113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115 % DestroyTokenInfo() deallocates memory associated with a TokenInfo
118 % The format of the DestroyTokenInfo method is:
120 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
122 % A description of each parameter follows:
124 % o token_info: Specifies a pointer to a TokenInfo structure.
127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
129 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130 assert(token_info != (TokenInfo *) NULL);
131 assert(token_info->signature == MagickSignature);
132 token_info->signature=(~MagickSignature);
133 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
142 + G e t M a g i c k T o k e n %
146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148 % GetMagickToken() gets a token from the token stream. A token is defined as a
149 % sequence of characters delimited by whitespace (e.g. clip-path), a sequence
150 % delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
151 % parentheses (e.g. rgb(0,0,0)).
153 % The format of the GetMagickToken method is:
155 % void GetMagickToken(const char *start,const char **end,char *token)
157 % A description of each parameter follows:
159 % o start: the start of the token sequence.
161 % o end: if not NULL, set to point to the end of the token sequence.
163 % o token: copy the token to this buffer.
166 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
178 for (p=start; *p != '\0'; )
180 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
196 case '"': escape='"'; break;
197 case '\'': escape='\''; break;
198 case '`': escape='\''; break;
199 case '{': escape='}'; break;
200 default: escape=(*p); break;
202 for (p++; *p != '\0'; p++)
204 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
219 if ((*p == '>') || (*p == '/'))
229 if ((p != q) && (*p != ','))
231 for ( ; (p < q) && (*p != ','); p++)
237 if ((isalpha((int) ((unsigned char) *p)) == 0) &&
238 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
243 for ( ; *p != '\0'; p++)
245 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
246 (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
248 if ((i > 0) && (*p == '<'))
254 for (p++; *p != '\0'; p++)
257 if ((*p == ')') && (*(p-1) != '\\'))
267 if (LocaleNCompare(token,"url(",4) == 0)
273 if (token[offset] == '#')
275 i=(long) strlen(token);
276 (void) CopyMagickString(token,token+offset,MaxTextExtent);
277 token[i-offset-1]='\0';
279 while (isspace((int) ((unsigned char) *p)) != 0)
281 if (end != (const char **) NULL)
282 *end=(const char *) p;
286 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
290 % G l o b E x p r e s s i o n %
294 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
296 % GlobExpression() returns MagickTrue if the expression matches the pattern.
298 % The format of the GlobExpression function is:
300 % MagickBooleanType GlobExpression(const char *expression,
301 % const char *pattern,const MagickBooleanType case_insensitive)
303 % A description of each parameter follows:
305 % o expression: Specifies a pointer to a text string containing a file name.
307 % o pattern: Specifies a pointer to a text string containing a pattern.
309 % o case_insensitive: set to MagickTrue to ignore the case when matching
313 MagickExport MagickBooleanType GlobExpression(const char *expression,
314 const char *pattern,const MagickBooleanType case_insensitive)
324 Return on empty pattern or '*'.
326 if (pattern == (char *) NULL)
328 if (GetUTFCode(pattern) == 0)
330 if (LocaleCompare(pattern,"*") == 0)
332 p=pattern+strlen(pattern)-1;
333 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
342 Determine if pattern is a scene, e.g. img0001.pcd[2].
344 image_info=AcquireImageInfo();
345 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
346 exception=AcquireExceptionInfo();
347 (void) SetImageInfo(image_info,0,exception);
348 exception=DestroyExceptionInfo(exception);
349 if (LocaleCompare(image_info->filename,pattern) != 0)
351 image_info=DestroyImageInfo(image_info);
354 image_info=DestroyImageInfo(image_info);
357 Evaluate glob expression.
360 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
362 if (GetUTFCode(expression) == 0)
363 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
365 switch (GetUTFCode(pattern))
373 pattern+=GetUTFOctets(pattern);
374 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
376 status=GlobExpression(expression,pattern,case_insensitive);
377 expression+=GetUTFOctets(expression);
379 if (status != MagickFalse)
381 while (GetUTFCode(expression) != 0)
382 expression+=GetUTFOctets(expression);
383 while (GetUTFCode(pattern) != 0)
384 pattern+=GetUTFOctets(pattern);
393 pattern+=GetUTFOctets(pattern);
396 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
401 if (GetUTFCode(pattern) == '\\')
403 pattern+=GetUTFOctets(pattern);
404 if (GetUTFCode(pattern) == 0)
410 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
412 c=GetUTFCode(pattern);
413 pattern+=GetUTFOctets(pattern);
414 pattern+=GetUTFOctets(pattern);
415 if (GetUTFCode(pattern) == ']')
420 if (GetUTFCode(pattern) == '\\')
422 pattern+=GetUTFOctets(pattern);
423 if (GetUTFCode(pattern) == 0)
429 if ((GetUTFCode(expression) < c) ||
430 (GetUTFCode(expression) > GetUTFCode(pattern)))
432 pattern+=GetUTFOctets(pattern);
437 if (GetUTFCode(pattern) != GetUTFCode(expression))
439 pattern+=GetUTFOctets(pattern);
442 pattern+=GetUTFOctets(pattern);
443 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
445 if ((GetUTFCode(pattern) == '\\') &&
446 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
447 pattern+=GetUTFOctets(pattern);
448 pattern+=GetUTFOctets(pattern);
450 if (GetUTFCode(pattern) != 0)
452 pattern+=GetUTFOctets(pattern);
453 expression+=GetUTFOctets(expression);
461 pattern+=GetUTFOctets(pattern);
462 expression+=GetUTFOctets(expression);
470 pattern+=GetUTFOctets(pattern);
471 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
475 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
476 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
477 (match != MagickFalse))
479 if (GetUTFCode(pattern) == '\\')
480 pattern+=GetUTFOctets(pattern);
481 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
484 pattern+=GetUTFOctets(pattern);
486 if (GetUTFCode(pattern) == 0)
493 if (match != MagickFalse)
496 while ((GetUTFCode(pattern) != '}') &&
497 (GetUTFCode(pattern) != 0))
499 pattern+=GetUTFOctets(pattern);
500 if (GetUTFCode(pattern) == '\\')
502 pattern+=GetUTFOctets(pattern);
503 if (GetUTFCode(pattern) == '}')
504 pattern+=GetUTFOctets(pattern);
510 while ((GetUTFCode(pattern) != '}') &&
511 (GetUTFCode(pattern) != ',') &&
512 (GetUTFCode(pattern) != 0))
514 pattern+=GetUTFOctets(pattern);
515 if (GetUTFCode(pattern) == '\\')
517 pattern+=GetUTFOctets(pattern);
518 if ((GetUTFCode(pattern) == '}') ||
519 (GetUTFCode(pattern) == ','))
520 pattern+=GetUTFOctets(pattern);
524 if (GetUTFCode(pattern) != 0)
525 pattern+=GetUTFOctets(pattern);
531 pattern+=GetUTFOctets(pattern);
532 if (GetUTFCode(pattern) != 0)
533 pattern+=GetUTFOctets(pattern);
537 if (case_insensitive != MagickFalse)
539 if (tolower((int) GetUTFCode(expression)) !=
540 tolower((int) GetUTFCode(pattern)))
547 if (GetUTFCode(expression) != GetUTFCode(pattern))
552 expression+=GetUTFOctets(expression);
553 pattern+=GetUTFOctets(pattern);
557 while (GetUTFCode(pattern) == '*')
558 pattern+=GetUTFOctets(pattern);
559 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
560 MagickTrue : MagickFalse;
565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
573 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
575 % IsGlob() returns MagickTrue if the path specification contains a globbing
578 % The format of the IsGlob method is:
580 % MagickBooleanType IsGlob(const char *path)
582 % A description of each parameter follows:
587 MagickExport MagickBooleanType IsGlob(const char *path)
592 if (IsPathAccessible(path) != MagickFalse)
594 status=(strchr(path,'*') != (char *) NULL) ||
595 (strchr(path,'?') != (char *) NULL) ||
596 (strchr(path,'{') != (char *) NULL) ||
597 (strchr(path,'}') != (char *) NULL) ||
598 (strchr(path,'[') != (char *) NULL) ||
599 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
604 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
608 % T o k e n i z e r %
612 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
614 % Tokenizer() is a generalized, finite state token parser. It extracts tokens
615 % one at a time from a string of characters. The characters used for white
616 % space, for break characters, and for quotes can be specified. Also,
617 % characters in the string can be preceded by a specifiable escape character
618 % which removes any special meaning the character may have.
620 % Here is some terminology:
622 % o token: A single unit of information in the form of a group of
625 % o white space: Space that gets ignored (except within quotes or when
626 % escaped), like blanks and tabs. In addition, white space terminates a
629 % o break set: One or more characters that separates non-quoted tokens.
630 % Commas are a common break character. The usage of break characters to
631 % signal the end of a token is the same as that of white space, except
632 % multiple break characters with nothing or only white space between
633 % generate a null token for each two break characters together.
635 % For example, if blank is set to be the white space and comma is set to
636 % be the break character, the line
640 % ... consists of 5 tokens:
645 % 4) "" (the null string)
648 % o Quote character: A character that, when surrounding a group of other
649 % characters, causes the group of characters to be treated as a single
650 % token, no matter how many white spaces or break characters exist in
651 % the group. Also, a token always terminates after the closing quote.
652 % For example, if ' is the quote character, blank is white space, and
653 % comma is the break character, the following string
657 % ... consists of 4 tokens:
660 % 2) " B, CD" (note the blanks & comma)
664 % The quote characters themselves do not appear in the resultant
665 % tokens. The double quotes are delimiters I use here for
666 % documentation purposes only.
668 % o Escape character: A character which itself is ignored but which
669 % causes the next character to be used as is. ^ and \ are often used
670 % as escape characters. An escape in the last position of the string
671 % gets treated as a "normal" (i.e., non-quote, non-white, non-break,
672 % and non-escape) character. For example, assume white space, break
673 % character, and quote are the same as in the above examples, and
674 % further, assume that ^ is the escape character. Then, in the string
676 % ABC, ' DEF ^' GH' I ^ J K^ L ^
678 % ... there are 7 tokens:
683 % 4) " " (a lone blank)
686 % 7) "^" (passed as is at end of line)
688 % The format of the Tokenizer method is:
690 % int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
691 % const size_t max_token_length,const char *line,const char *white,
692 % const char *break_set,const char *quote,const char escape,
693 % char *breaker,int *next,char *quoted)
695 % A description of each parameter follows:
697 % o flag: right now, only the low order 2 bits are used.
699 % 1 => convert non-quoted tokens to upper case
700 % 2 => convert non-quoted tokens to lower case
701 % 0 => do not convert non-quoted tokens
703 % o token: a character string containing the returned next token
705 % o max_token_length: the maximum size of "token". Characters beyond
706 % "max_token_length" are truncated.
708 % o line: the string to be parsed.
710 % o white: a string of the valid white spaces. example:
712 % char whitesp[]={" \t"};
714 % blank and tab will be valid white space.
716 % o break_set: a string of the valid break characters. example:
718 % char breakch[]={";,"};
720 % semicolon and comma will be valid break characters.
722 % o quote: a string of the valid quote characters. An example would be
724 % char quotech[]={"'\""};
726 % (this causes single and double quotes to be valid) Note that a
727 % token starting with one of these characters needs the same quote
728 % character to terminate it.
734 % is unterminated, but
738 % are properly terminated. Note that different quote characters
739 % can appear on the same line; only for a given token do the quote
740 % characters have to be the same.
742 % o escape: the escape character (NOT a string ... only one
743 % allowed). Use zero if none is desired.
745 % o breaker: the break character used to terminate the current
746 % token. If the token was quoted, this will be the quote used. If
747 % the token is the last one on the line, this will be zero.
749 % o next: this variable points to the first character of the
750 % next token. It gets reset by "tokenizer" as it steps through the
751 % string. Set it to 0 upon initialization, and leave it alone
752 % after that. You can change it if you want to jump around in the
753 % string or re-parse from the beginning, but be careful.
755 % o quoted: set to MagickTrue if the token was quoted and MagickFalse
756 % if not. You may need this information (for example: in C, a
757 % string with quotes around it is a character string, while one
758 % without is an identifier).
760 % o result: 0 if we haven't reached EOS (end of string), and 1
770 static long sindex(int c,const char *string)
775 for (p=string; *p != '\0'; p++)
781 static void StoreToken(TokenInfo *token_info,char *string,
782 size_t max_token_length,int c)
787 if ((token_info->offset < 0) ||
788 ((size_t) token_info->offset >= (max_token_length-1)))
790 i=token_info->offset++;
792 if (token_info->state == IN_QUOTE)
794 switch (token_info->flag & 0x03)
798 string[i]=(char) toupper(c);
803 string[i]=(char) tolower(c);
811 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
812 char *token,const size_t max_token_length,const char *line,const char *white,
813 const char *break_set,const char *quote,const char escape,char *breaker,
814 int *next,char *quoted)
824 if (line[*next] == '\0')
826 token_info->state=IN_WHITE;
827 token_info->quote=(char) MagickFalse;
828 token_info->flag=flag;
829 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
832 i=sindex(c,break_set);
835 switch (token_info->state)
842 *breaker=break_set[i];
843 token[token_info->offset]='\0';
848 StoreToken(token_info,token,max_token_length,c);
857 switch (token_info->state)
861 token_info->state=IN_QUOTE;
862 token_info->quote=quote[i];
863 *quoted=(char) MagickTrue;
868 if (quote[i] != token_info->quote)
869 StoreToken(token_info,token,max_token_length,c);
872 token_info->state=IN_OZONE;
873 token_info->quote='\0';
881 token[token_info->offset]='\0';
890 switch (token_info->state)
897 token_info->state=IN_OZONE;
902 StoreToken(token_info,token,max_token_length,c);
908 if (c == (int) escape)
910 if (line[(*next)+1] == '\0')
913 StoreToken(token_info,token,max_token_length,c);
915 token[token_info->offset]='\0';
918 switch (token_info->state)
923 token_info->state=IN_TOKEN;
931 StoreToken(token_info,token,max_token_length,c);
936 token[token_info->offset]='\0';
942 switch (token_info->state)
945 token_info->state=IN_TOKEN;
949 StoreToken(token_info,token,max_token_length,c);
954 token[token_info->offset]='\0';
959 token[token_info->offset]='\0';