2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6 % TTTTT OOO K K EEEEE N N %
8 % T O O KKK EEE N N N %
10 % T OOO K K EEEEE N N %
13 % MagickCore Token Methods %
20 % Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
21 % dedicated to making software imaging solutions freely available. %
23 % You may not use this file except in compliance with the License. You may %
24 % obtain a copy of the License at %
26 % http://www.imagemagick.org/script/license.php %
28 % Unless required by applicable law or agreed to in writing, software %
29 % distributed under the License is distributed on an "AS IS" BASIS, %
30 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31 % See the License for the specific language governing permissions and %
32 % limitations under the License. %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/memory_.h"
48 #include "MagickCore/string_.h"
49 #include "MagickCore/string-private.h"
50 #include "MagickCore/token.h"
51 #include "MagickCore/token-private.h"
52 #include "MagickCore/utility.h"
53 #include "MagickCore/utility-private.h"
56 Typedef declarations.
77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
81 % A c q u i r e T o k e n I n f o %
85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 % AcquireTokenInfo() allocates the TokenInfo structure.
89 % The format of the AcquireTokenInfo method is:
91 % TokenInfo *AcquireTokenInfo(void)
/*
  Allocates a TokenInfo with AcquireMagickMemory() and stamps it with
  MagickSignature.  A failed allocation is reported through
  ThrowFatalException() with "MemoryAllocationFailed".  Only the signature
  member is initialized in the lines visible here.
*/
94 MagickExport TokenInfo *AcquireTokenInfo(void)
99 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
100 if (token_info == (TokenInfo *) NULL)
101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
/* Mark the structure as live; DestroyTokenInfo() asserts on this value. */
102 token_info->signature=MagickSignature;
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
111 % D e s t r o y T o k e n I n f o %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117 % DestroyTokenInfo() deallocates memory associated with a TokenInfo
120 % The format of the DestroyTokenInfo method is:
122 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124 % A description of each parameter follows:
126 % o token_info: Specifies a pointer to a TokenInfo structure.
/*
  Validates the TokenInfo (non-NULL, correct signature), invalidates its
  signature and hands the memory to RelinquishMagickMemory().  The local
  token_info is overwritten with that call's result.
*/
129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickSignature);
/* Flip the signature before freeing so a stale pointer fails the assert above. */
134 token_info->signature=(~MagickSignature);
135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
144 + G e t M a g i c k T o k e n %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
150 % GetMagickToken() gets a token from the token stream. A token is defined as
151 % a sequence of characters delimited by whitespace (e.g. clip-path), a
152 % sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
153 % parentheses (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these
154 % separator characters: ':', '=', ',', and ';'.
156 % The format of the GetMagickToken method is:
158 % void GetMagickToken(const char *start,const char **end,char *token)
160 % A description of each parameter follows:
162 % o start: the start of the token sequence.
164 % o end: points to the end of the token sequence.
166 % o token: copy the token to this buffer.
/*
  Extracts one token beginning at start and writes it to token.  Leading
  white space is skipped, then the token is scanned as a delimited sequence,
  a number, or a run of ordinary characters (which may carry a parenthesized
  group).  A leading "url(" wrapper is stripped from the result.  When end is
  not NULL it receives the scan position after trailing white space.
  start and token must not be NULL (asserted).
*/
169 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
180 assert(start != (const char *) NULL);
181 assert(token != (char *) NULL);
/* Skip leading white space; the unsigned char cast keeps isspace() well defined for bytes above 0x7f. */
184 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
/*
  Choose the closing delimiter for the opening character.  Note that a
  back-quote is closed by a single quote and '{' by '}'; any other opener
  closes on itself.
*/
200 case '"': escape='"'; break;
201 case '\'': escape='\''; break;
202 case '`': escape='\''; break;
203 case '{': escape='}'; break;
204 default: escape=(*p); break;
206 for (p++; *p != '\0'; p++)
/* A backslash followed by the closing delimiter or by another backslash is an escape pair. */
208 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
223 if ((*p == '>') || (*p == '/'))
/* Try a numeric token; q presumably receives where StringToDouble() stopped parsing. */
232 value=StringToDouble(p,&q);
/* p != q means at least one character was consumed as a number. */
234 if ((p != q) && (*p != ','))
236 for ( ; (p < q) && (*p != ','); p++)
242 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
243 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
248 for ( ; *p != '\0'; p++)
/* White space and the separators '=', ':', ',', '|' and ';' are tested here as token terminators. */
250 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
251 (*p == ':') || (*p == ',') || (*p == '|') || (*p == ';')) &&
254 if ((i > 0) && (*p == '<'))
/* Parenthesized group: scan to the first ')' that is not preceded by a backslash. */
260 for (p++; *p != '\0'; p++)
263 if ((*p == ')') && (*(p-1) != '\\'))
/*
  Strip a "url(" wrapper: shift the token left by offset characters and cut
  one trailing character.
  NOTE(review): source and destination of CopyMagickString() overlap here;
  confirm that routine tolerates overlapping buffers.
  NOTE(review): if strlen(token) equals offset (e.g. the token is just
  "url(" with no closing character) the index i-offset-1 is -1, which writes
  before the buffer -- verify against callers.
*/
271 if (LocaleNCompare(token,"url(",4) == 0)
277 if (token[offset] == '#')
279 i=(ssize_t) strlen(token);
280 (void) CopyMagickString(token,token+offset,MaxTextExtent);
281 token[i-offset-1]='\0';
/* Skip white space that follows the token before reporting the end position. */
283 while (isspace((int) ((unsigned char) *p)) != 0)
285 if (end != (const char **) NULL)
286 *end=(const char *) p;
290 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
294 % G l o b E x p r e s s i o n %
298 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
300 % GlobExpression() returns MagickTrue if the expression matches the pattern.
302 % The format of the GlobExpression function is:
304 % MagickBooleanType GlobExpression(const char *expression,
305 % const char *pattern,const MagickBooleanType case_insensitive)
307 % A description of each parameter follows:
309 % o expression: Specifies a pointer to a text string containing a file name.
311 % o pattern: Specifies a pointer to a text string containing a pattern.
313 % o case_insensitive: set to MagickTrue to ignore the case when matching
/*
  Matches expression against a glob pattern, one UTF code point at a time
  (GetUTFCode() reads a code point, GetUTFOctets() gives its byte length).
  The visible code handles a wildcard that recurses over the remaining
  expression, bracketed sets with 'a-z' style ranges and backslash escapes,
  brace groups with comma separated alternatives, and literal comparison
  that is optionally case-insensitive.  Before matching, a pattern that ends
  in ']' and contains '[' is run through SetImageInfo() to see whether it is
  really a scene specification.
*/
317 MagickExport MagickBooleanType GlobExpression(const char *expression,
318 const char *pattern,const MagickBooleanType case_insensitive)
328 Return on empty pattern or '*'.
330 if (pattern == (char *) NULL)
332 if (GetUTFCode(pattern) == 0)
334 if (LocaleCompare(pattern,"*") == 0)
/* p addresses the last byte of pattern; pattern was tested for a zero first code point above. */
336 p=pattern+strlen(pattern)-1;
337 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
346 Determine if pattern is a scene, i.e. img0001.pcd[2].
/*
  Let SetImageInfo() parse a copy of the pattern as a filename; a changed
  filename means part of the pattern was consumed.  The temporary ImageInfo
  is destroyed on both outcomes of the comparison.
*/
348 image_info=AcquireImageInfo();
349 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
350 exception=AcquireExceptionInfo();
351 (void) SetImageInfo(image_info,0,exception);
352 exception=DestroyExceptionInfo(exception);
353 if (LocaleCompare(image_info->filename,pattern) != 0)
355 image_info=DestroyImageInfo(image_info);
358 image_info=DestroyImageInfo(image_info);
361 Evaluate glob expression.
/* Main loop: runs until the pattern is exhausted or done is set. */
364 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
/* With the expression exhausted, only '{' and '*' are still allowed to continue. */
366 if (GetUTFCode(expression) == 0)
367 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
369 switch (GetUTFCode(pattern))
/*
  Wildcard: step past one pattern code point, then try the rest of the
  pattern recursively at each successive position in expression until a
  match is found or expression ends.
*/
377 pattern+=GetUTFOctets(pattern);
378 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
380 status=GlobExpression(expression,pattern,case_insensitive);
381 expression+=GetUTFOctets(expression);
/* On success, consume what is left of both strings so the main loop stops. */
383 if (status != MagickFalse)
385 while (GetUTFCode(expression) != 0)
386 expression+=GetUTFOctets(expression);
387 while (GetUTFCode(pattern) != 0)
388 pattern+=GetUTFOctets(pattern);
/* Bracketed set: step past the opening code point and examine each member. */
397 pattern+=GetUTFOctets(pattern);
400 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
/* A backslash makes the following code point a literal member. */
405 if (GetUTFCode(pattern) == '\\')
407 pattern+=GetUTFOctets(pattern);
408 if (GetUTFCode(pattern) == 0)
/* Range member: if the next code point is '-', c holds the lower bound and the upper bound follows the dash. */
414 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
416 c=GetUTFCode(pattern);
417 pattern+=GetUTFOctets(pattern);
418 pattern+=GetUTFOctets(pattern);
419 if (GetUTFCode(pattern) == ']')
424 if (GetUTFCode(pattern) == '\\')
426 pattern+=GetUTFOctets(pattern);
427 if (GetUTFCode(pattern) == 0)
/* Test whether the expression code point lies outside the range c .. upper bound. */
433 if ((GetUTFCode(expression) < c) ||
434 (GetUTFCode(expression) > GetUTFCode(pattern)))
436 pattern+=GetUTFOctets(pattern);
/* Single member: compare it directly with the expression code point. */
441 if (GetUTFCode(pattern) != GetUTFCode(expression))
443 pattern+=GetUTFOctets(pattern);
/* Advance to the closing ']', stepping over backslash-escaped code points. */
446 pattern+=GetUTFOctets(pattern);
447 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
449 if ((GetUTFCode(pattern) == '\\') &&
450 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
451 pattern+=GetUTFOctets(pattern);
452 pattern+=GetUTFOctets(pattern);
454 if (GetUTFCode(pattern) != 0)
456 pattern+=GetUTFOctets(pattern);
457 expression+=GetUTFOctets(expression);
/* Advance pattern and expression together by one code point each. */
465 pattern+=GetUTFOctets(pattern);
466 expression+=GetUTFOctets(expression);
/*
  Brace group: walk the alternatives up to the closing '}'.  Each alternative
  is compared code point by code point against the expression through p, and
  ends at ',' or '}'.
*/
474 pattern+=GetUTFOctets(pattern);
475 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
479 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
480 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
481 (match != MagickFalse))
/* A backslash inside an alternative escapes the next code point. */
483 if (GetUTFCode(pattern) == '\\')
484 pattern+=GetUTFOctets(pattern);
485 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
488 pattern+=GetUTFOctets(pattern);
490 if (GetUTFCode(pattern) == 0)
/* Alternative matched: skip the remaining alternatives up to the closing '}'. */
497 if (match != MagickFalse)
500 while ((GetUTFCode(pattern) != '}') &&
501 (GetUTFCode(pattern) != 0))
503 pattern+=GetUTFOctets(pattern);
504 if (GetUTFCode(pattern) == '\\')
506 pattern+=GetUTFOctets(pattern);
507 if (GetUTFCode(pattern) == '}')
508 pattern+=GetUTFOctets(pattern);
/* Otherwise skip to the next ',' or the closing '}', honoring backslash escapes. */
514 while ((GetUTFCode(pattern) != '}') &&
515 (GetUTFCode(pattern) != ',') &&
516 (GetUTFCode(pattern) != 0))
518 pattern+=GetUTFOctets(pattern);
519 if (GetUTFCode(pattern) == '\\')
521 pattern+=GetUTFOctets(pattern);
522 if ((GetUTFCode(pattern) == '}') ||
523 (GetUTFCode(pattern) == ','))
524 pattern+=GetUTFOctets(pattern);
528 if (GetUTFCode(pattern) != 0)
529 pattern+=GetUTFOctets(pattern);
535 pattern+=GetUTFOctets(pattern);
536 if (GetUTFCode(pattern) == 0)
/* Literal comparison; the case-insensitive path folds both code points with tolower(). */
541 if (case_insensitive != MagickFalse)
543 if (tolower((int) GetUTFCode(expression)) !=
544 tolower((int) GetUTFCode(pattern)))
551 if (GetUTFCode(expression) != GetUTFCode(pattern))
556 expression+=GetUTFOctets(expression);
557 pattern+=GetUTFOctets(pattern);
/* Trailing '*' code points match nothing further; success requires both strings to be fully consumed. */
561 while (GetUTFCode(pattern) == '*')
562 pattern+=GetUTFOctets(pattern);
563 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
564 MagickTrue : MagickFalse;
569 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
577 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
579 % IsGlob() returns MagickTrue if the path specification contains a globbing
582 % The format of the IsGlob method is:
584 % MagickBooleanType IsGlob(const char *path)
586 % A description of each parameter follows:
/*
  Scans path for globbing characters.  status starts as MagickFalse, the path
  is first tested with IsPathAccessible(), and then every character up to the
  terminating NUL is visited.
  NOTE(review): the per-character tests and the return statements are not
  visible in this excerpt; an accessible path is presumably not treated as a
  glob -- confirm.
*/
591 MagickPrivate MagickBooleanType IsGlob(const char *path)
594 status = MagickFalse;
599 if (IsPathAccessible(path) != MagickFalse)
601 for (p=path; *p != '\0'; p++)
623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
627 % T o k e n i z e r %
631 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
633 % Tokenizer() is a generalized, finite state token parser. It extracts tokens
634 % one at a time from a string of characters. The characters used for white
635 % space, for break characters, and for quotes can be specified. Also,
636 % characters in the string can be preceded by a specifiable escape character
637 % which removes any special meaning the character may have.
639 % Here is some terminology:
641 % o token: A single unit of information in the form of a group of
644 % o white space: Space that gets ignored (except within quotes or when
645 % escaped), like blanks and tabs. In addition, white space terminates a
648 % o break set: One or more characters that separates non-quoted tokens.
649 % Commas are a common break character. The usage of break characters to
650 % signal the end of a token is the same as that of white space, except
651 % multiple break characters with nothing or only white space between
652 % generate a null token for each two break characters together.
654 % For example, if blank is set to be the white space and comma is set to
655 % be the break character, the line
659 % ... consists of 5 tokens:
664 % 4) "" (the null string)
667 % o Quote character: A character that, when surrounding a group of other
668 % characters, causes the group of characters to be treated as a single
669 % token, no matter how many white spaces or break characters exist in
670 % the group. Also, a token always terminates after the closing quote.
671 % For example, if ' is the quote character, blank is white space, and
672 % comma is the break character, the following string
676 % ... consists of 4 tokens:
679 % 2) " B, CD" (note the blanks & comma)
683 % The quote characters themselves do not appear in the resultant
684 % tokens. The double quotes are delimiters I use here for
685 % documentation purposes only.
687 % o Escape character: A character which itself is ignored but which
688 % causes the next character to be used as is. ^ and \ are often used
689 % as escape characters. An escape in the last position of the string
690 % gets treated as a "normal" (i.e., non-quote, non-white, non-break,
691 % and non-escape) character. For example, assume white space, break
692 % character, and quote are the same as in the above examples, and
693 % further, assume that ^ is the escape character. Then, in the string
695 % ABC, ' DEF ^' GH' I ^ J K^ L ^
697 % ... there are 7 tokens:
702 % 4) " " (a lone blank)
705 % 7) "^" (passed as is at end of line)
707 % The format of the Tokenizer method is:
709 % int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
710 % const size_t max_token_length,const char *line,const char *white,
711 % const char *break_set,const char *quote,const char escape,
712 % char *breaker,int *next,char *quoted)
714 % A description of each parameter follows:
716 % o flag: right now, only the low order 2 bits are used.
718 % 1 => convert non-quoted tokens to upper case
719 % 2 => convert non-quoted tokens to lower case
720 % 0 => do not convert non-quoted tokens
722 % o token: a character string containing the returned next token
724 % o max_token_length: the maximum size of "token". Characters beyond
725 % "max_token_length" are truncated.
727 % o line: the string to be parsed.
729 % o white: a string of the valid white spaces. example:
731 % char whitesp[]={" \t"};
733 % blank and tab will be valid white space.
735 % o break_set: a string of the valid break characters. example:
737 % char breakch[]={";,"};
739 % semicolon and comma will be valid break characters.
741 % o quote: a string of the valid quote characters. An example would be
743 % char quotech[]={"'\""};
745 % (this causes single and double quotes to be valid) Note that a
746 % token starting with one of these characters needs the same quote
747 % character to terminate it.
753 % is unterminated, but
757 % are properly terminated. Note that different quote characters
758 % can appear on the same line; only for a given token do the quote
759 % characters have to be the same.
761 % o escape: the escape character (NOT a string ... only one
762 % allowed). Use zero if none is desired.
764 % o breaker: the break character used to terminate the current
765 % token. If the token was quoted, this will be the quote used. If
766 % the token is the last one on the line, this will be zero.
768 % o next: this variable points to the first character of the
769 % next token. It gets reset by Tokenizer() as it steps through the
770 % string. Set it to 0 upon initialization, and leave it alone
771 % after that. You can change it if you want to jump around in the
772 % string or re-parse from the beginning, but be careful.
774 % o quoted: set to MagickTrue if the token was quoted and MagickFalse
775 % if not. You may need this information (for example: in C, a
776 % string with quotes around it is a character string, while one
777 % without is an identifier).
779 % o result: 0 if we haven't reached EOS (end of string), and 1
/*
  Walks string one character at a time and returns the offset of p from the
  start of string.
  NOTE(review): the comparison against c and the not-found return value are
  not visible in this excerpt; presumably the offset of the first character
  equal to c is returned -- confirm.
*/
789 static ssize_t sindex(int c,const char *string)
794 for (p=string; *p != '\0'; p++)
796 return((ssize_t) (p-string));
/*
  Stores character c into string at token_info->offset and advances the
  offset.  The write is guarded so the offset stays below
  max_token_length-1, which leaves room for a terminating NUL.  The low two
  bits of token_info->flag select upper- or lower-case conversion; the
  IN_QUOTE state is tested first.
*/
800 static void StoreToken(TokenInfo *token_info,char *string,
801 size_t max_token_length,int c)
/*
  Bounds guard.
  NOTE(review): max_token_length is a size_t, so a value of 0 makes
  max_token_length-1 wrap to the largest size_t and defeats this check --
  confirm callers never pass 0.
*/
806 if ((token_info->offset < 0) ||
807 ((size_t) token_info->offset >= (max_token_length-1)))
809 i=token_info->offset++;
811 if (token_info->state == IN_QUOTE)
/* Only bits 0 and 1 of flag are examined. */
813 switch (token_info->flag & 0x03)
817 string[i]=(char) toupper(c);
822 string[i]=(char) tolower(c);
/*
  Extracts the next token from line, starting at index *next, into token.
  Each call resets the parser state in token_info (state IN_WHITE, no active
  quote, flag stored, offset 0) and then consumes characters one at a time,
  advancing *next.  Characters are classified against break_set, quote,
  white and escape, and are appended through StoreToken(), which enforces
  max_token_length.  The token is NUL-terminated at token_info->offset on
  each exit path visible here.  *breaker receives the break character that
  ended the token and *quoted is set when a quote opens the token.
*/
830 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
831 char *token,const size_t max_token_length,const char *line,const char *white,
832 const char *break_set,const char *quote,const char escape,char *breaker,
833 int *next,char *quoted)
/* Test for being already at the end of the line before any state is reset. */
843 if (line[*next] == '\0')
845 token_info->state=IN_WHITE;
846 token_info->quote=(char) MagickFalse;
847 token_info->flag=flag;
848 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
/* Break characters: i is the position of c within break_set as reported by sindex(). */
851 i=sindex(c,break_set);
854 switch (token_info->state)
861 *breaker=break_set[i];
862 token[token_info->offset]='\0';
867 StoreToken(token_info,token,max_token_length,c);
/* Quote characters: a quote opens a quoted token, and only the same quote character closes it. */
876 switch (token_info->state)
880 token_info->state=IN_QUOTE;
881 token_info->quote=quote[i];
882 *quoted=(char) MagickTrue;
/* A different quote character inside a quoted token is stored as ordinary text. */
887 if (quote[i] != token_info->quote)
888 StoreToken(token_info,token,max_token_length,c);
891 token_info->state=IN_OZONE;
892 token_info->quote='\0';
900 token[token_info->offset]='\0';
909 switch (token_info->state)
916 token_info->state=IN_OZONE;
921 StoreToken(token_info,token,max_token_length,c);
/* Escape character: when it is the last character of the line it is stored literally. */
927 if (c == (int) escape)
929 if (line[(*next)+1] == '\0')
932 StoreToken(token_info,token,max_token_length,c);
934 token[token_info->offset]='\0';
937 switch (token_info->state)
942 token_info->state=IN_TOKEN;
950 StoreToken(token_info,token,max_token_length,c);
955 token[token_info->offset]='\0';
961 switch (token_info->state)
964 token_info->state=IN_TOKEN;
968 StoreToken(token_info,token,max_token_length,c);
973 token[token_info->offset]='\0';
/* Terminate the token at the current offset. */
978 token[token_info->offset]='\0';