token.c revision d965a420485ec04d44aa15ccf4adcfc55811a2e9
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7%                      T    O   O  K  K   E      NN  N                        %
8%                      T    O   O  KKK    EEE    N N N                        %
9%                      T    O   O  K  K   E      N  NN                        %
10%                      T     OOO   K   K  EEEEE  N   N                        %
11%                                                                             %
12%                                                                             %
13%                         MagickCore Token Methods                            %
14%                                                                             %
15%                             Software Design                                 %
16%                               John Cristy                                   %
17%                              January 1993                                   %
18%                                                                             %
19%                                                                             %
20%  Copyright 1999-2010 ImageMagick Studio LLC, a non-profit organization      %
21%  dedicated to making software imaging solutions freely available.           %
22%                                                                             %
23%  You may not use this file except in compliance with the License.  You may  %
24%  obtain a copy of the License at                                            %
25%                                                                             %
26%    http://www.imagemagick.org/script/license.php                            %
27%                                                                             %
28%  Unless required by applicable law or agreed to in writing, software        %
29%  distributed under the License is distributed on an "AS IS" BASIS,          %
30%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31%  See the License for the specific language governing permissions and        %
32%  limitations under the License.                                             %
33%                                                                             %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41  Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/memory_.h"
48#include "magick/string_.h"
49#include "magick/token.h"
50#include "magick/token-private.h"
51#include "magick/utility.h"
52
53/*
  Typedef declarations.
55*/
56struct _TokenInfo
57{
58  int
59    state;
60
61  MagickStatusType
62    flag;
63
64  long
65    offset;
66
67  char
68    quote;
69
70  unsigned long
71    signature;
72};
73
74/*
75%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
76%                                                                             %
77%                                                                             %
78%                                                                             %
79%   A c q u i r e T o k e n I n f o                                           %
80%                                                                             %
81%                                                                             %
82%                                                                             %
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84%
85%  AcquireTokenInfo() allocates the TokenInfo structure.
86%
87%  The format of the AcquireTokenInfo method is:
88%
89%      TokenInfo *AcquireTokenInfo()
90%
91*/
92MagickExport TokenInfo *AcquireTokenInfo(void)
93{
94  TokenInfo
95    *token_info;
96
97  token_info=(TokenInfo *) AcquireAlignedMemory(1,sizeof(*token_info));
98  if (token_info == (TokenInfo *) NULL)
99    ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
100  token_info->signature=MagickSignature;
101  return(token_info);
102}
103
104/*
105%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106%                                                                             %
107%                                                                             %
108%                                                                             %
109%   D e s t r o y T o k e n I n f o                                           %
110%                                                                             %
111%                                                                             %
112%                                                                             %
113%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
116%  structure.
117%
118%  The format of the DestroyTokenInfo method is:
119%
120%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
121%
122%  A description of each parameter follows:
123%
%    o token_info: Specifies a pointer to a TokenInfo structure.
125%
126*/
127MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
128{
129  (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
130  assert(token_info != (TokenInfo *) NULL);
131  assert(token_info->signature == MagickSignature);
132  token_info->signature=(~MagickSignature);
133  token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
134  return(token_info);
135}
136
137/*
138%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
139%                                                                             %
140%                                                                             %
141%                                                                             %
142+   G e t M a g i c k T o k e n                                               %
143%                                                                             %
144%                                                                             %
145%                                                                             %
146%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
147%
148%  GetMagickToken() gets a token from the token stream.  A token is defined as a
149%  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
%  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
151%  parenthesis (e.g. rgb(0,0,0)).
152%
153%  The format of the GetMagickToken method is:
154%
155%      void GetMagickToken(const char *start,const char **end,char *token)
156%
157%  A description of each parameter follows:
158%
159%    o start: the start of the token sequence.
160%
161%    o end: point to the end of the token sequence.
162%
163%    o token: copy the token to this buffer.
164%
165*/
166MagickExport void GetMagickToken(const char *start,const char **end,char *token)
167{
168  double
169    value;
170
171  register const char
172    *p;
173
174  register long
175    i;
176
177  i=0;
178  for (p=start; *p != '\0'; )
179  {
180    while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
181      p++;
182    if (*p == '\0')
183      break;
184    switch (*p)
185    {
186      case '"':
187      case '\'':
188      case '`':
189      case '{':
190      {
191        register char
192          escape;
193
194        switch (*p)
195        {
196          case '"': escape='"'; break;
197          case '\'': escape='\''; break;
198          case '`': escape='\''; break;
199          case '{': escape='}'; break;
200          default: escape=(*p); break;
201        }
202        for (p++; *p != '\0'; p++)
203        {
204          if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
205            p++;
206          else
207            if (*p == escape)
208              {
209                p++;
210                break;
211              }
212          token[i++]=(*p);
213        }
214        break;
215      }
216      case '/':
217      {
218        token[i++]=(*p++);
219        if ((*p == '>') || (*p == '/'))
220          token[i++]=(*p++);
221        break;
222      }
223      default:
224      {
225        char
226          *q;
227
228        value=strtod(p,&q);
229        if ((p != q) && (*p != ','))
230          {
231            for ( ; (p < q) && (*p != ','); p++)
232              token[i++]=(*p);
233            if (*p == '%')
234              token[i++]=(*p++);
235            break;
236          }
237        if ((isalpha((int) ((unsigned char) *p)) == 0) &&
238            (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
239          {
240            token[i++]=(*p++);
241            break;
242          }
243        for ( ; *p != '\0'; p++)
244        {
245          if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
246              (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
247            break;
248          if ((i > 0) && (*p == '<'))
249            break;
250          token[i++]=(*p);
251          if (*p == '>')
252            break;
253          if (*p == '(')
254            for (p++; *p != '\0'; p++)
255            {
256              token[i++]=(*p);
257              if ((*p == ')') && (*(p-1) != '\\'))
258                break;
259            }
260        }
261        break;
262      }
263    }
264    break;
265  }
266  token[i]='\0';
267  if (LocaleNCompare(token,"url(",4) == 0)
268    {
269      ssize_t
270        offset;
271
272      offset=4;
273      if (token[offset] == '#')
274        offset++;
275      i=(long) strlen(token);
276      (void) CopyMagickString(token,token+offset,MaxTextExtent);
277      token[i-offset-1]='\0';
278    }
279  while (isspace((int) ((unsigned char) *p)) != 0)
280    p++;
281  if (end != (const char **) NULL)
282    *end=(const char *) p;
283}
284
285/*
286%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
287%                                                                             %
288%                                                                             %
289%                                                                             %
290%   G l o b E x p r e s s i o n                                               %
291%                                                                             %
292%                                                                             %
293%                                                                             %
294%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
295%
296%  GlobExpression() returns MagickTrue if the expression matches the pattern.
297%
298%  The format of the GlobExpression function is:
299%
300%      MagickBooleanType GlobExpression(const char *expression,
301%        const char *pattern,const MagickBooleanType case_insensitive)
302%
303%  A description of each parameter follows:
304%
305%    o expression: Specifies a pointer to a text string containing a file name.
306%
307%    o pattern: Specifies a pointer to a text string containing a pattern.
308%
309%    o case_insensitive: set to MagickTrue to ignore the case when matching
310%      an expression.
311%
312*/
313MagickExport MagickBooleanType GlobExpression(const char *expression,
314  const char *pattern,const MagickBooleanType case_insensitive)
315{
316  MagickBooleanType
317    done,
318    match;
319
320  register const char
321    *p;
322
323  /*
324    Return on empty pattern or '*'.
325  */
326  if (pattern == (char *) NULL)
327    return(MagickTrue);
328  if (GetUTFCode(pattern) == 0)
329    return(MagickTrue);
330  if (LocaleCompare(pattern,"*") == 0)
331    return(MagickTrue);
332  p=pattern+strlen(pattern)-1;
333  if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
334    {
335      ExceptionInfo
336        *exception;
337
338      ImageInfo
339        *image_info;
340
341      /*
342        Determine if pattern is a scene, i.e. img0001.pcd[2].
343      */
344      image_info=AcquireImageInfo();
345      (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
346      exception=AcquireExceptionInfo();
347      (void) SetImageInfo(image_info,0,exception);
348      exception=DestroyExceptionInfo(exception);
349      if (LocaleCompare(image_info->filename,pattern) != 0)
350        {
351          image_info=DestroyImageInfo(image_info);
352          return(MagickFalse);
353        }
354      image_info=DestroyImageInfo(image_info);
355    }
356  /*
357    Evaluate glob expression.
358  */
359  done=MagickFalse;
360  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
361  {
362    if (GetUTFCode(expression) == 0)
363      if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
364        break;
365    switch (GetUTFCode(pattern))
366    {
367      case '\\':
368      {
369        pattern+=GetUTFOctets(pattern);
370        if (GetUTFCode(pattern) != 0)
371          pattern+=GetUTFOctets(pattern);
372        break;
373      }
374      case '*':
375      {
376        MagickBooleanType
377          status;
378
379        status=MagickFalse;
380        pattern+=GetUTFOctets(pattern);
381        while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
382        {
383          status=GlobExpression(expression,pattern,case_insensitive);
384          expression+=GetUTFOctets(expression);
385        }
386        if (status != MagickFalse)
387          {
388            while (GetUTFCode(expression) != 0)
389              expression+=GetUTFOctets(expression);
390            while (GetUTFCode(pattern) != 0)
391              pattern+=GetUTFOctets(pattern);
392          }
393        break;
394      }
395      case '[':
396      {
397        long
398          c;
399
400        pattern+=GetUTFOctets(pattern);
401        for ( ; ; )
402        {
403          if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
404            {
405              done=MagickTrue;
406              break;
407            }
408          if (GetUTFCode(pattern) == '\\')
409            {
410              pattern+=GetUTFOctets(pattern);
411              if (GetUTFCode(pattern) == 0)
412                {
413                  done=MagickTrue;
414                  break;
415                }
416             }
417          if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
418            {
419              c=GetUTFCode(pattern);
420              pattern+=GetUTFOctets(pattern);
421              pattern+=GetUTFOctets(pattern);
422              if (GetUTFCode(pattern) == ']')
423                {
424                  done=MagickTrue;
425                  break;
426                }
427              if (GetUTFCode(pattern) == '\\')
428                {
429                  pattern+=GetUTFOctets(pattern);
430                  if (GetUTFCode(pattern) == 0)
431                    {
432                      done=MagickTrue;
433                      break;
434                    }
435                }
436              if ((GetUTFCode(expression) < c) ||
437                  (GetUTFCode(expression) > GetUTFCode(pattern)))
438                {
439                  pattern+=GetUTFOctets(pattern);
440                  continue;
441                }
442            }
443          else
444            if (GetUTFCode(pattern) != GetUTFCode(expression))
445              {
446                pattern+=GetUTFOctets(pattern);
447                continue;
448              }
449          pattern+=GetUTFOctets(pattern);
450          while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
451          {
452            if ((GetUTFCode(pattern) == '\\') &&
453                (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
454              pattern+=GetUTFOctets(pattern);
455            pattern+=GetUTFOctets(pattern);
456          }
457          if (GetUTFCode(pattern) != 0)
458            {
459              pattern+=GetUTFOctets(pattern);
460              expression+=GetUTFOctets(expression);
461            }
462          break;
463        }
464        break;
465      }
466      case '?':
467      {
468        pattern+=GetUTFOctets(pattern);
469        expression+=GetUTFOctets(expression);
470        break;
471      }
472      case '{':
473      {
474        register const char
475          *p;
476
477        pattern+=GetUTFOctets(pattern);
478        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
479        {
480          p=expression;
481          match=MagickTrue;
482          while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
483                 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
484                 (match != MagickFalse))
485          {
486            if (GetUTFCode(pattern) == '\\')
487              pattern+=GetUTFOctets(pattern);
488            match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
489              MagickFalse;
490            p+=GetUTFOctets(p);
491            pattern+=GetUTFOctets(pattern);
492          }
493          if (GetUTFCode(pattern) == 0)
494            {
495              match=MagickFalse;
496              done=MagickTrue;
497              break;
498            }
499          else
500            if (match != MagickFalse)
501              {
502                expression=p;
503                while ((GetUTFCode(pattern) != '}') &&
504                       (GetUTFCode(pattern) != 0))
505                {
506                  pattern+=GetUTFOctets(pattern);
507                  if (GetUTFCode(pattern) == '\\')
508                    {
509                      pattern+=GetUTFOctets(pattern);
510                      if (GetUTFCode(pattern) == '}')
511                        pattern+=GetUTFOctets(pattern);
512                    }
513                }
514              }
515            else
516              {
517                while ((GetUTFCode(pattern) != '}') &&
518                       (GetUTFCode(pattern) != ',') &&
519                       (GetUTFCode(pattern) != 0))
520                {
521                  pattern+=GetUTFOctets(pattern);
522                  if (GetUTFCode(pattern) == '\\')
523                    {
524                      pattern+=GetUTFOctets(pattern);
525                      if ((GetUTFCode(pattern) == '}') ||
526                          (GetUTFCode(pattern) == ','))
527                        pattern+=GetUTFOctets(pattern);
528                    }
529                }
530              }
531            if (GetUTFCode(pattern) != 0)
532              pattern+=GetUTFOctets(pattern);
533          }
534        break;
535      }
536      default:
537      {
538        if (case_insensitive != MagickFalse)
539          {
540            if (tolower((int) GetUTFCode(expression)) !=
541                tolower((int) GetUTFCode(pattern)))
542              {
543                done=MagickTrue;
544                break;
545              }
546          }
547        else
548          if (GetUTFCode(expression) != GetUTFCode(pattern))
549            {
550              done=MagickTrue;
551              break;
552            }
553        expression+=GetUTFOctets(expression);
554        pattern+=GetUTFOctets(pattern);
555      }
556    }
557  }
558  while (GetUTFCode(pattern) == '*')
559    pattern+=GetUTFOctets(pattern);
560  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
561    MagickTrue : MagickFalse;
562  return(match);
563}
564
565/*
566%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
567%                                                                             %
568%                                                                             %
569%                                                                             %
570+     I s G l o b                                                             %
571%                                                                             %
572%                                                                             %
573%                                                                             %
574%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
575%
576%  IsGlob() returns MagickTrue if the path specification contains a globbing
577%  pattern.
578%
579%  The format of the IsGlob method is:
580%
%      MagickBooleanType IsGlob(const char *path)
582%
583%  A description of each parameter follows:
584%
585%    o path: the path.
586%
587*/
588MagickExport MagickBooleanType IsGlob(const char *path)
589{
590  MagickBooleanType
591    status;
592
593  if (IsPathAccessible(path) != MagickFalse)
594    return(MagickFalse);
595  status=(strchr(path,'*') != (char *) NULL) ||
596    (strchr(path,'?') != (char *) NULL) ||
597    (strchr(path,'{') != (char *) NULL) ||
598    (strchr(path,'}') != (char *) NULL) ||
599    (strchr(path,'[') != (char *) NULL) ||
600    (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
601  return(status);
602}
603
604/*
605%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
606%                                                                             %
607%                                                                             %
608%                                                                             %
609%   T o k e n i z e r                                                         %
610%                                                                             %
611%                                                                             %
612%                                                                             %
613%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
614%
615%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
616%  one at a time from a string of characters.  The characters used for white
617%  space, for break characters, and for quotes can be specified.  Also,
618%  characters in the string can be preceded by a specifiable escape character
619%  which removes any special meaning the character may have.
620%
621%  Here is some terminology:
622%
623%    o token: A single unit of information in the form of a group of
624%      characters.
625%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs. In addition, white space terminates a
628%      non-quoted token.
629%
630%    o break set: One or more characters that separates non-quoted tokens.
631%      Commas are a common break character. The usage of break characters to
632%      signal the end of a token is the same as that of white space, except
633%      multiple break characters with nothing or only white space between
634%      generate a null token for each two break characters together.
635%
636%      For example, if blank is set to be the white space and comma is set to
637%      be the break character, the line
638%
639%        A, B, C ,  , DEF
640%
641%        ... consists of 5 tokens:
642%
643%        1)  "A"
644%        2)  "B"
645%        3)  "C"
646%        4)  "" (the null string)
647%        5)  "DEF"
648%
649%    o Quote character: A character that, when surrounding a group of other
650%      characters, causes the group of characters to be treated as a single
651%      token, no matter how many white spaces or break characters exist in
652%      the group. Also, a token always terminates after the closing quote.
653%      For example, if ' is the quote character, blank is white space, and
654%      comma is the break character, the following string
655%
656%        A, ' B, CD'EF GHI
657%
658%        ... consists of 4 tokens:
659%
660%        1)  "A"
661%        2)  " B, CD" (note the blanks & comma)
662%        3)  "EF"
663%        4)  "GHI"
664%
665%      The quote characters themselves do not appear in the resultant
666%      tokens.  The double quotes are delimiters i use here for
667%      documentation purposes only.
668%
669%    o Escape character: A character which itself is ignored but which
670%      causes the next character to be used as is.  ^ and \ are often used
671%      as escape characters. An escape in the last position of the string
672%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
673%      and non-escape) character. For example, assume white space, break
674%      character, and quote are the same as in the above examples, and
675%      further, assume that ^ is the escape character. Then, in the string
676%
677%        ABC, ' DEF ^' GH' I ^ J K^ L ^
678%
679%        ... there are 7 tokens:
680%
681%        1)  "ABC"
682%        2)  " DEF ' GH"
683%        3)  "I"
684%        4)  " "     (a lone blank)
685%        5)  "J"
686%        6)  "K L"
687%        7)  "^"     (passed as is at end of line)
688%
689%  The format of the Tokenizer method is:
690%
691%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
692%        const size_t max_token_length,const char *line,const char *white,
693%        const char *break_set,const char *quote,const char escape,
694%        char *breaker,int *next,char *quoted)
695%
696%  A description of each parameter follows:
697%
%    o flag: right now, only the low order 2 bits are used.
699%
700%        1 => convert non-quoted tokens to upper case
701%        2 => convert non-quoted tokens to lower case
702%        0 => do not convert non-quoted tokens
703%
704%    o token: a character string containing the returned next token
705%
706%    o max_token_length: the maximum size of "token".  Characters beyond
707%      "max_token_length" are truncated.
708%
%    o line: the string to be parsed.
710%
711%    o white: a string of the valid white spaces.  example:
712%
713%        char whitesp[]={" \t"};
714%
715%      blank and tab will be valid white space.
716%
717%    o break: a string of the valid break characters. example:
718%
719%        char breakch[]={";,"};
720%
721%      semicolon and comma will be valid break characters.
722%
723%    o quote: a string of the valid quote characters. An example would be
724%
%        char quotes[]={"'\""};
726%
727%      (this causes single and double quotes to be valid) Note that a
728%      token starting with one of these characters needs the same quote
729%      character to terminate it.
730%
731%      for example:
732%
733%        "ABC '
734%
735%      is unterminated, but
736%
737%        "DEF" and 'GHI'
738%
739%      are properly terminated.  Note that different quote characters
740%      can appear on the same line; only for a given token do the quote
741%      characters have to be the same.
742%
743%    o escape: the escape character (NOT a string ... only one
744%      allowed). Use zero if none is desired.
745%
746%    o breaker: the break character used to terminate the current
747%      token.  If the token was quoted, this will be the quote used.  If
748%      the token is the last one on the line, this will be zero.
749%
750%    o next: this variable points to the first character of the
751%      next token.  it gets reset by "tokenizer" as it steps through the
752%      string.  Set it to 0 upon initialization, and leave it alone
753%      after that.  You can change it if you want to jump around in the
754%      string or re-parse from the beginning, but be careful.
755%
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
757%      if not.  You may need this information (for example:  in C, a
758%      string with quotes around it is a character string, while one
759%      without is an identifier).
760%
761%    o result: 0 if we haven't reached EOS (end of string), and 1
762%      if we have.
763%
764*/
765
/*
  Scanner states used by Tokenizer() and StoreToken().
*/
enum
{
  IN_WHITE = 0,  /* no token character seen yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token finished (closing quote or white space seen) */
};
770
/*
  Return the zero-based position of the first character of `string` equal to
  `c`, or -1 when there is none.  The terminating NUL is never matched.
*/
static long sindex(int c,const char *string)
{
  long
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
781
782static void StoreToken(TokenInfo *token_info,char *string,
783  size_t max_token_length,int c)
784{
785  register long
786    i;
787
788  if ((token_info->offset < 0) ||
789      ((size_t) token_info->offset >= (max_token_length-1)))
790    return;
791  i=token_info->offset++;
792  string[i]=(char) c;
793  if (token_info->state == IN_QUOTE)
794    return;
795  switch (token_info->flag & 0x03)
796  {
797    case 1:
798    {
799      string[i]=(char) toupper(c);
800      break;
801    }
802    case 2:
803    {
804      string[i]=(char) tolower(c);
805      break;
806    }
807    default:
808      break;
809  }
810}
811
812MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
813  char *token,const size_t max_token_length,const char *line,const char *white,
814  const char *break_set,const char *quote,const char escape,char *breaker,
815  int *next,char *quoted)
816{
817  int
818    c;
819
820  register long
821    i;
822
823  *breaker='\0';
824  *quoted='\0';
825  if (line[*next] == '\0')
826    return(1);
827  token_info->state=IN_WHITE;
828  token_info->quote=(char) MagickFalse;
829  token_info->flag=flag;
830  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
831  {
832    c=(int) line[*next];
833    i=sindex(c,break_set);
834    if (i >= 0)
835      {
836        switch (token_info->state)
837        {
838          case IN_WHITE:
839          case IN_TOKEN:
840          case IN_OZONE:
841          {
842            (*next)++;
843            *breaker=break_set[i];
844            token[token_info->offset]='\0';
845            return(0);
846          }
847          case IN_QUOTE:
848          {
849            StoreToken(token_info,token,max_token_length,c);
850            break;
851          }
852        }
853        continue;
854      }
855    i=sindex(c,quote);
856    if (i >= 0)
857      {
858        switch (token_info->state)
859        {
860          case IN_WHITE:
861          {
862            token_info->state=IN_QUOTE;
863            token_info->quote=quote[i];
864            *quoted=(char) MagickTrue;
865            break;
866          }
867          case IN_QUOTE:
868          {
869            if (quote[i] != token_info->quote)
870              StoreToken(token_info,token,max_token_length,c);
871            else
872              {
873                token_info->state=IN_OZONE;
874                token_info->quote='\0';
875              }
876            break;
877          }
878          case IN_TOKEN:
879          case IN_OZONE:
880          {
881            *breaker=(char) c;
882            token[token_info->offset]='\0';
883            return(0);
884          }
885        }
886        continue;
887      }
888    i=sindex(c,white);
889    if (i >= 0)
890      {
891        switch (token_info->state)
892        {
893          case IN_WHITE:
894          case IN_OZONE:
895            break;
896          case IN_TOKEN:
897          {
898            token_info->state=IN_OZONE;
899            break;
900          }
901          case IN_QUOTE:
902          {
903            StoreToken(token_info,token,max_token_length,c);
904            break;
905          }
906        }
907        continue;
908      }
909    if (c == (int) escape)
910      {
911        if (line[(*next)+1] == '\0')
912          {
913            *breaker='\0';
914            StoreToken(token_info,token,max_token_length,c);
915            (*next)++;
916            token[token_info->offset]='\0';
917            return(0);
918          }
919        switch (token_info->state)
920        {
921          case IN_WHITE:
922          {
923            (*next)--;
924            token_info->state=IN_TOKEN;
925            break;
926          }
927          case IN_TOKEN:
928          case IN_QUOTE:
929          {
930            (*next)++;
931            c=(int) line[*next];
932            StoreToken(token_info,token,max_token_length,c);
933            break;
934          }
935          case IN_OZONE:
936          {
937            token[token_info->offset]='\0';
938            return(0);
939          }
940        }
941        continue;
942      }
943    switch (token_info->state)
944    {
945      case IN_WHITE:
946        token_info->state=IN_TOKEN;
947      case IN_TOKEN:
948      case IN_QUOTE:
949      {
950        StoreToken(token_info,token,max_token_length,c);
951        break;
952      }
953      case IN_OZONE:
954      {
955        token[token_info->offset]='\0';
956        return(0);
957      }
958    }
959  }
960  token[token_info->offset]='\0';
961  return(0);
962}
963