1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7%                      T    O   O  K  K   E      NN  N                        %
8%                      T    O   O  KKK    EEE    N N N                        %
9%                      T    O   O  K  K   E      N  NN                        %
10%                      T     OOO   K   K  EEEEE  N   N                        %
11%                                                                             %
12%                                                                             %
13%                         MagickCore Token Methods                            %
14%                                                                             %
15%                             Software Design                                 %
16%                                  Cristy                                     %
17%                              January 1993                                   %
18%                                                                             %
19%                                                                             %
20%  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
21%  dedicated to making software imaging solutions freely available.           %
22%                                                                             %
23%  You may not use this file except in compliance with the License.  You may  %
24%  obtain a copy of the License at                                            %
25%                                                                             %
26%    http://www.imagemagick.org/script/license.php                            %
27%                                                                             %
28%  Unless required by applicable law or agreed to in writing, software        %
29%  distributed under the License is distributed on an "AS IS" BASIS,          %
30%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31%  See the License for the specific language governing permissions and        %
32%  limitations under the License.                                             %
33%                                                                             %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41  Include declarations.
42*/
43#include "MagickCore/studio.h"
44#include "MagickCore/exception.h"
45#include "MagickCore/exception-private.h"
46#include "MagickCore/image.h"
47#include "MagickCore/memory_.h"
48#include "MagickCore/string_.h"
49#include "MagickCore/string-private.h"
50#include "MagickCore/token.h"
51#include "MagickCore/token-private.h"
52#include "MagickCore/utility.h"
53#include "MagickCore/utility-private.h"
54
55/*
  Typedef declarations.
57*/
58struct _TokenInfo
59{
60  int
61    state;
62
63  MagickStatusType
64    flag;
65
66  ssize_t
67    offset;
68
69  char
70    quote;
71
72  size_t
73    signature;
74};
75
76/*
77%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78%                                                                             %
79%                                                                             %
80%                                                                             %
81%   A c q u i r e T o k e n I n f o                                           %
82%                                                                             %
83%                                                                             %
84%                                                                             %
85%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86%
87%  AcquireTokenInfo() allocates the TokenInfo structure.
88%
89%  The format of the AcquireTokenInfo method is:
90%
%      TokenInfo *AcquireTokenInfo(void)
92%
93*/
94MagickExport TokenInfo *AcquireTokenInfo(void)
95{
96  TokenInfo
97    *token_info;
98
99  token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
100  if (token_info == (TokenInfo *) NULL)
101    ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
102  token_info->signature=MagickCoreSignature;
103  return(token_info);
104}
105
106/*
107%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108%                                                                             %
109%                                                                             %
110%                                                                             %
111%   D e s t r o y T o k e n I n f o                                           %
112%                                                                             %
113%                                                                             %
114%                                                                             %
115%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116%
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
119%
120%  The format of the DestroyTokenInfo method is:
121%
122%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123%
124%  A description of each parameter follows:
125%
%    o token_info: Specifies a pointer to a TokenInfo structure.
127%
128*/
129MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130{
131  (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132  assert(token_info != (TokenInfo *) NULL);
133  assert(token_info->signature == MagickCoreSignature);
134  token_info->signature=(~MagickCoreSignature);
135  token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136  return(token_info);
137}
138
139/*
140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141%                                                                             %
142%                                                                             %
143%                                                                             %
144+   G e t N e x t T o k e n                                                   %
145%                                                                             %
146%                                                                             %
147%                                                                             %
148%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149%
150%  GetNextToken() gets a token from the token stream.  A token is defined as
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
%  parentheses (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
154%  separator characters: ':', '=', ',', and ';'.
155%
156%  The format of the GetNextToken method is:
157%
158%      void GetNextToken(const char *start,const char **end,
159%        const size_t extent,char *token)
160%
161%  A description of each parameter follows:
162%
163%    o start: the start of the token sequence.
164%
165%    o end: point to the end of the token sequence.
166%
167%    o extent: maximum extent of the token.
168%
169%    o token: copy the token to this buffer.
170%
171*/
172MagickExport void GetNextToken(const char *start,const char **end,
173  const size_t extent,char *token)
174{
175  double
176    value;
177
178  register const char
179    *p;
180
181  register ssize_t
182    i;
183
184  assert(start != (const char *) NULL);
185  assert(token != (char *) NULL);
186  i=0;
187  p=start;
188  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
189    p++;
190  switch (*p)
191  {
192    case '\0':
193      break;
194    case '"':
195    case '\'':
196    case '`':
197    case '{':
198    {
199      register char
200        escape;
201
202      switch (*p)
203      {
204        case '"': escape='"'; break;
205        case '\'': escape='\''; break;
206        case '`': escape='\''; break;
207        case '{': escape='}'; break;
208        default: escape=(*p); break;
209      }
210      for (p++; *p != '\0'; p++)
211      {
212        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
213          p++;
214        else
215          if (*p == escape)
216            {
217              p++;
218              break;
219            }
220        if (i < (ssize_t) (extent-1))
221          token[i++]=(*p);
222      }
223      break;
224    }
225    case '/':
226    {
227      if (i < (ssize_t) (extent-1))
228        token[i++]=(*p++);
229      if ((*p == '>') || (*p == '/'))
230        if (i < (ssize_t) (extent-1))
231          token[i++]=(*p++);
232      break;
233    }
234    default:
235    {
236      char
237        *q;
238
239      value=StringToDouble(p,&q);
240      (void) value;
241      if ((p != q) && (*p != ','))
242        {
243          for ( ; (p < q) && (*p != ','); p++)
244            if (i < (ssize_t) (extent-1))
245              token[i++]=(*p);
246          if (*p == '%')
247            if (i < (ssize_t) (extent-1))
248              token[i++]=(*p++);
249          break;
250        }
251      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
252          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
253        {
254          if (i < (ssize_t) (extent-1))
255            token[i++]=(*p++);
256          break;
257        }
258      for ( ; *p != '\0'; p++)
259      {
260        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
261            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
262          break;
263        if ((i > 0) && (*p == '<'))
264          break;
265        if (i < (ssize_t) (extent-1))
266          token[i++]=(*p);
267        if (*p == '>')
268          break;
269        if (*p == '(')
270          for (p++; *p != '\0'; p++)
271          {
272            if (i < (ssize_t) (extent-1))
273              token[i++]=(*p);
274            if ((*p == ')') && (*(p-1) != '\\'))
275              break;
276          }
277      }
278      break;
279    }
280  }
281  token[i]='\0';
282  if (LocaleNCompare(token,"url(",4) == 0)
283    {
284      ssize_t
285        offset;
286
287      offset=4;
288      if (token[offset] == '#')
289        offset++;
290      i=(ssize_t) strlen(token);
291      (void) CopyMagickString(token,token+offset,MagickPathExtent);
292      token[i-offset-1]='\0';
293    }
294  while (isspace((int) ((unsigned char) *p)) != 0)
295    p++;
296  if (end != (const char **) NULL)
297    *end=(const char *) p;
298}
299
300/*
301%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
302%                                                                             %
303%                                                                             %
304%                                                                             %
305%   G l o b E x p r e s s i o n                                               %
306%                                                                             %
307%                                                                             %
308%                                                                             %
309%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
310%
311%  GlobExpression() returns MagickTrue if the expression matches the pattern.
312%
313%  The format of the GlobExpression function is:
314%
315%      MagickBooleanType GlobExpression(const char *expression,
316%        const char *pattern,const MagickBooleanType case_insensitive)
317%
318%  A description of each parameter follows:
319%
320%    o expression: Specifies a pointer to a text string containing a file name.
321%
322%    o pattern: Specifies a pointer to a text string containing a pattern.
323%
324%    o case_insensitive: set to MagickTrue to ignore the case when matching
325%      an expression.
326%
327*/
328MagickExport MagickBooleanType GlobExpression(const char *expression,
329  const char *pattern,const MagickBooleanType case_insensitive)
330{
331  MagickBooleanType
332    done,
333    match;
334
335  register const char
336    *p;
337
338  /*
339    Return on empty pattern or '*'.
340  */
341  if (pattern == (char *) NULL)
342    return(MagickTrue);
343  if (GetUTFCode(pattern) == 0)
344    return(MagickTrue);
345  if (LocaleCompare(pattern,"*") == 0)
346    return(MagickTrue);
347  p=pattern+strlen(pattern)-1;
348  if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
349    {
350      ExceptionInfo
351        *exception;
352
353      ImageInfo
354        *image_info;
355
356      /*
357        Determine if pattern is a scene, i.e. img0001.pcd[2].
358      */
359      image_info=AcquireImageInfo();
360      (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
361      exception=AcquireExceptionInfo();
362      (void) SetImageInfo(image_info,0,exception);
363      exception=DestroyExceptionInfo(exception);
364      if (LocaleCompare(image_info->filename,pattern) != 0)
365        {
366          image_info=DestroyImageInfo(image_info);
367          return(MagickFalse);
368        }
369      image_info=DestroyImageInfo(image_info);
370    }
371  /*
372    Evaluate glob expression.
373  */
374  done=MagickFalse;
375  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
376  {
377    if (GetUTFCode(expression) == 0)
378      if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
379        break;
380    switch (GetUTFCode(pattern))
381    {
382      case '*':
383      {
384        MagickBooleanType
385          status;
386
387        status=MagickFalse;
388        pattern+=GetUTFOctets(pattern);
389        while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
390        {
391          status=GlobExpression(expression,pattern,case_insensitive);
392          expression+=GetUTFOctets(expression);
393        }
394        if (status != MagickFalse)
395          {
396            while (GetUTFCode(expression) != 0)
397              expression+=GetUTFOctets(expression);
398            while (GetUTFCode(pattern) != 0)
399              pattern+=GetUTFOctets(pattern);
400          }
401        break;
402      }
403      case '[':
404      {
405        int
406          c;
407
408        pattern+=GetUTFOctets(pattern);
409        for ( ; ; )
410        {
411          if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
412            {
413              done=MagickTrue;
414              break;
415            }
416          if (GetUTFCode(pattern) == '\\')
417            {
418              pattern+=GetUTFOctets(pattern);
419              if (GetUTFCode(pattern) == 0)
420                {
421                  done=MagickTrue;
422                  break;
423                }
424             }
425          if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
426            {
427              c=GetUTFCode(pattern);
428              pattern+=GetUTFOctets(pattern);
429              pattern+=GetUTFOctets(pattern);
430              if (GetUTFCode(pattern) == ']')
431                {
432                  done=MagickTrue;
433                  break;
434                }
435              if (GetUTFCode(pattern) == '\\')
436                {
437                  pattern+=GetUTFOctets(pattern);
438                  if (GetUTFCode(pattern) == 0)
439                    {
440                      done=MagickTrue;
441                      break;
442                    }
443                }
444              if ((GetUTFCode(expression) < c) ||
445                  (GetUTFCode(expression) > GetUTFCode(pattern)))
446                {
447                  pattern+=GetUTFOctets(pattern);
448                  continue;
449                }
450            }
451          else
452            if (GetUTFCode(pattern) != GetUTFCode(expression))
453              {
454                pattern+=GetUTFOctets(pattern);
455                continue;
456              }
457          pattern+=GetUTFOctets(pattern);
458          while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
459          {
460            if ((GetUTFCode(pattern) == '\\') &&
461                (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
462              pattern+=GetUTFOctets(pattern);
463            pattern+=GetUTFOctets(pattern);
464          }
465          if (GetUTFCode(pattern) != 0)
466            {
467              pattern+=GetUTFOctets(pattern);
468              expression+=GetUTFOctets(expression);
469            }
470          break;
471        }
472        break;
473      }
474      case '?':
475      {
476        pattern+=GetUTFOctets(pattern);
477        expression+=GetUTFOctets(expression);
478        break;
479      }
480      case '{':
481      {
482        pattern+=GetUTFOctets(pattern);
483        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
484        {
485          p=expression;
486          match=MagickTrue;
487          while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
488                 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
489                 (match != MagickFalse))
490          {
491            if (GetUTFCode(pattern) == '\\')
492              pattern+=GetUTFOctets(pattern);
493            match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
494              MagickFalse;
495            p+=GetUTFOctets(p);
496            pattern+=GetUTFOctets(pattern);
497          }
498          if (GetUTFCode(pattern) == 0)
499            {
500              match=MagickFalse;
501              done=MagickTrue;
502              break;
503            }
504          else
505            if (match != MagickFalse)
506              {
507                expression=p;
508                while ((GetUTFCode(pattern) != '}') &&
509                       (GetUTFCode(pattern) != 0))
510                {
511                  pattern+=GetUTFOctets(pattern);
512                  if (GetUTFCode(pattern) == '\\')
513                    {
514                      pattern+=GetUTFOctets(pattern);
515                      if (GetUTFCode(pattern) == '}')
516                        pattern+=GetUTFOctets(pattern);
517                    }
518                }
519              }
520            else
521              {
522                while ((GetUTFCode(pattern) != '}') &&
523                       (GetUTFCode(pattern) != ',') &&
524                       (GetUTFCode(pattern) != 0))
525                {
526                  pattern+=GetUTFOctets(pattern);
527                  if (GetUTFCode(pattern) == '\\')
528                    {
529                      pattern+=GetUTFOctets(pattern);
530                      if ((GetUTFCode(pattern) == '}') ||
531                          (GetUTFCode(pattern) == ','))
532                        pattern+=GetUTFOctets(pattern);
533                    }
534                }
535              }
536            if (GetUTFCode(pattern) != 0)
537              pattern+=GetUTFOctets(pattern);
538          }
539        break;
540      }
541      case '\\':
542      {
543        pattern+=GetUTFOctets(pattern);
544        if (GetUTFCode(pattern) == 0)
545          break;
546      }
547      default:
548      {
549        if (case_insensitive != MagickFalse)
550          {
551            if (tolower((int) GetUTFCode(expression)) !=
552                tolower((int) GetUTFCode(pattern)))
553              {
554                done=MagickTrue;
555                break;
556              }
557          }
558        else
559          if (GetUTFCode(expression) != GetUTFCode(pattern))
560            {
561              done=MagickTrue;
562              break;
563            }
564        expression+=GetUTFOctets(expression);
565        pattern+=GetUTFOctets(pattern);
566      }
567    }
568  }
569  while (GetUTFCode(pattern) == '*')
570    pattern+=GetUTFOctets(pattern);
571  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
572    MagickTrue : MagickFalse;
573  return(match);
574}
575
576/*
577%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
578%                                                                             %
579%                                                                             %
580%                                                                             %
581+     I s G l o b                                                             %
582%                                                                             %
583%                                                                             %
584%                                                                             %
585%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
586%
587%  IsGlob() returns MagickTrue if the path specification contains a globbing
588%  pattern.
589%
590%  The format of the IsGlob method is:
591%
%      MagickBooleanType IsGlob(const char *path)
593%
594%  A description of each parameter follows:
595%
596%    o path: the path.
597%
598*/
599MagickPrivate MagickBooleanType IsGlob(const char *path)
600{
601  MagickBooleanType
602    status = MagickFalse;
603
604  register const char
605    *p;
606
607  if (IsPathAccessible(path) != MagickFalse)
608    return(MagickFalse);
609  for (p=path; *p != '\0'; p++)
610  {
611    switch (*p)
612    {
613      case '*':
614      case '?':
615      case '{':
616      case '}':
617      case '[':
618      case ']':
619      {
620        status=MagickTrue;
621        break;
622      }
623      default:
624        break;
625    }
626  }
627  return(status);
628}
629
630/*
631%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
632%                                                                             %
633%                                                                             %
634%                                                                             %
635%   T o k e n i z e r                                                         %
636%                                                                             %
637%                                                                             %
638%                                                                             %
639%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
640%
641%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
642%  one at a time from a string of characters.  The characters used for white
643%  space, for break characters, and for quotes can be specified.  Also,
644%  characters in the string can be preceded by a specifiable escape character
645%  which removes any special meaning the character may have.
646%
647%  Here is some terminology:
648%
649%    o token: A single unit of information in the form of a group of
650%      characters.
651%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs.  In addition, white space terminates a
%      non-quoted token.
655%
656%    o break set: One or more characters that separates non-quoted tokens.
657%      Commas are a common break character. The usage of break characters to
658%      signal the end of a token is the same as that of white space, except
659%      multiple break characters with nothing or only white space between
660%      generate a null token for each two break characters together.
661%
662%      For example, if blank is set to be the white space and comma is set to
663%      be the break character, the line
664%
665%        A, B, C ,  , DEF
666%
667%        ... consists of 5 tokens:
668%
669%        1)  "A"
670%        2)  "B"
671%        3)  "C"
672%        4)  "" (the null string)
673%        5)  "DEF"
674%
675%    o Quote character: A character that, when surrounding a group of other
676%      characters, causes the group of characters to be treated as a single
677%      token, no matter how many white spaces or break characters exist in
678%      the group. Also, a token always terminates after the closing quote.
679%      For example, if ' is the quote character, blank is white space, and
680%      comma is the break character, the following string
681%
682%        A, ' B, CD'EF GHI
683%
684%        ... consists of 4 tokens:
685%
686%        1)  "A"
687%        2)  " B, CD" (note the blanks & comma)
688%        3)  "EF"
689%        4)  "GHI"
690%
691%      The quote characters themselves do not appear in the resultant
692%      tokens.  The double quotes are delimiters i use here for
693%      documentation purposes only.
694%
695%    o Escape character: A character which itself is ignored but which
696%      causes the next character to be used as is.  ^ and \ are often used
697%      as escape characters. An escape in the last position of the string
698%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
699%      and non-escape) character. For example, assume white space, break
700%      character, and quote are the same as in the above examples, and
701%      further, assume that ^ is the escape character. Then, in the string
702%
703%        ABC, ' DEF ^' GH' I ^ J K^ L ^
704%
705%        ... there are 7 tokens:
706%
707%        1)  "ABC"
708%        2)  " DEF ' GH"
709%        3)  "I"
710%        4)  " "     (a lone blank)
711%        5)  "J"
712%        6)  "K L"
713%        7)  "^"     (passed as is at end of line)
714%
715%  The format of the Tokenizer method is:
716%
717%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
718%        const size_t max_token_length,const char *line,const char *white,
719%        const char *break_set,const char *quote,const char escape,
720%        char *breaker,int *next,char *quoted)
721%
722%  A description of each parameter follows:
723%
%    o flag: right now, only the low order 2 bits are used.
725%
726%        1 => convert non-quoted tokens to upper case
727%        2 => convert non-quoted tokens to lower case
728%        0 => do not convert non-quoted tokens
729%
730%    o token: a character string containing the returned next token
731%
732%    o max_token_length: the maximum size of "token".  Characters beyond
733%      "max_token_length" are truncated.
734%
%    o line: the string to be parsed.
736%
737%    o white: a string of the valid white spaces.  example:
738%
739%        char whitesp[]={" \t"};
740%
741%      blank and tab will be valid white space.
742%
743%    o break: a string of the valid break characters. example:
744%
745%        char breakch[]={";,"};
746%
747%      semicolon and comma will be valid break characters.
748%
749%    o quote: a string of the valid quote characters. An example would be
750%
%        char quotech[]={"'\""};
752%
753%      (this causes single and double quotes to be valid) Note that a
754%      token starting with one of these characters needs the same quote
755%      character to terminate it.
756%
757%      for example:
758%
759%        "ABC '
760%
761%      is unterminated, but
762%
763%        "DEF" and 'GHI'
764%
765%      are properly terminated.  Note that different quote characters
766%      can appear on the same line; only for a given token do the quote
767%      characters have to be the same.
768%
769%    o escape: the escape character (NOT a string ... only one
770%      allowed). Use zero if none is desired.
771%
772%    o breaker: the break character used to terminate the current
773%      token.  If the token was quoted, this will be the quote used.  If
774%      the token is the last one on the line, this will be zero.
775%
776%    o next: this variable points to the first character of the
777%      next token.  it gets reset by "tokenizer" as it steps through the
778%      string.  Set it to 0 upon initialization, and leave it alone
779%      after that.  You can change it if you want to jump around in the
780%      string or re-parse from the beginning, but be careful.
781%
782%    o quoted: set to True if the token was quoted and MagickFalse
783%      if not.  You may need this information (for example:  in C, a
784%      string with quotes around it is a character string, while one
785%      without is an identifier).
786%
787%    o result: 0 if we haven't reached EOS (end of string), and 1
788%      if we have.
789%
790*/
791
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  0  /* before a token: white space is being skipped */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* token finished, skipping white space that follows it */
796
/*
  sindex() returns the zero-based position of the first character in string
  that equals c, or -1 when there is none.  The terminating NUL is never
  reported as a match.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
807
808static void StoreToken(TokenInfo *token_info,char *string,
809  size_t max_token_length,int c)
810{
811  register ssize_t
812    i;
813
814  if ((token_info->offset < 0) ||
815      ((size_t) token_info->offset >= (max_token_length-1)))
816    return;
817  i=token_info->offset++;
818  string[i]=(char) c;
819  if (token_info->state == IN_QUOTE)
820    return;
821  switch (token_info->flag & 0x03)
822  {
823    case 1:
824    {
825      string[i]=(char) toupper(c);
826      break;
827    }
828    case 2:
829    {
830      string[i]=(char) tolower(c);
831      break;
832    }
833    default:
834      break;
835  }
836}
837
838MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
839  char *token,const size_t max_token_length,const char *line,const char *white,
840  const char *break_set,const char *quote,const char escape,char *breaker,
841  int *next,char *quoted)
842{
843  int
844    c;
845
846  register ssize_t
847    i;
848
849  *breaker='\0';
850  *quoted='\0';
851  if (line[*next] == '\0')
852    return(1);
853  token_info->state=IN_WHITE;
854  token_info->quote=(char) MagickFalse;
855  token_info->flag=flag;
856  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
857  {
858    c=(int) line[*next];
859    i=sindex(c,break_set);
860    if (i >= 0)
861      {
862        switch (token_info->state)
863        {
864          case IN_WHITE:
865          case IN_TOKEN:
866          case IN_OZONE:
867          {
868            (*next)++;
869            *breaker=break_set[i];
870            token[token_info->offset]='\0';
871            return(0);
872          }
873          case IN_QUOTE:
874          {
875            StoreToken(token_info,token,max_token_length,c);
876            break;
877          }
878        }
879        continue;
880      }
881    i=sindex(c,quote);
882    if (i >= 0)
883      {
884        switch (token_info->state)
885        {
886          case IN_WHITE:
887          {
888            token_info->state=IN_QUOTE;
889            token_info->quote=quote[i];
890            *quoted=(char) MagickTrue;
891            break;
892          }
893          case IN_QUOTE:
894          {
895            if (quote[i] != token_info->quote)
896              StoreToken(token_info,token,max_token_length,c);
897            else
898              {
899                token_info->state=IN_OZONE;
900                token_info->quote='\0';
901              }
902            break;
903          }
904          case IN_TOKEN:
905          case IN_OZONE:
906          {
907            *breaker=(char) c;
908            token[token_info->offset]='\0';
909            return(0);
910          }
911        }
912        continue;
913      }
914    i=sindex(c,white);
915    if (i >= 0)
916      {
917        switch (token_info->state)
918        {
919          case IN_WHITE:
920          case IN_OZONE:
921            break;
922          case IN_TOKEN:
923          {
924            token_info->state=IN_OZONE;
925            break;
926          }
927          case IN_QUOTE:
928          {
929            StoreToken(token_info,token,max_token_length,c);
930            break;
931          }
932        }
933        continue;
934      }
935    if (c == (int) escape)
936      {
937        if (line[(*next)+1] == '\0')
938          {
939            *breaker='\0';
940            StoreToken(token_info,token,max_token_length,c);
941            (*next)++;
942            token[token_info->offset]='\0';
943            return(0);
944          }
945        switch (token_info->state)
946        {
947          case IN_WHITE:
948          {
949            (*next)--;
950            token_info->state=IN_TOKEN;
951            break;
952          }
953          case IN_TOKEN:
954          case IN_QUOTE:
955          {
956            (*next)++;
957            c=(int) line[*next];
958            StoreToken(token_info,token,max_token_length,c);
959            break;
960          }
961          case IN_OZONE:
962          {
963            token[token_info->offset]='\0';
964            return(0);
965          }
966        }
967        continue;
968      }
969    switch (token_info->state)
970    {
971      case IN_WHITE:
972      {
973        token_info->state=IN_TOKEN;
974        StoreToken(token_info,token,max_token_length,c);
975        break;
976      }
977      case IN_TOKEN:
978      case IN_QUOTE:
979      {
980        StoreToken(token_info,token,max_token_length,c);
981        break;
982      }
983      case IN_OZONE:
984      {
985        token[token_info->offset]='\0';
986        return(0);
987      }
988    }
989  }
990  token[token_info->offset]='\0';
991  return(0);
992}
993