1/* 2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3% % 4% % 5% % 6% TTTTT OOO K K EEEEE N N % 7% T O O K K E NN N % 8% T O O KKK EEE N N N % 9% T O O K K E N NN % 10% T OOO K K EEEEE N N % 11% % 12% % 13% MagickCore Token Methods % 14% % 15% Software Design % 16% Cristy % 17% January 1993 % 18% % 19% % 20% Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization % 21% dedicated to making software imaging solutions freely available. % 22% % 23% You may not use this file except in compliance with the License. You may % 24% obtain a copy of the License at % 25% % 26% http://www.imagemagick.org/script/license.php % 27% % 28% Unless required by applicable law or agreed to in writing, software % 29% distributed under the License is distributed on an "AS IS" BASIS, % 30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. % 31% See the License for the specific language governing permissions and % 32% limitations under the License. % 33% % 34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35% 36% 37% 38*/ 39 40/* 41 Include declarations. 42*/ 43#include "MagickCore/studio.h" 44#include "MagickCore/exception.h" 45#include "MagickCore/exception-private.h" 46#include "MagickCore/image.h" 47#include "MagickCore/memory_.h" 48#include "MagickCore/string_.h" 49#include "MagickCore/string-private.h" 50#include "MagickCore/token.h" 51#include "MagickCore/token-private.h" 52#include "MagickCore/utility.h" 53#include "MagickCore/utility-private.h" 54 55/* 56 Typedef declaractions. 57*/ 58struct _TokenInfo 59{ 60 int 61 state; 62 63 MagickStatusType 64 flag; 65 66 ssize_t 67 offset; 68 69 char 70 quote; 71 72 size_t 73 signature; 74}; 75 76/* 77%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 78% % 79% % 80% % 81% A c q u i r e T o k e n I n f o % 82% % 83% % 84% % 85%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 86% 87% AcquireTokenInfo() allocates the TokenInfo structure. 88% 89% The format of the AcquireTokenInfo method is: 90% 91% TokenInfo *AcquireTokenInfo() 92% 93*/ 94MagickExport TokenInfo *AcquireTokenInfo(void) 95{ 96 TokenInfo 97 *token_info; 98 99 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info)); 100 if (token_info == (TokenInfo *) NULL) 101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed"); 102 token_info->signature=MagickCoreSignature; 103 return(token_info); 104} 105 106/* 107%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 108% % 109% % 110% % 111% D e s t r o y T o k e n I n f o % 112% % 113% % 114% % 115%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116% 117% DestroyTokenInfo() deallocates memory associated with an TokenInfo 118% structure. 119% 120% The format of the DestroyTokenInfo method is: 121% 122% TokenInfo *DestroyTokenInfo(TokenInfo *token_info) 123% 124% A description of each parameter follows: 125% 126% o token_info: Specifies a pointer to an TokenInfo structure. 127% 128*/ 129MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info) 130{ 131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"..."); 132 assert(token_info != (TokenInfo *) NULL); 133 assert(token_info->signature == MagickCoreSignature); 134 token_info->signature=(~MagickCoreSignature); 135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info); 136 return(token_info); 137} 138 139/* 140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 141% % 142% % 143% % 144+ G e t N e x t T o k e n % 145% % 146% % 147% % 148%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 149% 150% GetNextToken() gets a token from the token stream. A token is defined as 151% a sequence of characters delimited by whitespace (e.g. clip-path), a 152% sequence delimited with quotes (.e.g "Quote me"), or a sequence enclosed in 153% parenthesis (e.g. rgb(0,0,0)). GetNextToken() also recognizes these 154% separator characters: ':', '=', ',', and ';'. 155% 156% The format of the GetNextToken method is: 157% 158% void GetNextToken(const char *start,const char **end, 159% const size_t extent,char *token) 160% 161% A description of each parameter follows: 162% 163% o start: the start of the token sequence. 164% 165% o end: point to the end of the token sequence. 166% 167% o extent: maximum extent of the token. 168% 169% o token: copy the token to this buffer. 170% 171*/ 172MagickExport void GetNextToken(const char *start,const char **end, 173 const size_t extent,char *token) 174{ 175 double 176 value; 177 178 register const char 179 *p; 180 181 register ssize_t 182 i; 183 184 assert(start != (const char *) NULL); 185 assert(token != (char *) NULL); 186 i=0; 187 p=start; 188 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0')) 189 p++; 190 switch (*p) 191 { 192 case '\0': 193 break; 194 case '"': 195 case '\'': 196 case '`': 197 case '{': 198 { 199 register char 200 escape; 201 202 switch (*p) 203 { 204 case '"': escape='"'; break; 205 case '\'': escape='\''; break; 206 case '`': escape='\''; break; 207 case '{': escape='}'; break; 208 default: escape=(*p); break; 209 } 210 for (p++; *p != '\0'; p++) 211 { 212 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\'))) 213 p++; 214 else 215 if (*p == escape) 216 { 217 p++; 218 break; 219 } 220 if (i < (ssize_t) (extent-1)) 221 token[i++]=(*p); 222 } 223 break; 224 } 225 case '/': 226 { 227 if (i < (ssize_t) (extent-1)) 228 token[i++]=(*p++); 229 if ((*p == '>') || (*p == '/')) 230 if (i < (ssize_t) (extent-1)) 231 token[i++]=(*p++); 232 break; 233 } 234 default: 235 { 236 char 237 *q; 238 239 value=StringToDouble(p,&q); 240 (void) value; 241 if ((p != q) && (*p != ',')) 242 { 243 for ( ; (p < q) && (*p != ','); p++) 244 if (i < (ssize_t) (extent-1)) 245 token[i++]=(*p); 246 if (*p == '%') 247 if (i < (ssize_t) (extent-1)) 248 token[i++]=(*p++); 249 break; 250 } 251 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) && 252 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<')) 253 { 254 if (i < (ssize_t) (extent-1)) 255 token[i++]=(*p++); 256 break; 257 } 258 for ( ; *p != '\0'; p++) 259 { 260 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') || 261 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\')) 262 break; 263 if ((i > 0) && (*p == '<')) 264 break; 265 if (i < (ssize_t) (extent-1)) 266 token[i++]=(*p); 267 if (*p == '>') 268 break; 269 if (*p == '(') 270 for (p++; *p != '\0'; p++) 271 { 272 if (i < (ssize_t) (extent-1)) 273 token[i++]=(*p); 274 if ((*p == ')') && (*(p-1) != '\\')) 275 break; 276 } 277 } 278 break; 279 } 280 } 281 token[i]='\0'; 282 if (LocaleNCompare(token,"url(",4) == 0) 283 { 284 ssize_t 285 offset; 286 287 offset=4; 288 if (token[offset] == '#') 289 offset++; 290 i=(ssize_t) strlen(token); 291 (void) CopyMagickString(token,token+offset,MagickPathExtent); 292 token[i-offset-1]='\0'; 293 } 294 while (isspace((int) ((unsigned char) *p)) != 0) 295 p++; 296 if (end != (const char **) NULL) 297 *end=(const char *) p; 298} 299 300/* 301%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 302% % 303% % 304% % 305% G l o b E x p r e s s i o n % 306% % 307% % 308% % 309%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 310% 311% GlobExpression() returns MagickTrue if the expression matches the pattern. 312% 313% The format of the GlobExpression function is: 314% 315% MagickBooleanType GlobExpression(const char *expression, 316% const char *pattern,const MagickBooleanType case_insensitive) 317% 318% A description of each parameter follows: 319% 320% o expression: Specifies a pointer to a text string containing a file name. 321% 322% o pattern: Specifies a pointer to a text string containing a pattern. 323% 324% o case_insensitive: set to MagickTrue to ignore the case when matching 325% an expression. 326% 327*/ 328MagickExport MagickBooleanType GlobExpression(const char *expression, 329 const char *pattern,const MagickBooleanType case_insensitive) 330{ 331 MagickBooleanType 332 done, 333 match; 334 335 register const char 336 *p; 337 338 /* 339 Return on empty pattern or '*'. 340 */ 341 if (pattern == (char *) NULL) 342 return(MagickTrue); 343 if (GetUTFCode(pattern) == 0) 344 return(MagickTrue); 345 if (LocaleCompare(pattern,"*") == 0) 346 return(MagickTrue); 347 p=pattern+strlen(pattern)-1; 348 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL)) 349 { 350 ExceptionInfo 351 *exception; 352 353 ImageInfo 354 *image_info; 355 356 /* 357 Determine if pattern is a scene, i.e. img0001.pcd[2]. 358 */ 359 image_info=AcquireImageInfo(); 360 (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent); 361 exception=AcquireExceptionInfo(); 362 (void) SetImageInfo(image_info,0,exception); 363 exception=DestroyExceptionInfo(exception); 364 if (LocaleCompare(image_info->filename,pattern) != 0) 365 { 366 image_info=DestroyImageInfo(image_info); 367 return(MagickFalse); 368 } 369 image_info=DestroyImageInfo(image_info); 370 } 371 /* 372 Evaluate glob expression. 373 */ 374 done=MagickFalse; 375 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse)) 376 { 377 if (GetUTFCode(expression) == 0) 378 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*')) 379 break; 380 switch (GetUTFCode(pattern)) 381 { 382 case '*': 383 { 384 MagickBooleanType 385 status; 386 387 status=MagickFalse; 388 pattern+=GetUTFOctets(pattern); 389 while ((GetUTFCode(expression) != 0) && (status == MagickFalse)) 390 { 391 status=GlobExpression(expression,pattern,case_insensitive); 392 expression+=GetUTFOctets(expression); 393 } 394 if (status != MagickFalse) 395 { 396 while (GetUTFCode(expression) != 0) 397 expression+=GetUTFOctets(expression); 398 while (GetUTFCode(pattern) != 0) 399 pattern+=GetUTFOctets(pattern); 400 } 401 break; 402 } 403 case '[': 404 { 405 int 406 c; 407 408 pattern+=GetUTFOctets(pattern); 409 for ( ; ; ) 410 { 411 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']')) 412 { 413 done=MagickTrue; 414 break; 415 } 416 if (GetUTFCode(pattern) == '\\') 417 { 418 pattern+=GetUTFOctets(pattern); 419 if (GetUTFCode(pattern) == 0) 420 { 421 done=MagickTrue; 422 break; 423 } 424 } 425 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-') 426 { 427 c=GetUTFCode(pattern); 428 pattern+=GetUTFOctets(pattern); 429 pattern+=GetUTFOctets(pattern); 430 if (GetUTFCode(pattern) == ']') 431 { 432 done=MagickTrue; 433 break; 434 } 435 if (GetUTFCode(pattern) == '\\') 436 { 437 pattern+=GetUTFOctets(pattern); 438 if (GetUTFCode(pattern) == 0) 439 { 440 done=MagickTrue; 441 break; 442 } 443 } 444 if ((GetUTFCode(expression) < c) || 445 (GetUTFCode(expression) > GetUTFCode(pattern))) 446 { 447 pattern+=GetUTFOctets(pattern); 448 continue; 449 } 450 } 451 else 452 if (GetUTFCode(pattern) != GetUTFCode(expression)) 453 { 454 pattern+=GetUTFOctets(pattern); 455 continue; 456 } 457 pattern+=GetUTFOctets(pattern); 458 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0)) 459 { 460 if ((GetUTFCode(pattern) == '\\') && 461 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0)) 462 pattern+=GetUTFOctets(pattern); 463 pattern+=GetUTFOctets(pattern); 464 } 465 if (GetUTFCode(pattern) != 0) 466 { 467 pattern+=GetUTFOctets(pattern); 468 expression+=GetUTFOctets(expression); 469 } 470 break; 471 } 472 break; 473 } 474 case '?': 475 { 476 pattern+=GetUTFOctets(pattern); 477 expression+=GetUTFOctets(expression); 478 break; 479 } 480 case '{': 481 { 482 pattern+=GetUTFOctets(pattern); 483 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0)) 484 { 485 p=expression; 486 match=MagickTrue; 487 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) && 488 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') && 489 (match != MagickFalse)) 490 { 491 if (GetUTFCode(pattern) == '\\') 492 pattern+=GetUTFOctets(pattern); 493 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue : 494 MagickFalse; 495 p+=GetUTFOctets(p); 496 pattern+=GetUTFOctets(pattern); 497 } 498 if (GetUTFCode(pattern) == 0) 499 { 500 match=MagickFalse; 501 done=MagickTrue; 502 break; 503 } 504 else 505 if (match != MagickFalse) 506 { 507 expression=p; 508 while ((GetUTFCode(pattern) != '}') && 509 (GetUTFCode(pattern) != 0)) 510 { 511 pattern+=GetUTFOctets(pattern); 512 if (GetUTFCode(pattern) == '\\') 513 { 514 pattern+=GetUTFOctets(pattern); 515 if (GetUTFCode(pattern) == '}') 516 pattern+=GetUTFOctets(pattern); 517 } 518 } 519 } 520 else 521 { 522 while ((GetUTFCode(pattern) != '}') && 523 (GetUTFCode(pattern) != ',') && 524 (GetUTFCode(pattern) != 0)) 525 { 526 pattern+=GetUTFOctets(pattern); 527 if (GetUTFCode(pattern) == '\\') 528 { 529 pattern+=GetUTFOctets(pattern); 530 if ((GetUTFCode(pattern) == '}') || 531 (GetUTFCode(pattern) == ',')) 532 pattern+=GetUTFOctets(pattern); 533 } 534 } 535 } 536 if (GetUTFCode(pattern) != 0) 537 pattern+=GetUTFOctets(pattern); 538 } 539 break; 540 } 541 case '\\': 542 { 543 pattern+=GetUTFOctets(pattern); 544 if (GetUTFCode(pattern) == 0) 545 break; 546 } 547 default: 548 { 549 if (case_insensitive != MagickFalse) 550 { 551 if (tolower((int) GetUTFCode(expression)) != 552 tolower((int) GetUTFCode(pattern))) 553 { 554 done=MagickTrue; 555 break; 556 } 557 } 558 else 559 if (GetUTFCode(expression) != GetUTFCode(pattern)) 560 { 561 done=MagickTrue; 562 break; 563 } 564 expression+=GetUTFOctets(expression); 565 pattern+=GetUTFOctets(pattern); 566 } 567 } 568 } 569 while (GetUTFCode(pattern) == '*') 570 pattern+=GetUTFOctets(pattern); 571 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ? 572 MagickTrue : MagickFalse; 573 return(match); 574} 575 576/* 577%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 578% % 579% % 580% % 581+ I s G l o b % 582% % 583% % 584% % 585%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 586% 587% IsGlob() returns MagickTrue if the path specification contains a globbing 588% pattern. 589% 590% The format of the IsGlob method is: 591% 592% MagickBooleanType IsGlob(const char *geometry) 593% 594% A description of each parameter follows: 595% 596% o path: the path. 597% 598*/ 599MagickPrivate MagickBooleanType IsGlob(const char *path) 600{ 601 MagickBooleanType 602 status = MagickFalse; 603 604 register const char 605 *p; 606 607 if (IsPathAccessible(path) != MagickFalse) 608 return(MagickFalse); 609 for (p=path; *p != '\0'; p++) 610 { 611 switch (*p) 612 { 613 case '*': 614 case '?': 615 case '{': 616 case '}': 617 case '[': 618 case ']': 619 { 620 status=MagickTrue; 621 break; 622 } 623 default: 624 break; 625 } 626 } 627 return(status); 628} 629 630/* 631%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 632% % 633% % 634% % 635% T o k e n i z e r % 636% % 637% % 638% % 639%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 640% 641% Tokenizer() is a generalized, finite state token parser. It extracts tokens 642% one at a time from a string of characters. The characters used for white 643% space, for break characters, and for quotes can be specified. Also, 644% characters in the string can be preceded by a specifiable escape character 645% which removes any special meaning the character may have. 646% 647% Here is some terminology: 648% 649% o token: A single unit of information in the form of a group of 650% characters. 651% 652% o white space: Apace that gets ignored (except within quotes or when 653% escaped), like blanks and tabs. in addition, white space terminates a 654% non-quoted token. 655% 656% o break set: One or more characters that separates non-quoted tokens. 657% Commas are a common break character. The usage of break characters to 658% signal the end of a token is the same as that of white space, except 659% multiple break characters with nothing or only white space between 660% generate a null token for each two break characters together. 661% 662% For example, if blank is set to be the white space and comma is set to 663% be the break character, the line 664% 665% A, B, C , , DEF 666% 667% ... consists of 5 tokens: 668% 669% 1) "A" 670% 2) "B" 671% 3) "C" 672% 4) "" (the null string) 673% 5) "DEF" 674% 675% o Quote character: A character that, when surrounding a group of other 676% characters, causes the group of characters to be treated as a single 677% token, no matter how many white spaces or break characters exist in 678% the group. Also, a token always terminates after the closing quote. 679% For example, if ' is the quote character, blank is white space, and 680% comma is the break character, the following string 681% 682% A, ' B, CD'EF GHI 683% 684% ... consists of 4 tokens: 685% 686% 1) "A" 687% 2) " B, CD" (note the blanks & comma) 688% 3) "EF" 689% 4) "GHI" 690% 691% The quote characters themselves do not appear in the resultant 692% tokens. The double quotes are delimiters i use here for 693% documentation purposes only. 694% 695% o Escape character: A character which itself is ignored but which 696% causes the next character to be used as is. ^ and \ are often used 697% as escape characters. An escape in the last position of the string 698% gets treated as a "normal" (i.e., non-quote, non-white, non-break, 699% and non-escape) character. For example, assume white space, break 700% character, and quote are the same as in the above examples, and 701% further, assume that ^ is the escape character. Then, in the string 702% 703% ABC, ' DEF ^' GH' I ^ J K^ L ^ 704% 705% ... there are 7 tokens: 706% 707% 1) "ABC" 708% 2) " DEF ' GH" 709% 3) "I" 710% 4) " " (a lone blank) 711% 5) "J" 712% 6) "K L" 713% 7) "^" (passed as is at end of line) 714% 715% The format of the Tokenizer method is: 716% 717% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token, 718% const size_t max_token_length,const char *line,const char *white, 719% const char *break_set,const char *quote,const char escape, 720% char *breaker,int *next,char *quoted) 721% 722% A description of each parameter follows: 723% 724% o flag: right now, only the low order 3 bits are used. 725% 726% 1 => convert non-quoted tokens to upper case 727% 2 => convert non-quoted tokens to lower case 728% 0 => do not convert non-quoted tokens 729% 730% o token: a character string containing the returned next token 731% 732% o max_token_length: the maximum size of "token". Characters beyond 733% "max_token_length" are truncated. 734% 735% o string: the string to be parsed. 736% 737% o white: a string of the valid white spaces. example: 738% 739% char whitesp[]={" \t"}; 740% 741% blank and tab will be valid white space. 742% 743% o break: a string of the valid break characters. example: 744% 745% char breakch[]={";,"}; 746% 747% semicolon and comma will be valid break characters. 748% 749% o quote: a string of the valid quote characters. An example would be 750% 751% char whitesp[]={"'\""); 752% 753% (this causes single and double quotes to be valid) Note that a 754% token starting with one of these characters needs the same quote 755% character to terminate it. 756% 757% for example: 758% 759% "ABC ' 760% 761% is unterminated, but 762% 763% "DEF" and 'GHI' 764% 765% are properly terminated. Note that different quote characters 766% can appear on the same line; only for a given token do the quote 767% characters have to be the same. 768% 769% o escape: the escape character (NOT a string ... only one 770% allowed). Use zero if none is desired. 771% 772% o breaker: the break character used to terminate the current 773% token. If the token was quoted, this will be the quote used. If 774% the token is the last one on the line, this will be zero. 775% 776% o next: this variable points to the first character of the 777% next token. it gets reset by "tokenizer" as it steps through the 778% string. Set it to 0 upon initialization, and leave it alone 779% after that. You can change it if you want to jump around in the 780% string or re-parse from the beginning, but be careful. 781% 782% o quoted: set to True if the token was quoted and MagickFalse 783% if not. You may need this information (for example: in C, a 784% string with quotes around it is a character string, while one 785% without is an identifier). 786% 787% o result: 0 if we haven't reached EOS (end of string), and 1 788% if we have. 789% 790*/ 791 792#define IN_WHITE 0 793#define IN_TOKEN 1 794#define IN_QUOTE 2 795#define IN_OZONE 3 796 797static ssize_t sindex(int c,const char *string) 798{ 799 register const char 800 *p; 801 802 for (p=string; *p != '\0'; p++) 803 if (c == (int) (*p)) 804 return((ssize_t) (p-string)); 805 return(-1); 806} 807 808static void StoreToken(TokenInfo *token_info,char *string, 809 size_t max_token_length,int c) 810{ 811 register ssize_t 812 i; 813 814 if ((token_info->offset < 0) || 815 ((size_t) token_info->offset >= (max_token_length-1))) 816 return; 817 i=token_info->offset++; 818 string[i]=(char) c; 819 if (token_info->state == IN_QUOTE) 820 return; 821 switch (token_info->flag & 0x03) 822 { 823 case 1: 824 { 825 string[i]=(char) toupper(c); 826 break; 827 } 828 case 2: 829 { 830 string[i]=(char) tolower(c); 831 break; 832 } 833 default: 834 break; 835 } 836} 837 838MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag, 839 char *token,const size_t max_token_length,const char *line,const char *white, 840 const char *break_set,const char *quote,const char escape,char *breaker, 841 int *next,char *quoted) 842{ 843 int 844 c; 845 846 register ssize_t 847 i; 848 849 *breaker='\0'; 850 *quoted='\0'; 851 if (line[*next] == '\0') 852 return(1); 853 token_info->state=IN_WHITE; 854 token_info->quote=(char) MagickFalse; 855 token_info->flag=flag; 856 for (token_info->offset=0; (int) line[*next] != 0; (*next)++) 857 { 858 c=(int) line[*next]; 859 i=sindex(c,break_set); 860 if (i >= 0) 861 { 862 switch (token_info->state) 863 { 864 case IN_WHITE: 865 case IN_TOKEN: 866 case IN_OZONE: 867 { 868 (*next)++; 869 *breaker=break_set[i]; 870 token[token_info->offset]='\0'; 871 return(0); 872 } 873 case IN_QUOTE: 874 { 875 StoreToken(token_info,token,max_token_length,c); 876 break; 877 } 878 } 879 continue; 880 } 881 i=sindex(c,quote); 882 if (i >= 0) 883 { 884 switch (token_info->state) 885 { 886 case IN_WHITE: 887 { 888 token_info->state=IN_QUOTE; 889 token_info->quote=quote[i]; 890 *quoted=(char) MagickTrue; 891 break; 892 } 893 case IN_QUOTE: 894 { 895 if (quote[i] != token_info->quote) 896 StoreToken(token_info,token,max_token_length,c); 897 else 898 { 899 token_info->state=IN_OZONE; 900 token_info->quote='\0'; 901 } 902 break; 903 } 904 case IN_TOKEN: 905 case IN_OZONE: 906 { 907 *breaker=(char) c; 908 token[token_info->offset]='\0'; 909 return(0); 910 } 911 } 912 continue; 913 } 914 i=sindex(c,white); 915 if (i >= 0) 916 { 917 switch (token_info->state) 918 { 919 case IN_WHITE: 920 case IN_OZONE: 921 break; 922 case IN_TOKEN: 923 { 924 token_info->state=IN_OZONE; 925 break; 926 } 927 case IN_QUOTE: 928 { 929 StoreToken(token_info,token,max_token_length,c); 930 break; 931 } 932 } 933 continue; 934 } 935 if (c == (int) escape) 936 { 937 if (line[(*next)+1] == '\0') 938 { 939 *breaker='\0'; 940 StoreToken(token_info,token,max_token_length,c); 941 (*next)++; 942 token[token_info->offset]='\0'; 943 return(0); 944 } 945 switch (token_info->state) 946 { 947 case IN_WHITE: 948 { 949 (*next)--; 950 token_info->state=IN_TOKEN; 951 break; 952 } 953 case IN_TOKEN: 954 case IN_QUOTE: 955 { 956 (*next)++; 957 c=(int) line[*next]; 958 StoreToken(token_info,token,max_token_length,c); 959 break; 960 } 961 case IN_OZONE: 962 { 963 token[token_info->offset]='\0'; 964 return(0); 965 } 966 } 967 continue; 968 } 969 switch (token_info->state) 970 { 971 case IN_WHITE: 972 { 973 token_info->state=IN_TOKEN; 974 StoreToken(token_info,token,max_token_length,c); 975 break; 976 } 977 case IN_TOKEN: 978 case IN_QUOTE: 979 { 980 StoreToken(token_info,token,max_token_length,c); 981 break; 982 } 983 case IN_OZONE: 984 { 985 token[token_info->offset]='\0'; 986 return(0); 987 } 988 } 989 } 990 token[token_info->offset]='\0'; 991 return(0); 992} 993