xmlregexp.c revision db68b74dc7ec531361a736de7769a3e8ce881f79
1/* 2 * regexp.c: generic and extensible Regular Expression engine 3 * 4 * Basically designed with the purpose of compiling regexps for 5 * the variety of validation/shemas mechanisms now available in 6 * XML related specifications these include: 7 * - XML-1.0 DTD validation 8 * - XML Schemas structure part 1 9 * - XML Schemas Datatypes part 2 especially Appendix F 10 * - RELAX-NG/TREX i.e. the counter proposal 11 * 12 * See Copyright for the status of this software. 13 * 14 * Daniel Veillard <veillard@redhat.com> 15 */ 16 17#define IN_LIBXML 18#include "libxml.h" 19 20#ifdef LIBXML_REGEXP_ENABLED 21 22/* #define DEBUG_ERR */ 23 24#include <stdio.h> 25#include <string.h> 26#ifdef HAVE_LIMITS_H 27#include <limits.h> 28#endif 29 30#include <libxml/tree.h> 31#include <libxml/parserInternals.h> 32#include <libxml/xmlregexp.h> 33#include <libxml/xmlautomata.h> 34#include <libxml/xmlunicode.h> 35 36#ifndef INT_MAX 37#define INT_MAX 123456789 /* easy to flag and big enough for our needs */ 38#endif 39 40/* #define DEBUG_REGEXP_GRAPH */ 41/* #define DEBUG_REGEXP_EXEC */ 42/* #define DEBUG_PUSH */ 43/* #define DEBUG_COMPACTION */ 44 45#define ERROR(str) \ 46 ctxt->error = XML_REGEXP_COMPILE_ERROR; \ 47 xmlRegexpErrCompile(ctxt, str); 48#define NEXT ctxt->cur++ 49#define CUR (*(ctxt->cur)) 50#define NXT(index) (ctxt->cur[index]) 51 52#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) 53#define NEXTL(l) ctxt->cur += l; 54#define XML_REG_STRING_SEPARATOR '|' 55 56/** 57 * TODO: 58 * 59 * macro to flag unimplemented blocks 60 */ 61#define TODO \ 62 xmlGenericError(xmlGenericErrorContext, \ 63 "Unimplemented block at %s:%d\n", \ 64 __FILE__, __LINE__); 65 66/************************************************************************ 67 * * 68 * Datatypes and structures * 69 * * 70 ************************************************************************/ 71 72typedef enum { 73 XML_REGEXP_EPSILON = 1, 74 XML_REGEXP_CHARVAL, 75 XML_REGEXP_RANGES, 76 XML_REGEXP_SUBREG, 77 XML_REGEXP_STRING, 78 XML_REGEXP_ANYCHAR, /* . */ 79 XML_REGEXP_ANYSPACE, /* \s */ 80 XML_REGEXP_NOTSPACE, /* \S */ 81 XML_REGEXP_INITNAME, /* \l */ 82 XML_REGEXP_NOTINITNAME, /* \l */ 83 XML_REGEXP_NAMECHAR, /* \c */ 84 XML_REGEXP_NOTNAMECHAR, /* \C */ 85 XML_REGEXP_DECIMAL, /* \d */ 86 XML_REGEXP_NOTDECIMAL, /* \d */ 87 XML_REGEXP_REALCHAR, /* \w */ 88 XML_REGEXP_NOTREALCHAR, /* \w */ 89 XML_REGEXP_LETTER, 90 XML_REGEXP_LETTER_UPPERCASE, 91 XML_REGEXP_LETTER_LOWERCASE, 92 XML_REGEXP_LETTER_TITLECASE, 93 XML_REGEXP_LETTER_MODIFIER, 94 XML_REGEXP_LETTER_OTHERS, 95 XML_REGEXP_MARK, 96 XML_REGEXP_MARK_NONSPACING, 97 XML_REGEXP_MARK_SPACECOMBINING, 98 XML_REGEXP_MARK_ENCLOSING, 99 XML_REGEXP_NUMBER, 100 XML_REGEXP_NUMBER_DECIMAL, 101 XML_REGEXP_NUMBER_LETTER, 102 XML_REGEXP_NUMBER_OTHERS, 103 XML_REGEXP_PUNCT, 104 XML_REGEXP_PUNCT_CONNECTOR, 105 XML_REGEXP_PUNCT_DASH, 106 XML_REGEXP_PUNCT_OPEN, 107 XML_REGEXP_PUNCT_CLOSE, 108 XML_REGEXP_PUNCT_INITQUOTE, 109 XML_REGEXP_PUNCT_FINQUOTE, 110 XML_REGEXP_PUNCT_OTHERS, 111 XML_REGEXP_SEPAR, 112 XML_REGEXP_SEPAR_SPACE, 113 XML_REGEXP_SEPAR_LINE, 114 XML_REGEXP_SEPAR_PARA, 115 XML_REGEXP_SYMBOL, 116 XML_REGEXP_SYMBOL_MATH, 117 XML_REGEXP_SYMBOL_CURRENCY, 118 XML_REGEXP_SYMBOL_MODIFIER, 119 XML_REGEXP_SYMBOL_OTHERS, 120 XML_REGEXP_OTHER, 121 XML_REGEXP_OTHER_CONTROL, 122 XML_REGEXP_OTHER_FORMAT, 123 XML_REGEXP_OTHER_PRIVATE, 124 XML_REGEXP_OTHER_NA, 125 XML_REGEXP_BLOCK_NAME 126} xmlRegAtomType; 127 128typedef enum { 129 XML_REGEXP_QUANT_EPSILON = 1, 130 XML_REGEXP_QUANT_ONCE, 131 XML_REGEXP_QUANT_OPT, 132 XML_REGEXP_QUANT_MULT, 133 XML_REGEXP_QUANT_PLUS, 134 XML_REGEXP_QUANT_ONCEONLY, 135 XML_REGEXP_QUANT_ALL, 136 XML_REGEXP_QUANT_RANGE 137} xmlRegQuantType; 138 139typedef enum { 140 XML_REGEXP_START_STATE = 1, 141 XML_REGEXP_FINAL_STATE, 142 XML_REGEXP_TRANS_STATE, 143 XML_REGEXP_SINK_STATE 144} xmlRegStateType; 145 146typedef enum { 147 XML_REGEXP_MARK_NORMAL = 0, 148 XML_REGEXP_MARK_START, 149 XML_REGEXP_MARK_VISITED 150} xmlRegMarkedType; 151 152typedef struct _xmlRegRange xmlRegRange; 153typedef xmlRegRange *xmlRegRangePtr; 154 155struct _xmlRegRange { 156 int neg; /* 0 normal, 1 not, 2 exclude */ 157 xmlRegAtomType type; 158 int start; 159 int end; 160 xmlChar *blockName; 161}; 162 163typedef struct _xmlRegAtom xmlRegAtom; 164typedef xmlRegAtom *xmlRegAtomPtr; 165 166typedef struct _xmlAutomataState xmlRegState; 167typedef xmlRegState *xmlRegStatePtr; 168 169struct _xmlRegAtom { 170 int no; 171 xmlRegAtomType type; 172 xmlRegQuantType quant; 173 int min; 174 int max; 175 176 void *valuep; 177 void *valuep2; 178 int neg; 179 int codepoint; 180 xmlRegStatePtr start; 181 xmlRegStatePtr stop; 182 int maxRanges; 183 int nbRanges; 184 xmlRegRangePtr *ranges; 185 void *data; 186}; 187 188typedef struct _xmlRegCounter xmlRegCounter; 189typedef xmlRegCounter *xmlRegCounterPtr; 190 191struct _xmlRegCounter { 192 int min; 193 int max; 194}; 195 196typedef struct _xmlRegTrans xmlRegTrans; 197typedef xmlRegTrans *xmlRegTransPtr; 198 199struct _xmlRegTrans { 200 xmlRegAtomPtr atom; 201 int to; 202 int counter; 203 int count; 204}; 205 206struct _xmlAutomataState { 207 xmlRegStateType type; 208 xmlRegMarkedType mark; 209 xmlRegMarkedType reached; 210 int no; 211 int maxTrans; 212 int nbTrans; 213 xmlRegTrans *trans; 214 /* knowing states ponting to us can speed things up */ 215 int maxTransTo; 216 int nbTransTo; 217 int *transTo; 218}; 219 220typedef struct _xmlAutomata xmlRegParserCtxt; 221typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; 222 223struct _xmlAutomata { 224 xmlChar *string; 225 xmlChar *cur; 226 227 int error; 228 int neg; 229 230 xmlRegStatePtr start; 231 xmlRegStatePtr end; 232 xmlRegStatePtr state; 233 234 xmlRegAtomPtr atom; 235 236 int maxAtoms; 237 int nbAtoms; 238 xmlRegAtomPtr *atoms; 239 240 int maxStates; 241 int nbStates; 242 xmlRegStatePtr *states; 243 244 int maxCounters; 245 int nbCounters; 246 xmlRegCounter *counters; 247 248 int determinist; 249}; 250 251struct _xmlRegexp { 252 xmlChar *string; 253 int nbStates; 254 xmlRegStatePtr *states; 255 int nbAtoms; 256 xmlRegAtomPtr *atoms; 257 int nbCounters; 258 xmlRegCounter *counters; 259 int determinist; 260 /* 261 * That's the compact form for determinists automatas 262 */ 263 int nbstates; 264 int *compact; 265 void **transdata; 266 int nbstrings; 267 xmlChar **stringMap; 268}; 269 270typedef struct _xmlRegExecRollback xmlRegExecRollback; 271typedef xmlRegExecRollback *xmlRegExecRollbackPtr; 272 273struct _xmlRegExecRollback { 274 xmlRegStatePtr state;/* the current state */ 275 int index; /* the index in the input stack */ 276 int nextbranch; /* the next transition to explore in that state */ 277 int *counts; /* save the automata state if it has some */ 278}; 279 280typedef struct _xmlRegInputToken xmlRegInputToken; 281typedef xmlRegInputToken *xmlRegInputTokenPtr; 282 283struct _xmlRegInputToken { 284 xmlChar *value; 285 void *data; 286}; 287 288struct _xmlRegExecCtxt { 289 int status; /* execution status != 0 indicate an error */ 290 int determinist; /* did we find an indeterministic behaviour */ 291 xmlRegexpPtr comp; /* the compiled regexp */ 292 xmlRegExecCallbacks callback; 293 void *data; 294 295 xmlRegStatePtr state;/* the current state */ 296 int transno; /* the current transition on that state */ 297 int transcount; /* the number of chars in char counted transitions */ 298 299 /* 300 * A stack of rollback states 301 */ 302 int maxRollbacks; 303 int nbRollbacks; 304 xmlRegExecRollback *rollbacks; 305 306 /* 307 * The state of the automata if any 308 */ 309 int *counts; 310 311 /* 312 * The input stack 313 */ 314 int inputStackMax; 315 int inputStackNr; 316 int index; 317 int *charStack; 318 const xmlChar *inputString; /* when operating on characters */ 319 xmlRegInputTokenPtr inputStack;/* when operating on strings */ 320 321 /* 322 * error handling 323 */ 324 int errStateNo; /* the error state number */ 325 xmlRegStatePtr errState; /* the error state */ 326 xmlChar *errString; /* the string raising the error */ 327 int *errCounts; /* counters at the error state */ 328}; 329 330#define REGEXP_ALL_COUNTER 0x123456 331#define REGEXP_ALL_LAX_COUNTER 0x123457 332 333static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); 334static void xmlRegFreeState(xmlRegStatePtr state); 335static void xmlRegFreeAtom(xmlRegAtomPtr atom); 336static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); 337 338/************************************************************************ 339 * * 340 * Regexp memory error handler * 341 * * 342 ************************************************************************/ 343/** 344 * xmlRegexpErrMemory: 345 * @extra: extra information 346 * 347 * Handle an out of memory condition 348 */ 349static void 350xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) 351{ 352 const char *regexp = NULL; 353 if (ctxt != NULL) { 354 regexp = (const char *) ctxt->string; 355 ctxt->error = XML_ERR_NO_MEMORY; 356 } 357 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 358 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 359 regexp, NULL, 0, 0, 360 "Memory allocation failed : %s\n", extra); 361} 362 363/** 364 * xmlRegexpErrCompile: 365 * @extra: extra information 366 * 367 * Handle a compilation failure 368 */ 369static void 370xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra) 371{ 372 const char *regexp = NULL; 373 int idx = 0; 374 375 if (ctxt != NULL) { 376 regexp = (const char *) ctxt->string; 377 idx = ctxt->cur - ctxt->string; 378 ctxt->error = XML_REGEXP_COMPILE_ERROR; 379 } 380 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 381 XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, 382 regexp, NULL, idx, 0, 383 "failed to compile: %s\n", extra); 384} 385 386/************************************************************************ 387 * * 388 * Allocation/Deallocation * 389 * * 390 ************************************************************************/ 391 392static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); 393/** 394 * xmlRegEpxFromParse: 395 * @ctxt: the parser context used to build it 396 * 397 * Allocate a new regexp and fill it with the result from the parser 398 * 399 * Returns the new regexp or NULL in case of error 400 */ 401static xmlRegexpPtr 402xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { 403 xmlRegexpPtr ret; 404 405 ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); 406 if (ret == NULL) { 407 xmlRegexpErrMemory(ctxt, "compiling regexp"); 408 return(NULL); 409 } 410 memset(ret, 0, sizeof(xmlRegexp)); 411 ret->string = ctxt->string; 412 ret->nbStates = ctxt->nbStates; 413 ret->states = ctxt->states; 414 ret->nbAtoms = ctxt->nbAtoms; 415 ret->atoms = ctxt->atoms; 416 ret->nbCounters = ctxt->nbCounters; 417 ret->counters = ctxt->counters; 418 ret->determinist = ctxt->determinist; 419 420 if ((ret->determinist != 0) && 421 (ret->nbCounters == 0) && 422 (ret->atoms != NULL) && 423 (ret->atoms[0] != NULL) && 424 (ret->atoms[0]->type == XML_REGEXP_STRING)) { 425 int i, j, nbstates = 0, nbatoms = 0; 426 int *stateRemap; 427 int *stringRemap; 428 int *transitions; 429 void **transdata; 430 xmlChar **stringMap; 431 xmlChar *value; 432 433 /* 434 * Switch to a compact representation 435 * 1/ counting the effective number of states left 436 * 2/ counting the unique number of atoms, and check that 437 * they are all of the string type 438 * 3/ build a table state x atom for the transitions 439 */ 440 441 stateRemap = xmlMalloc(ret->nbStates * sizeof(int)); 442 if (stateRemap == NULL) { 443 xmlRegexpErrMemory(ctxt, "compiling regexp"); 444 xmlFree(ret); 445 return(NULL); 446 } 447 for (i = 0;i < ret->nbStates;i++) { 448 if (ret->states[i] != NULL) { 449 stateRemap[i] = nbstates; 450 nbstates++; 451 } else { 452 stateRemap[i] = -1; 453 } 454 } 455#ifdef DEBUG_COMPACTION 456 printf("Final: %d states\n", nbstates); 457#endif 458 stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *)); 459 if (stringMap == NULL) { 460 xmlRegexpErrMemory(ctxt, "compiling regexp"); 461 xmlFree(stateRemap); 462 xmlFree(ret); 463 return(NULL); 464 } 465 stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int)); 466 if (stringRemap == NULL) { 467 xmlRegexpErrMemory(ctxt, "compiling regexp"); 468 xmlFree(stringMap); 469 xmlFree(stateRemap); 470 xmlFree(ret); 471 return(NULL); 472 } 473 for (i = 0;i < ret->nbAtoms;i++) { 474 if ((ret->atoms[i]->type == XML_REGEXP_STRING) && 475 (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { 476 value = ret->atoms[i]->valuep; 477 for (j = 0;j < nbatoms;j++) { 478 if (xmlStrEqual(stringMap[j], value)) { 479 stringRemap[i] = j; 480 break; 481 } 482 } 483 if (j >= nbatoms) { 484 stringRemap[i] = nbatoms; 485 stringMap[nbatoms] = xmlStrdup(value); 486 if (stringMap[nbatoms] == NULL) { 487 for (i = 0;i < nbatoms;i++) 488 xmlFree(stringMap[i]); 489 xmlFree(stringRemap); 490 xmlFree(stringMap); 491 xmlFree(stateRemap); 492 xmlFree(ret); 493 return(NULL); 494 } 495 nbatoms++; 496 } 497 } else { 498 xmlFree(stateRemap); 499 xmlFree(stringRemap); 500 for (i = 0;i < nbatoms;i++) 501 xmlFree(stringMap[i]); 502 xmlFree(stringMap); 503 xmlFree(ret); 504 return(NULL); 505 } 506 } 507#ifdef DEBUG_COMPACTION 508 printf("Final: %d atoms\n", nbatoms); 509#endif 510 transitions = (int *) xmlMalloc((nbstates + 1) * 511 (nbatoms + 1) * sizeof(int)); 512 if (transitions == NULL) { 513 xmlFree(stateRemap); 514 xmlFree(stringRemap); 515 xmlFree(stringMap); 516 xmlFree(ret); 517 return(NULL); 518 } 519 memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int)); 520 521 /* 522 * Allocate the transition table. The first entry for each 523 * state corresponds to the state type. 524 */ 525 transdata = NULL; 526 527 for (i = 0;i < ret->nbStates;i++) { 528 int stateno, atomno, targetno, prev; 529 xmlRegStatePtr state; 530 xmlRegTransPtr trans; 531 532 stateno = stateRemap[i]; 533 if (stateno == -1) 534 continue; 535 state = ret->states[i]; 536 537 transitions[stateno * (nbatoms + 1)] = state->type; 538 539 for (j = 0;j < state->nbTrans;j++) { 540 trans = &(state->trans[j]); 541 if ((trans->to == -1) || (trans->atom == NULL)) 542 continue; 543 atomno = stringRemap[trans->atom->no]; 544 if ((trans->atom->data != NULL) && (transdata == NULL)) { 545 transdata = (void **) xmlMalloc(nbstates * nbatoms * 546 sizeof(void *)); 547 if (transdata != NULL) 548 memset(transdata, 0, 549 nbstates * nbatoms * sizeof(void *)); 550 else { 551 xmlRegexpErrMemory(ctxt, "compiling regexp"); 552 break; 553 } 554 } 555 targetno = stateRemap[trans->to]; 556 /* 557 * if the same atom can generate transitions to 2 different 558 * states then it means the automata is not determinist and 559 * the compact form can't be used ! 560 */ 561 prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; 562 if (prev != 0) { 563 if (prev != targetno + 1) { 564 ret->determinist = 0; 565#ifdef DEBUG_COMPACTION 566 printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", 567 i, j, trans->atom->no, trans->to, atomno, targetno); 568 printf(" previous to is %d\n", prev); 569#endif 570 ret->determinist = 0; 571 if (transdata != NULL) 572 xmlFree(transdata); 573 xmlFree(transitions); 574 xmlFree(stateRemap); 575 xmlFree(stringRemap); 576 for (i = 0;i < nbatoms;i++) 577 xmlFree(stringMap[i]); 578 xmlFree(stringMap); 579 goto not_determ; 580 } 581 } else { 582#if 0 583 printf("State %d trans %d: atom %d to %d : %d to %d\n", 584 i, j, trans->atom->no, trans->to, atomno, targetno); 585#endif 586 transitions[stateno * (nbatoms + 1) + atomno + 1] = 587 targetno + 1; /* to avoid 0 */ 588 if (transdata != NULL) 589 transdata[stateno * nbatoms + atomno] = 590 trans->atom->data; 591 } 592 } 593 } 594 ret->determinist = 1; 595#ifdef DEBUG_COMPACTION 596 /* 597 * Debug 598 */ 599 for (i = 0;i < nbstates;i++) { 600 for (j = 0;j < nbatoms + 1;j++) { 601 printf("%02d ", transitions[i * (nbatoms + 1) + j]); 602 } 603 printf("\n"); 604 } 605 printf("\n"); 606#endif 607 /* 608 * Cleanup of the old data 609 */ 610 if (ret->states != NULL) { 611 for (i = 0;i < ret->nbStates;i++) 612 xmlRegFreeState(ret->states[i]); 613 xmlFree(ret->states); 614 } 615 ret->states = NULL; 616 ret->nbStates = 0; 617 if (ret->atoms != NULL) { 618 for (i = 0;i < ret->nbAtoms;i++) 619 xmlRegFreeAtom(ret->atoms[i]); 620 xmlFree(ret->atoms); 621 } 622 ret->atoms = NULL; 623 ret->nbAtoms = 0; 624 625 ret->compact = transitions; 626 ret->transdata = transdata; 627 ret->stringMap = stringMap; 628 ret->nbstrings = nbatoms; 629 ret->nbstates = nbstates; 630 xmlFree(stateRemap); 631 xmlFree(stringRemap); 632 } 633not_determ: 634 ctxt->string = NULL; 635 ctxt->nbStates = 0; 636 ctxt->states = NULL; 637 ctxt->nbAtoms = 0; 638 ctxt->atoms = NULL; 639 ctxt->nbCounters = 0; 640 ctxt->counters = NULL; 641 return(ret); 642} 643 644/** 645 * xmlRegNewParserCtxt: 646 * @string: the string to parse 647 * 648 * Allocate a new regexp parser context 649 * 650 * Returns the new context or NULL in case of error 651 */ 652static xmlRegParserCtxtPtr 653xmlRegNewParserCtxt(const xmlChar *string) { 654 xmlRegParserCtxtPtr ret; 655 656 ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); 657 if (ret == NULL) 658 return(NULL); 659 memset(ret, 0, sizeof(xmlRegParserCtxt)); 660 if (string != NULL) 661 ret->string = xmlStrdup(string); 662 ret->cur = ret->string; 663 ret->neg = 0; 664 ret->error = 0; 665 ret->determinist = -1; 666 return(ret); 667} 668 669/** 670 * xmlRegNewRange: 671 * @ctxt: the regexp parser context 672 * @neg: is that negative 673 * @type: the type of range 674 * @start: the start codepoint 675 * @end: the end codepoint 676 * 677 * Allocate a new regexp range 678 * 679 * Returns the new range or NULL in case of error 680 */ 681static xmlRegRangePtr 682xmlRegNewRange(xmlRegParserCtxtPtr ctxt, 683 int neg, xmlRegAtomType type, int start, int end) { 684 xmlRegRangePtr ret; 685 686 ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); 687 if (ret == NULL) { 688 xmlRegexpErrMemory(ctxt, "allocating range"); 689 return(NULL); 690 } 691 ret->neg = neg; 692 ret->type = type; 693 ret->start = start; 694 ret->end = end; 695 return(ret); 696} 697 698/** 699 * xmlRegFreeRange: 700 * @range: the regexp range 701 * 702 * Free a regexp range 703 */ 704static void 705xmlRegFreeRange(xmlRegRangePtr range) { 706 if (range == NULL) 707 return; 708 709 if (range->blockName != NULL) 710 xmlFree(range->blockName); 711 xmlFree(range); 712} 713 714/** 715 * xmlRegNewAtom: 716 * @ctxt: the regexp parser context 717 * @type: the type of atom 718 * 719 * Allocate a new regexp range 720 * 721 * Returns the new atom or NULL in case of error 722 */ 723static xmlRegAtomPtr 724xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { 725 xmlRegAtomPtr ret; 726 727 ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); 728 if (ret == NULL) { 729 xmlRegexpErrMemory(ctxt, "allocating atom"); 730 return(NULL); 731 } 732 memset(ret, 0, sizeof(xmlRegAtom)); 733 ret->type = type; 734 ret->quant = XML_REGEXP_QUANT_ONCE; 735 ret->min = 0; 736 ret->max = 0; 737 return(ret); 738} 739 740/** 741 * xmlRegFreeAtom: 742 * @atom: the regexp atom 743 * 744 * Free a regexp atom 745 */ 746static void 747xmlRegFreeAtom(xmlRegAtomPtr atom) { 748 int i; 749 750 if (atom == NULL) 751 return; 752 753 for (i = 0;i < atom->nbRanges;i++) 754 xmlRegFreeRange(atom->ranges[i]); 755 if (atom->ranges != NULL) 756 xmlFree(atom->ranges); 757 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL)) 758 xmlFree(atom->valuep); 759 if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL)) 760 xmlFree(atom->valuep2); 761 if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL)) 762 xmlFree(atom->valuep); 763 xmlFree(atom); 764} 765 766static xmlRegStatePtr 767xmlRegNewState(xmlRegParserCtxtPtr ctxt) { 768 xmlRegStatePtr ret; 769 770 ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); 771 if (ret == NULL) { 772 xmlRegexpErrMemory(ctxt, "allocating state"); 773 return(NULL); 774 } 775 memset(ret, 0, sizeof(xmlRegState)); 776 ret->type = XML_REGEXP_TRANS_STATE; 777 ret->mark = XML_REGEXP_MARK_NORMAL; 778 return(ret); 779} 780 781/** 782 * xmlRegFreeState: 783 * @state: the regexp state 784 * 785 * Free a regexp state 786 */ 787static void 788xmlRegFreeState(xmlRegStatePtr state) { 789 if (state == NULL) 790 return; 791 792 if (state->trans != NULL) 793 xmlFree(state->trans); 794 if (state->transTo != NULL) 795 xmlFree(state->transTo); 796 xmlFree(state); 797} 798 799/** 800 * xmlRegFreeParserCtxt: 801 * @ctxt: the regexp parser context 802 * 803 * Free a regexp parser context 804 */ 805static void 806xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { 807 int i; 808 if (ctxt == NULL) 809 return; 810 811 if (ctxt->string != NULL) 812 xmlFree(ctxt->string); 813 if (ctxt->states != NULL) { 814 for (i = 0;i < ctxt->nbStates;i++) 815 xmlRegFreeState(ctxt->states[i]); 816 xmlFree(ctxt->states); 817 } 818 if (ctxt->atoms != NULL) { 819 for (i = 0;i < ctxt->nbAtoms;i++) 820 xmlRegFreeAtom(ctxt->atoms[i]); 821 xmlFree(ctxt->atoms); 822 } 823 if (ctxt->counters != NULL) 824 xmlFree(ctxt->counters); 825 xmlFree(ctxt); 826} 827 828/************************************************************************ 829 * * 830 * Display of Data structures * 831 * * 832 ************************************************************************/ 833 834static void 835xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { 836 switch (type) { 837 case XML_REGEXP_EPSILON: 838 fprintf(output, "epsilon "); break; 839 case XML_REGEXP_CHARVAL: 840 fprintf(output, "charval "); break; 841 case XML_REGEXP_RANGES: 842 fprintf(output, "ranges "); break; 843 case XML_REGEXP_SUBREG: 844 fprintf(output, "subexpr "); break; 845 case XML_REGEXP_STRING: 846 fprintf(output, "string "); break; 847 case XML_REGEXP_ANYCHAR: 848 fprintf(output, "anychar "); break; 849 case XML_REGEXP_ANYSPACE: 850 fprintf(output, "anyspace "); break; 851 case XML_REGEXP_NOTSPACE: 852 fprintf(output, "notspace "); break; 853 case XML_REGEXP_INITNAME: 854 fprintf(output, "initname "); break; 855 case XML_REGEXP_NOTINITNAME: 856 fprintf(output, "notinitname "); break; 857 case XML_REGEXP_NAMECHAR: 858 fprintf(output, "namechar "); break; 859 case XML_REGEXP_NOTNAMECHAR: 860 fprintf(output, "notnamechar "); break; 861 case XML_REGEXP_DECIMAL: 862 fprintf(output, "decimal "); break; 863 case XML_REGEXP_NOTDECIMAL: 864 fprintf(output, "notdecimal "); break; 865 case XML_REGEXP_REALCHAR: 866 fprintf(output, "realchar "); break; 867 case XML_REGEXP_NOTREALCHAR: 868 fprintf(output, "notrealchar "); break; 869 case XML_REGEXP_LETTER: 870 fprintf(output, "LETTER "); break; 871 case XML_REGEXP_LETTER_UPPERCASE: 872 fprintf(output, "LETTER_UPPERCASE "); break; 873 case XML_REGEXP_LETTER_LOWERCASE: 874 fprintf(output, "LETTER_LOWERCASE "); break; 875 case XML_REGEXP_LETTER_TITLECASE: 876 fprintf(output, "LETTER_TITLECASE "); break; 877 case XML_REGEXP_LETTER_MODIFIER: 878 fprintf(output, "LETTER_MODIFIER "); break; 879 case XML_REGEXP_LETTER_OTHERS: 880 fprintf(output, "LETTER_OTHERS "); break; 881 case XML_REGEXP_MARK: 882 fprintf(output, "MARK "); break; 883 case XML_REGEXP_MARK_NONSPACING: 884 fprintf(output, "MARK_NONSPACING "); break; 885 case XML_REGEXP_MARK_SPACECOMBINING: 886 fprintf(output, "MARK_SPACECOMBINING "); break; 887 case XML_REGEXP_MARK_ENCLOSING: 888 fprintf(output, "MARK_ENCLOSING "); break; 889 case XML_REGEXP_NUMBER: 890 fprintf(output, "NUMBER "); break; 891 case XML_REGEXP_NUMBER_DECIMAL: 892 fprintf(output, "NUMBER_DECIMAL "); break; 893 case XML_REGEXP_NUMBER_LETTER: 894 fprintf(output, "NUMBER_LETTER "); break; 895 case XML_REGEXP_NUMBER_OTHERS: 896 fprintf(output, "NUMBER_OTHERS "); break; 897 case XML_REGEXP_PUNCT: 898 fprintf(output, "PUNCT "); break; 899 case XML_REGEXP_PUNCT_CONNECTOR: 900 fprintf(output, "PUNCT_CONNECTOR "); break; 901 case XML_REGEXP_PUNCT_DASH: 902 fprintf(output, "PUNCT_DASH "); break; 903 case XML_REGEXP_PUNCT_OPEN: 904 fprintf(output, "PUNCT_OPEN "); break; 905 case XML_REGEXP_PUNCT_CLOSE: 906 fprintf(output, "PUNCT_CLOSE "); break; 907 case XML_REGEXP_PUNCT_INITQUOTE: 908 fprintf(output, "PUNCT_INITQUOTE "); break; 909 case XML_REGEXP_PUNCT_FINQUOTE: 910 fprintf(output, "PUNCT_FINQUOTE "); break; 911 case XML_REGEXP_PUNCT_OTHERS: 912 fprintf(output, "PUNCT_OTHERS "); break; 913 case XML_REGEXP_SEPAR: 914 fprintf(output, "SEPAR "); break; 915 case XML_REGEXP_SEPAR_SPACE: 916 fprintf(output, "SEPAR_SPACE "); break; 917 case XML_REGEXP_SEPAR_LINE: 918 fprintf(output, "SEPAR_LINE "); break; 919 case XML_REGEXP_SEPAR_PARA: 920 fprintf(output, "SEPAR_PARA "); break; 921 case XML_REGEXP_SYMBOL: 922 fprintf(output, "SYMBOL "); break; 923 case XML_REGEXP_SYMBOL_MATH: 924 fprintf(output, "SYMBOL_MATH "); break; 925 case XML_REGEXP_SYMBOL_CURRENCY: 926 fprintf(output, "SYMBOL_CURRENCY "); break; 927 case XML_REGEXP_SYMBOL_MODIFIER: 928 fprintf(output, "SYMBOL_MODIFIER "); break; 929 case XML_REGEXP_SYMBOL_OTHERS: 930 fprintf(output, "SYMBOL_OTHERS "); break; 931 case XML_REGEXP_OTHER: 932 fprintf(output, "OTHER "); break; 933 case XML_REGEXP_OTHER_CONTROL: 934 fprintf(output, "OTHER_CONTROL "); break; 935 case XML_REGEXP_OTHER_FORMAT: 936 fprintf(output, "OTHER_FORMAT "); break; 937 case XML_REGEXP_OTHER_PRIVATE: 938 fprintf(output, "OTHER_PRIVATE "); break; 939 case XML_REGEXP_OTHER_NA: 940 fprintf(output, "OTHER_NA "); break; 941 case XML_REGEXP_BLOCK_NAME: 942 fprintf(output, "BLOCK "); break; 943 } 944} 945 946static void 947xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { 948 switch (type) { 949 case XML_REGEXP_QUANT_EPSILON: 950 fprintf(output, "epsilon "); break; 951 case XML_REGEXP_QUANT_ONCE: 952 fprintf(output, "once "); break; 953 case XML_REGEXP_QUANT_OPT: 954 fprintf(output, "? "); break; 955 case XML_REGEXP_QUANT_MULT: 956 fprintf(output, "* "); break; 957 case XML_REGEXP_QUANT_PLUS: 958 fprintf(output, "+ "); break; 959 case XML_REGEXP_QUANT_RANGE: 960 fprintf(output, "range "); break; 961 case XML_REGEXP_QUANT_ONCEONLY: 962 fprintf(output, "onceonly "); break; 963 case XML_REGEXP_QUANT_ALL: 964 fprintf(output, "all "); break; 965 } 966} 967static void 968xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { 969 fprintf(output, " range: "); 970 if (range->neg) 971 fprintf(output, "negative "); 972 xmlRegPrintAtomType(output, range->type); 973 fprintf(output, "%c - %c\n", range->start, range->end); 974} 975 976static void 977xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { 978 fprintf(output, " atom: "); 979 if (atom == NULL) { 980 fprintf(output, "NULL\n"); 981 return; 982 } 983 if (atom->neg) 984 fprintf(output, "not "); 985 xmlRegPrintAtomType(output, atom->type); 986 xmlRegPrintQuantType(output, atom->quant); 987 if (atom->quant == XML_REGEXP_QUANT_RANGE) 988 fprintf(output, "%d-%d ", atom->min, atom->max); 989 if (atom->type == XML_REGEXP_STRING) 990 fprintf(output, "'%s' ", (char *) atom->valuep); 991 if (atom->type == XML_REGEXP_CHARVAL) 992 fprintf(output, "char %c\n", atom->codepoint); 993 else if (atom->type == XML_REGEXP_RANGES) { 994 int i; 995 fprintf(output, "%d entries\n", atom->nbRanges); 996 for (i = 0; i < atom->nbRanges;i++) 997 xmlRegPrintRange(output, atom->ranges[i]); 998 } else if (atom->type == XML_REGEXP_SUBREG) { 999 fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); 1000 } else { 1001 fprintf(output, "\n"); 1002 } 1003} 1004 1005static void 1006xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { 1007 fprintf(output, " trans: "); 1008 if (trans == NULL) { 1009 fprintf(output, "NULL\n"); 1010 return; 1011 } 1012 if (trans->to < 0) { 1013 fprintf(output, "removed\n"); 1014 return; 1015 } 1016 if (trans->counter >= 0) { 1017 fprintf(output, "counted %d, ", trans->counter); 1018 } 1019 if (trans->count == REGEXP_ALL_COUNTER) { 1020 fprintf(output, "all transition, "); 1021 } else if (trans->count >= 0) { 1022 fprintf(output, "count based %d, ", trans->count); 1023 } 1024 if (trans->atom == NULL) { 1025 fprintf(output, "epsilon to %d\n", trans->to); 1026 return; 1027 } 1028 if (trans->atom->type == XML_REGEXP_CHARVAL) 1029 fprintf(output, "char %c ", trans->atom->codepoint); 1030 fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); 1031} 1032 1033static void 1034xmlRegPrintState(FILE *output, xmlRegStatePtr state) { 1035 int i; 1036 1037 fprintf(output, " state: "); 1038 if (state == NULL) { 1039 fprintf(output, "NULL\n"); 1040 return; 1041 } 1042 if (state->type == XML_REGEXP_START_STATE) 1043 fprintf(output, "START "); 1044 if (state->type == XML_REGEXP_FINAL_STATE) 1045 fprintf(output, "FINAL "); 1046 1047 fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); 1048 for (i = 0;i < state->nbTrans; i++) { 1049 xmlRegPrintTrans(output, &(state->trans[i])); 1050 } 1051} 1052 1053#ifdef DEBUG_REGEXP_GRAPH 1054static void 1055xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { 1056 int i; 1057 1058 fprintf(output, " ctxt: "); 1059 if (ctxt == NULL) { 1060 fprintf(output, "NULL\n"); 1061 return; 1062 } 1063 fprintf(output, "'%s' ", ctxt->string); 1064 if (ctxt->error) 1065 fprintf(output, "error "); 1066 if (ctxt->neg) 1067 fprintf(output, "neg "); 1068 fprintf(output, "\n"); 1069 fprintf(output, "%d atoms:\n", ctxt->nbAtoms); 1070 for (i = 0;i < ctxt->nbAtoms; i++) { 1071 fprintf(output, " %02d ", i); 1072 xmlRegPrintAtom(output, ctxt->atoms[i]); 1073 } 1074 if (ctxt->atom != NULL) { 1075 fprintf(output, "current atom:\n"); 1076 xmlRegPrintAtom(output, ctxt->atom); 1077 } 1078 fprintf(output, "%d states:", ctxt->nbStates); 1079 if (ctxt->start != NULL) 1080 fprintf(output, " start: %d", ctxt->start->no); 1081 if (ctxt->end != NULL) 1082 fprintf(output, " end: %d", ctxt->end->no); 1083 fprintf(output, "\n"); 1084 for (i = 0;i < ctxt->nbStates; i++) { 1085 xmlRegPrintState(output, ctxt->states[i]); 1086 } 1087 fprintf(output, "%d counters:\n", ctxt->nbCounters); 1088 for (i = 0;i < ctxt->nbCounters; i++) { 1089 fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, 1090 ctxt->counters[i].max); 1091 } 1092} 1093#endif 1094 1095/************************************************************************ 1096 * * 1097 * Finite Automata structures manipulations * 1098 * * 1099 ************************************************************************/ 1100 1101static void 1102xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, 1103 int neg, xmlRegAtomType type, int start, int end, 1104 xmlChar *blockName) { 1105 xmlRegRangePtr range; 1106 1107 if (atom == NULL) { 1108 ERROR("add range: atom is NULL"); 1109 return; 1110 } 1111 if (atom->type != XML_REGEXP_RANGES) { 1112 ERROR("add range: atom is not ranges"); 1113 return; 1114 } 1115 if (atom->maxRanges == 0) { 1116 atom->maxRanges = 4; 1117 atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * 1118 sizeof(xmlRegRangePtr)); 1119 if (atom->ranges == NULL) { 1120 xmlRegexpErrMemory(ctxt, "adding ranges"); 1121 atom->maxRanges = 0; 1122 return; 1123 } 1124 } else if (atom->nbRanges >= atom->maxRanges) { 1125 xmlRegRangePtr *tmp; 1126 atom->maxRanges *= 2; 1127 tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * 1128 sizeof(xmlRegRangePtr)); 1129 if (tmp == NULL) { 1130 xmlRegexpErrMemory(ctxt, "adding ranges"); 1131 atom->maxRanges /= 2; 1132 return; 1133 } 1134 atom->ranges = tmp; 1135 } 1136 range = xmlRegNewRange(ctxt, neg, type, start, end); 1137 if (range == NULL) 1138 return; 1139 range->blockName = blockName; 1140 atom->ranges[atom->nbRanges++] = range; 1141 1142} 1143 1144static int 1145xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { 1146 if (ctxt->maxCounters == 0) { 1147 ctxt->maxCounters = 4; 1148 ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * 1149 sizeof(xmlRegCounter)); 1150 if (ctxt->counters == NULL) { 1151 xmlRegexpErrMemory(ctxt, "allocating counter"); 1152 ctxt->maxCounters = 0; 1153 return(-1); 1154 } 1155 } else if (ctxt->nbCounters >= ctxt->maxCounters) { 1156 xmlRegCounter *tmp; 1157 ctxt->maxCounters *= 2; 1158 tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * 1159 sizeof(xmlRegCounter)); 1160 if (tmp == NULL) { 1161 xmlRegexpErrMemory(ctxt, "allocating counter"); 1162 ctxt->maxCounters /= 2; 1163 return(-1); 1164 } 1165 ctxt->counters = tmp; 1166 } 1167 ctxt->counters[ctxt->nbCounters].min = -1; 1168 ctxt->counters[ctxt->nbCounters].max = -1; 1169 return(ctxt->nbCounters++); 1170} 1171 1172static int 1173xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { 1174 if (atom == NULL) { 1175 ERROR("atom push: atom is NULL"); 1176 return(-1); 1177 } 1178 if (ctxt->maxAtoms == 0) { 1179 ctxt->maxAtoms = 4; 1180 ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * 1181 sizeof(xmlRegAtomPtr)); 1182 if (ctxt->atoms == NULL) { 1183 xmlRegexpErrMemory(ctxt, "pushing atom"); 1184 ctxt->maxAtoms = 0; 1185 return(-1); 1186 } 1187 } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { 1188 xmlRegAtomPtr *tmp; 1189 ctxt->maxAtoms *= 2; 1190 tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * 1191 sizeof(xmlRegAtomPtr)); 1192 if (tmp == NULL) { 1193 xmlRegexpErrMemory(ctxt, "allocating counter"); 1194 ctxt->maxAtoms /= 2; 1195 return(-1); 1196 } 1197 ctxt->atoms = tmp; 1198 } 1199 atom->no = ctxt->nbAtoms; 1200 ctxt->atoms[ctxt->nbAtoms++] = atom; 1201 return(0); 1202} 1203 1204static void 1205xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, 1206 int from) { 1207 if (target->maxTransTo == 0) { 1208 target->maxTransTo = 8; 1209 target->transTo = (int *) xmlMalloc(target->maxTransTo * 1210 sizeof(int)); 1211 if (target->transTo == NULL) { 1212 xmlRegexpErrMemory(ctxt, "adding transition"); 1213 target->maxTransTo = 0; 1214 return; 1215 } 1216 } else if (target->nbTransTo >= target->maxTransTo) { 1217 int *tmp; 1218 target->maxTransTo *= 2; 1219 tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * 1220 sizeof(int)); 1221 if (tmp == NULL) { 1222 xmlRegexpErrMemory(ctxt, "adding transition"); 1223 target->maxTransTo /= 2; 1224 return; 1225 } 1226 target->transTo = tmp; 1227 } 1228 target->transTo[target->nbTransTo] = from; 1229 target->nbTransTo++; 1230} 1231 1232static void 1233xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 1234 xmlRegAtomPtr atom, xmlRegStatePtr target, 1235 int counter, int count, int nchk) { 1236 1237 int nrtrans; 1238 1239 if (state == NULL) { 1240 ERROR("add state: state is NULL"); 1241 return; 1242 } 1243 if (target == NULL) { 1244 ERROR("add state: target is NULL"); 1245 return; 1246 } 1247 /* 1248 * Other routines follow the philosophy 'When in doubt, add a transition' 1249 * so we check here whether such a transition is already present and, if 1250 * so, silently ignore this request. 1251 */ 1252 1253 if (nchk == 0) { 1254 for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) { 1255 xmlRegTransPtr trans = &(state->trans[nrtrans]); 1256 if ((trans->atom == atom) && 1257 (trans->to == target->no) && 1258 (trans->counter == counter) && 1259 (trans->count == count)) { 1260#ifdef DEBUG_REGEXP_GRAPH 1261 printf("Ignoring duplicate transition from %d to %d\n", 1262 state->no, target->no); 1263#endif 1264 return; 1265 } 1266 } 1267 } 1268 1269 if (state->maxTrans == 0) { 1270 state->maxTrans = 8; 1271 state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * 1272 sizeof(xmlRegTrans)); 1273 if (state->trans == NULL) { 1274 xmlRegexpErrMemory(ctxt, "adding transition"); 1275 state->maxTrans = 0; 1276 return; 1277 } 1278 } else if (state->nbTrans >= state->maxTrans) { 1279 xmlRegTrans *tmp; 1280 state->maxTrans *= 2; 1281 tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * 1282 sizeof(xmlRegTrans)); 1283 if (tmp == NULL) { 1284 xmlRegexpErrMemory(ctxt, "adding transition"); 1285 state->maxTrans /= 2; 1286 return; 1287 } 1288 state->trans = tmp; 1289 } 1290#ifdef DEBUG_REGEXP_GRAPH 1291 printf("Add trans from %d to %d ", state->no, target->no); 1292 if (count == REGEXP_ALL_COUNTER) 1293 printf("all transition\n"); 1294 else if (count >= 0) 1295 printf("count based %d\n", count); 1296 else if (counter >= 0) 1297 printf("counted %d\n", counter); 1298 else if (atom == NULL) 1299 printf("epsilon transition\n"); 1300 else if (atom != NULL) 1301 xmlRegPrintAtom(stdout, atom); 1302#endif 1303 1304 state->trans[state->nbTrans].atom = atom; 1305 state->trans[state->nbTrans].to = target->no; 1306 state->trans[state->nbTrans].counter = counter; 1307 state->trans[state->nbTrans].count = count; 1308 state->nbTrans++; 1309 xmlRegStateAddTransTo(ctxt, target, state->no); 1310} 1311 1312static int 1313xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { 1314 if (state == NULL) return(-1); 1315 if (ctxt->maxStates == 0) { 1316 ctxt->maxStates = 4; 1317 ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * 1318 sizeof(xmlRegStatePtr)); 1319 if (ctxt->states == NULL) { 1320 xmlRegexpErrMemory(ctxt, "adding state"); 1321 ctxt->maxStates = 0; 1322 return(-1); 1323 } 1324 } else if (ctxt->nbStates >= ctxt->maxStates) { 1325 xmlRegStatePtr *tmp; 1326 ctxt->maxStates *= 2; 1327 tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * 1328 sizeof(xmlRegStatePtr)); 1329 if (tmp == NULL) { 1330 xmlRegexpErrMemory(ctxt, "adding state"); 1331 ctxt->maxStates /= 2; 1332 return(-1); 1333 } 1334 ctxt->states = tmp; 1335 } 1336 state->no = ctxt->nbStates; 1337 ctxt->states[ctxt->nbStates++] = state; 1338 return(0); 1339} 1340 1341/** 1342 * xmlFAGenerateAllTransition: 1343 * @ctxt: a regexp parser context 1344 * @from: the from state 1345 * @to: the target state or NULL for building a new one 1346 * @lax: 1347 * 1348 */ 1349static void 1350xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, 1351 xmlRegStatePtr from, xmlRegStatePtr to, 1352 int lax) { 1353 if (to == NULL) { 1354 to = xmlRegNewState(ctxt); 1355 xmlRegStatePush(ctxt, to); 1356 ctxt->state = to; 1357 } 1358 if (lax) 1359 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER, 0); 1360 else 1361 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER, 0); 1362} 1363 1364/** 1365 * xmlFAGenerateEpsilonTransition: 1366 * @ctxt: a regexp parser context 1367 * @from: the from state 1368 * @to: the target state or NULL for building a new one 1369 * 1370 */ 1371static void 1372xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, 1373 xmlRegStatePtr from, xmlRegStatePtr to) { 1374 if (to == NULL) { 1375 to = xmlRegNewState(ctxt); 1376 xmlRegStatePush(ctxt, to); 1377 ctxt->state = to; 1378 } 1379 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1, 0); 1380} 1381 1382/** 1383 * xmlFAGenerateCountedEpsilonTransition: 1384 * @ctxt: a regexp parser context 1385 * @from: the from state 1386 * @to: the target state or NULL for building a new one 1387 * counter: the counter for that transition 1388 * 1389 */ 1390static void 1391xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, 1392 xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 1393 if (to == NULL) { 1394 to = xmlRegNewState(ctxt); 1395 xmlRegStatePush(ctxt, to); 1396 ctxt->state = to; 1397 } 1398 xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1, 0); 1399} 1400 1401/** 1402 * xmlFAGenerateCountedTransition: 1403 * @ctxt: a regexp parser context 1404 * @from: the from state 1405 * @to: the target state or NULL for building a new one 1406 * counter: the counter for that transition 1407 * 1408 */ 1409static void 1410xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, 1411 xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 1412 if (to == NULL) { 1413 to = xmlRegNewState(ctxt); 1414 xmlRegStatePush(ctxt, to); 1415 ctxt->state = to; 1416 } 1417 xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter, 0); 1418} 1419 1420/** 1421 * xmlFAGenerateTransitions: 1422 * @ctxt: a regexp parser context 1423 * @from: the from state 1424 * @to: the target state or NULL for building a new one 1425 * @atom: the atom generating the transition 1426 * 1427 * Returns 0 if success and -1 in case of error. 1428 */ 1429static int 1430xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, 1431 xmlRegStatePtr to, xmlRegAtomPtr atom) { 1432 if (atom == NULL) { 1433 ERROR("genrate transition: atom == NULL"); 1434 return(-1); 1435 } 1436 if (atom->type == XML_REGEXP_SUBREG) { 1437 /* 1438 * this is a subexpression handling one should not need to 1439 * create a new node except for XML_REGEXP_QUANT_RANGE. 1440 */ 1441 if (xmlRegAtomPush(ctxt, atom) < 0) { 1442 return(-1); 1443 } 1444 if ((to != NULL) && (atom->stop != to) && 1445 (atom->quant != XML_REGEXP_QUANT_RANGE)) { 1446 /* 1447 * Generate an epsilon transition to link to the target 1448 */ 1449 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 1450 } 1451 switch (atom->quant) { 1452 case XML_REGEXP_QUANT_OPT: 1453 atom->quant = XML_REGEXP_QUANT_ONCE; 1454 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); 1455 break; 1456 case XML_REGEXP_QUANT_MULT: 1457 atom->quant = XML_REGEXP_QUANT_ONCE; 1458 xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); 1459 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 1460 break; 1461 case XML_REGEXP_QUANT_PLUS: 1462 atom->quant = XML_REGEXP_QUANT_ONCE; 1463 xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 1464 break; 1465 case XML_REGEXP_QUANT_RANGE: { 1466 int counter; 1467 xmlRegStatePtr newstate; 1468 1469 /* 1470 * This one is nasty: 1471 * 1/ if range has minOccurs == 0, create a new state 1472 * and create epsilon transitions from atom->start 1473 * to atom->stop, as well as atom->start to the new 1474 * state 1475 * 2/ register a new counter 1476 * 3/ register an epsilon transition associated to 1477 * this counter going from atom->stop to atom->start 1478 * 4/ create a new state 1479 * 5/ generate a counted transition from atom->stop to 1480 * that state 1481 */ 1482 if (atom->min == 0) { 1483 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 1484 atom->stop); 1485 newstate = xmlRegNewState(ctxt); 1486 xmlRegStatePush(ctxt, newstate); 1487 ctxt->state = newstate; 1488 xmlFAGenerateEpsilonTransition(ctxt, atom->start, 1489 newstate); 1490 } 1491 counter = xmlRegGetCounter(ctxt); 1492 ctxt->counters[counter].min = atom->min - 1; 1493 ctxt->counters[counter].max = atom->max - 1; 1494 atom->min = 0; 1495 atom->max = 0; 1496 atom->quant = XML_REGEXP_QUANT_ONCE; 1497 xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, 1498 atom->start, counter); 1499 if (to != NULL) { 1500 newstate = to; 1501 } else { 1502 newstate = xmlRegNewState(ctxt); 1503 xmlRegStatePush(ctxt, newstate); 1504 ctxt->state = newstate; 1505 } 1506 xmlFAGenerateCountedTransition(ctxt, atom->stop, 1507 newstate, counter); 1508 } 1509 default: 1510 break; 1511 } 1512 return(0); 1513 } else if ((atom->min == 0) && (atom->max == 0) && 1514 (atom->quant == XML_REGEXP_QUANT_RANGE)) { 1515 /* 1516 * we can discard the atom and generate an epsilon transition instead 1517 */ 1518 if (to == NULL) { 1519 to = xmlRegNewState(ctxt); 1520 if (to != NULL) 1521 xmlRegStatePush(ctxt, to); 1522 else { 1523 return(-1); 1524 } 1525 } 1526 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1527 ctxt->state = to; 1528 xmlRegFreeAtom(atom); 1529 return(0); 1530 } else { 1531 if (to == NULL) { 1532 to = xmlRegNewState(ctxt); 1533 if (to != NULL) 1534 xmlRegStatePush(ctxt, to); 1535 else { 1536 return(-1); 1537 } 1538 } 1539 if (xmlRegAtomPush(ctxt, atom) < 0) { 1540 return(-1); 1541 } 1542 xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1, 0); 1543 ctxt->state = to; 1544 } 1545 switch (atom->quant) { 1546 case XML_REGEXP_QUANT_OPT: 1547 atom->quant = XML_REGEXP_QUANT_ONCE; 1548 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1549 break; 1550 case XML_REGEXP_QUANT_MULT: 1551 atom->quant = XML_REGEXP_QUANT_ONCE; 1552 xmlFAGenerateEpsilonTransition(ctxt, from, to); 1553 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0); 1554 break; 1555 case XML_REGEXP_QUANT_PLUS: 1556 atom->quant = XML_REGEXP_QUANT_ONCE; 1557 xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0); 1558 break; 1559 default: 1560 break; 1561 } 1562 return(0); 1563} 1564 1565/** 1566 * xmlFAReduceEpsilonTransitions: 1567 * @ctxt: a regexp parser context 1568 * @fromnr: the from state 1569 * @tonr: the to state 1570 * @counter: should that transition be associated to a counted 1571 * 1572 */ 1573static void 1574xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, 1575 int tonr, int counter) { 1576 int transnr; 1577 xmlRegStatePtr from; 1578 xmlRegStatePtr to; 1579 1580#ifdef DEBUG_REGEXP_GRAPH 1581 printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); 1582#endif 1583 from = ctxt->states[fromnr]; 1584 if (from == NULL) 1585 return; 1586 to = ctxt->states[tonr]; 1587 if (to == NULL) 1588 return; 1589 if ((to->mark == XML_REGEXP_MARK_START) || 1590 (to->mark == XML_REGEXP_MARK_VISITED)) 1591 return; 1592 1593 to->mark = XML_REGEXP_MARK_VISITED; 1594 if (to->type == XML_REGEXP_FINAL_STATE) { 1595#ifdef DEBUG_REGEXP_GRAPH 1596 printf("State %d is final, so %d becomes final\n", tonr, fromnr); 1597#endif 1598 from->type = XML_REGEXP_FINAL_STATE; 1599 } 1600 for (transnr = 0;transnr < to->nbTrans;transnr++) { 1601 if (to->trans[transnr].to < 0) 1602 continue; 1603 if (to->trans[transnr].atom == NULL) { 1604 /* 1605 * Don't remove counted transitions 1606 * Don't loop either 1607 */ 1608 if (to->trans[transnr].to != fromnr) { 1609 if (to->trans[transnr].count >= 0) { 1610 int newto = to->trans[transnr].to; 1611 1612 xmlRegStateAddTrans(ctxt, from, NULL, 1613 ctxt->states[newto], 1614 -1, to->trans[transnr].count, 0); 1615 } else { 1616#ifdef DEBUG_REGEXP_GRAPH 1617 printf("Found epsilon trans %d from %d to %d\n", 1618 transnr, tonr, to->trans[transnr].to); 1619#endif 1620 if (to->trans[transnr].counter >= 0) { 1621 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1622 to->trans[transnr].to, 1623 to->trans[transnr].counter); 1624 } else { 1625 xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1626 to->trans[transnr].to, 1627 counter); 1628 } 1629 } 1630 } 1631 } else { 1632 int newto = to->trans[transnr].to; 1633 1634 if (to->trans[transnr].counter >= 0) { 1635 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1636 ctxt->states[newto], 1637 to->trans[transnr].counter, -1, 1); 1638 } else { 1639 xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1640 ctxt->states[newto], counter, -1, 1); 1641 } 1642 } 1643 } 1644 to->mark = XML_REGEXP_MARK_NORMAL; 1645} 1646 1647/** 1648 * xmlFAEliminateSimpleEpsilonTransitions: 1649 * @ctxt: a regexp parser context 1650 * 1651 * Eliminating general epsilon transitions can get costly in the general 1652 * algorithm due to the large amount of generated new transitions and 1653 * associated comparisons. However for simple epsilon transition used just 1654 * to separate building blocks when generating the automata this can be 1655 * reduced to state elimination: 1656 * - if there exists an epsilon from X to Y 1657 * - if there is no other transition from X 1658 * then X and Y are semantically equivalent and X can be eliminated 1659 * If X is the start state then make Y the start state, else replace the 1660 * target of all transitions to X by transitions to Y. 1661 */ 1662static void 1663xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 1664 int statenr, i, j, newto; 1665 xmlRegStatePtr state, tmp; 1666 1667 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1668 state = ctxt->states[statenr]; 1669 if (state == NULL) 1670 continue; 1671 if (state->nbTrans != 1) 1672 continue; 1673 /* is the only transition out a basic transition */ 1674 if ((state->trans[0].atom == NULL) && 1675 (state->trans[0].to >= 0) && 1676 (state->trans[0].to != statenr) && 1677 (state->trans[0].counter < 0) && 1678 (state->trans[0].count < 0)) { 1679 newto = state->trans[0].to; 1680 1681 if (state->type == XML_REGEXP_START_STATE) { 1682#ifdef DEBUG_REGEXP_GRAPH 1683 printf("Found simple epsilon trans from start %d to %d\n", 1684 statenr, newto); 1685#endif 1686 } else { 1687#ifdef DEBUG_REGEXP_GRAPH 1688 printf("Found simple epsilon trans from %d to %d\n", 1689 statenr, newto); 1690#endif 1691 for (i = 0;i < state->nbTransTo;i++) { 1692 tmp = ctxt->states[state->transTo[i]]; 1693 for (j = 0;j < tmp->nbTrans;j++) { 1694 if (tmp->trans[j].to == statenr) { 1695 tmp->trans[j].to = newto; 1696#ifdef DEBUG_REGEXP_GRAPH 1697 printf("Changed transition %d on %d to go to %d\n", 1698 j, tmp->no, newto); 1699#endif 1700 xmlRegStateAddTransTo(ctxt, ctxt->states[newto], 1701 tmp->no); 1702 } 1703 } 1704 } 1705#if 0 1706 for (i = 0;i < ctxt->nbStates;i++) { 1707 tmp = ctxt->states[i]; 1708 for (j = 0;j < tmp->nbTrans;j++) { 1709 if (tmp->trans[j].to == statenr) { 1710 tmp->trans[j].to = newto; 1711#ifdef DEBUG_REGEXP_GRAPH 1712 printf("Changed transition %d on %d to go to %d\n", 1713 j, tmp->no, newto); 1714#endif 1715 } 1716 } 1717 } 1718#endif 1719 if (state->type == XML_REGEXP_FINAL_STATE) 1720 ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; 1721 /* eliminate the transition completely */ 1722 state->nbTrans = 0; 1723 1724 1725 } 1726 1727 } 1728 } 1729} 1730/** 1731 * xmlFAEliminateEpsilonTransitions: 1732 * @ctxt: a regexp parser context 1733 * 1734 */ 1735static void 1736xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 1737 int statenr, transnr; 1738 xmlRegStatePtr state; 1739 int has_epsilon; 1740 1741 if (ctxt->states == NULL) return; 1742 1743 xmlFAEliminateSimpleEpsilonTransitions(ctxt); 1744 1745 has_epsilon = 0; 1746 1747 /* 1748 * build the completed transitions bypassing the epsilons 1749 * Use a marking algorithm to avoid loops 1750 * mark sink states too. 1751 */ 1752 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1753 state = ctxt->states[statenr]; 1754 if (state == NULL) 1755 continue; 1756 if ((state->nbTrans == 0) && 1757 (state->type != XML_REGEXP_FINAL_STATE)) { 1758 state->type = XML_REGEXP_SINK_STATE; 1759 } 1760 for (transnr = 0;transnr < state->nbTrans;transnr++) { 1761 if ((state->trans[transnr].atom == NULL) && 1762 (state->trans[transnr].to >= 0)) { 1763 if (state->trans[transnr].to == statenr) { 1764 state->trans[transnr].to = -1; 1765#ifdef DEBUG_REGEXP_GRAPH 1766 printf("Removed loopback epsilon trans %d on %d\n", 1767 transnr, statenr); 1768#endif 1769 } else if (state->trans[transnr].count < 0) { 1770 int newto = state->trans[transnr].to; 1771 1772#ifdef DEBUG_REGEXP_GRAPH 1773 printf("Found epsilon trans %d from %d to %d\n", 1774 transnr, statenr, newto); 1775#endif 1776 state->mark = XML_REGEXP_MARK_START; 1777 has_epsilon = 1; 1778 xmlFAReduceEpsilonTransitions(ctxt, statenr, 1779 newto, state->trans[transnr].counter); 1780 state->mark = XML_REGEXP_MARK_NORMAL; 1781#ifdef DEBUG_REGEXP_GRAPH 1782 } else { 1783 printf("Found counted transition %d on %d\n", 1784 transnr, statenr); 1785#endif 1786 } 1787 } 1788 } 1789 } 1790 /* 1791 * Eliminate the epsilon transitions 1792 */ 1793 if (has_epsilon) { 1794 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1795 state = ctxt->states[statenr]; 1796 if (state == NULL) 1797 continue; 1798 for (transnr = 0;transnr < state->nbTrans;transnr++) { 1799 xmlRegTransPtr trans = &(state->trans[transnr]); 1800 if ((trans->atom == NULL) && 1801 (trans->count < 0) && 1802 (trans->to >= 0)) { 1803 trans->to = -1; 1804 } 1805 } 1806 } 1807 } 1808 1809 /* 1810 * Use this pass to detect unreachable states too 1811 */ 1812 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1813 state = ctxt->states[statenr]; 1814 if (state != NULL) 1815 state->reached = XML_REGEXP_MARK_NORMAL; 1816 } 1817 state = ctxt->states[0]; 1818 if (state != NULL) 1819 state->reached = XML_REGEXP_MARK_START; 1820 while (state != NULL) { 1821 xmlRegStatePtr target = NULL; 1822 state->reached = XML_REGEXP_MARK_VISITED; 1823 /* 1824 * Mark all states reachable from the current reachable state 1825 */ 1826 for (transnr = 0;transnr < state->nbTrans;transnr++) { 1827 if ((state->trans[transnr].to >= 0) && 1828 ((state->trans[transnr].atom != NULL) || 1829 (state->trans[transnr].count >= 0))) { 1830 int newto = state->trans[transnr].to; 1831 1832 if (ctxt->states[newto] == NULL) 1833 continue; 1834 if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { 1835 ctxt->states[newto]->reached = XML_REGEXP_MARK_START; 1836 target = ctxt->states[newto]; 1837 } 1838 } 1839 } 1840 1841 /* 1842 * find the next accessible state not explored 1843 */ 1844 if (target == NULL) { 1845 for (statenr = 1;statenr < ctxt->nbStates;statenr++) { 1846 state = ctxt->states[statenr]; 1847 if ((state != NULL) && (state->reached == 1848 XML_REGEXP_MARK_START)) { 1849 target = state; 1850 break; 1851 } 1852 } 1853 } 1854 state = target; 1855 } 1856 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1857 state = ctxt->states[statenr]; 1858 if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) { 1859#ifdef DEBUG_REGEXP_GRAPH 1860 printf("Removed unreachable state %d\n", statenr); 1861#endif 1862 xmlRegFreeState(state); 1863 ctxt->states[statenr] = NULL; 1864 } 1865 } 1866 1867} 1868 1869/** 1870 * xmlFACompareAtoms: 1871 * @atom1: an atom 1872 * @atom2: an atom 1873 * 1874 * Compares two atoms to check whether they are equivalents 1875 * 1876 * Returns 1 if yes and 0 otherwise 1877 */ 1878static int 1879xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) { 1880 int ret; 1881 1882 if (atom1 == atom2) 1883 return(1); 1884 if ((atom1 == NULL) || (atom2 == NULL)) 1885 return(0); 1886 1887 if (atom1->type != atom2->type) 1888 return(0); 1889 switch (atom1->type) { 1890 case XML_REGEXP_STRING: 1891 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep, 1892 (xmlChar *)atom2->valuep); 1893 break; 1894 case XML_REGEXP_EPSILON: 1895 return(1); 1896 case XML_REGEXP_CHARVAL: 1897 ret = atom1->codepoint == atom2->codepoint; 1898 break; 1899 case XML_REGEXP_RANGES: 1900 TODO; 1901 return(0); 1902 default: 1903 return(1); 1904 } 1905 if (atom1->neg != atom2->neg) 1906 ret = !ret; 1907 return(ret); 1908} 1909 1910/** 1911 * xmlFARecurseDeterminism: 1912 * @ctxt: a regexp parser context 1913 * 1914 * Check whether the associated regexp is determinist, 1915 * should be called after xmlFAEliminateEpsilonTransitions() 1916 * 1917 */ 1918static int 1919xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 1920 int to, xmlRegAtomPtr atom) { 1921 int ret = 1; 1922 int transnr; 1923 xmlRegTransPtr t1; 1924 1925 if (state == NULL) 1926 return(ret); 1927 for (transnr = 0;transnr < state->nbTrans;transnr++) { 1928 t1 = &(state->trans[transnr]); 1929 /* 1930 * check transitions conflicting with the one looked at 1931 */ 1932 if (t1->atom == NULL) { 1933 if (t1->to == -1) 1934 continue; 1935 ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 1936 to, atom); 1937 if (ret == 0) 1938 return(0); 1939 continue; 1940 } 1941 if (t1->to != to) 1942 continue; 1943 if (xmlFACompareAtoms(t1->atom, atom)) 1944 return(0); 1945 } 1946 return(ret); 1947} 1948 1949/** 1950 * xmlFAComputesDeterminism: 1951 * @ctxt: a regexp parser context 1952 * 1953 * Check whether the associated regexp is determinist, 1954 * should be called after xmlFAEliminateEpsilonTransitions() 1955 * 1956 */ 1957static int 1958xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { 1959 int statenr, transnr; 1960 xmlRegStatePtr state; 1961 xmlRegTransPtr t1, t2; 1962 int i; 1963 int ret = 1; 1964 1965#ifdef DEBUG_REGEXP_GRAPH 1966 printf("xmlFAComputesDeterminism\n"); 1967 xmlRegPrintCtxt(stdout, ctxt); 1968#endif 1969 if (ctxt->determinist != -1) 1970 return(ctxt->determinist); 1971 1972 /* 1973 * Check for all states that there aren't 2 transitions 1974 * with the same atom and a different target. 1975 */ 1976 for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1977 state = ctxt->states[statenr]; 1978 if (state == NULL) 1979 continue; 1980 for (transnr = 0;transnr < state->nbTrans;transnr++) { 1981 t1 = &(state->trans[transnr]); 1982 /* 1983 * Determinism checks in case of counted or all transitions 1984 * will have to be handled separately 1985 */ 1986 if (t1->atom == NULL) 1987 continue; 1988 if (t1->to == -1) /* eliminated */ 1989 continue; 1990 for (i = 0;i < transnr;i++) { 1991 t2 = &(state->trans[i]); 1992 if (t2->to == -1) /* eliminated */ 1993 continue; 1994 if (t2->atom != NULL) { 1995 if (t1->to == t2->to) { 1996 if (xmlFACompareAtoms(t1->atom, t2->atom)) 1997 t2->to = -1; /* eliminated */ 1998 } else { 1999 /* not determinist ! */ 2000 if (xmlFACompareAtoms(t1->atom, t2->atom)) 2001 ret = 0; 2002 } 2003 } else if (t1->to != -1) { 2004 /* 2005 * do the closure in case of remaining specific 2006 * epsilon transitions like choices or all 2007 */ 2008 ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 2009 t2->to, t2->atom); 2010 if (ret == 0) 2011 return(0); 2012 } 2013 } 2014 if (ret == 0) 2015 break; 2016 } 2017 if (ret == 0) 2018 break; 2019 } 2020 ctxt->determinist = ret; 2021 return(ret); 2022} 2023 2024/************************************************************************ 2025 * * 2026 * Routines to check input against transition atoms * 2027 * * 2028 ************************************************************************/ 2029 2030static int 2031xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, 2032 int start, int end, const xmlChar *blockName) { 2033 int ret = 0; 2034 2035 switch (type) { 2036 case XML_REGEXP_STRING: 2037 case XML_REGEXP_SUBREG: 2038 case XML_REGEXP_RANGES: 2039 case XML_REGEXP_EPSILON: 2040 return(-1); 2041 case XML_REGEXP_ANYCHAR: 2042 ret = ((codepoint != '\n') && (codepoint != '\r')); 2043 break; 2044 case XML_REGEXP_CHARVAL: 2045 ret = ((codepoint >= start) && (codepoint <= end)); 2046 break; 2047 case XML_REGEXP_NOTSPACE: 2048 neg = !neg; 2049 case XML_REGEXP_ANYSPACE: 2050 ret = ((codepoint == '\n') || (codepoint == '\r') || 2051 (codepoint == '\t') || (codepoint == ' ')); 2052 break; 2053 case XML_REGEXP_NOTINITNAME: 2054 neg = !neg; 2055 case XML_REGEXP_INITNAME: 2056 ret = (IS_LETTER(codepoint) || 2057 (codepoint == '_') || (codepoint == ':')); 2058 break; 2059 case XML_REGEXP_NOTNAMECHAR: 2060 neg = !neg; 2061 case XML_REGEXP_NAMECHAR: 2062 ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || 2063 (codepoint == '.') || (codepoint == '-') || 2064 (codepoint == '_') || (codepoint == ':') || 2065 IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); 2066 break; 2067 case XML_REGEXP_NOTDECIMAL: 2068 neg = !neg; 2069 case XML_REGEXP_DECIMAL: 2070 ret = xmlUCSIsCatNd(codepoint); 2071 break; 2072 case XML_REGEXP_REALCHAR: 2073 neg = !neg; 2074 case XML_REGEXP_NOTREALCHAR: 2075 ret = xmlUCSIsCatP(codepoint); 2076 if (ret == 0) 2077 ret = xmlUCSIsCatZ(codepoint); 2078 if (ret == 0) 2079 ret = xmlUCSIsCatC(codepoint); 2080 break; 2081 case XML_REGEXP_LETTER: 2082 ret = xmlUCSIsCatL(codepoint); 2083 break; 2084 case XML_REGEXP_LETTER_UPPERCASE: 2085 ret = xmlUCSIsCatLu(codepoint); 2086 break; 2087 case XML_REGEXP_LETTER_LOWERCASE: 2088 ret = xmlUCSIsCatLl(codepoint); 2089 break; 2090 case XML_REGEXP_LETTER_TITLECASE: 2091 ret = xmlUCSIsCatLt(codepoint); 2092 break; 2093 case XML_REGEXP_LETTER_MODIFIER: 2094 ret = xmlUCSIsCatLm(codepoint); 2095 break; 2096 case XML_REGEXP_LETTER_OTHERS: 2097 ret = xmlUCSIsCatLo(codepoint); 2098 break; 2099 case XML_REGEXP_MARK: 2100 ret = xmlUCSIsCatM(codepoint); 2101 break; 2102 case XML_REGEXP_MARK_NONSPACING: 2103 ret = xmlUCSIsCatMn(codepoint); 2104 break; 2105 case XML_REGEXP_MARK_SPACECOMBINING: 2106 ret = xmlUCSIsCatMc(codepoint); 2107 break; 2108 case XML_REGEXP_MARK_ENCLOSING: 2109 ret = xmlUCSIsCatMe(codepoint); 2110 break; 2111 case XML_REGEXP_NUMBER: 2112 ret = xmlUCSIsCatN(codepoint); 2113 break; 2114 case XML_REGEXP_NUMBER_DECIMAL: 2115 ret = xmlUCSIsCatNd(codepoint); 2116 break; 2117 case XML_REGEXP_NUMBER_LETTER: 2118 ret = xmlUCSIsCatNl(codepoint); 2119 break; 2120 case XML_REGEXP_NUMBER_OTHERS: 2121 ret = xmlUCSIsCatNo(codepoint); 2122 break; 2123 case XML_REGEXP_PUNCT: 2124 ret = xmlUCSIsCatP(codepoint); 2125 break; 2126 case XML_REGEXP_PUNCT_CONNECTOR: 2127 ret = xmlUCSIsCatPc(codepoint); 2128 break; 2129 case XML_REGEXP_PUNCT_DASH: 2130 ret = xmlUCSIsCatPd(codepoint); 2131 break; 2132 case XML_REGEXP_PUNCT_OPEN: 2133 ret = xmlUCSIsCatPs(codepoint); 2134 break; 2135 case XML_REGEXP_PUNCT_CLOSE: 2136 ret = xmlUCSIsCatPe(codepoint); 2137 break; 2138 case XML_REGEXP_PUNCT_INITQUOTE: 2139 ret = xmlUCSIsCatPi(codepoint); 2140 break; 2141 case XML_REGEXP_PUNCT_FINQUOTE: 2142 ret = xmlUCSIsCatPf(codepoint); 2143 break; 2144 case XML_REGEXP_PUNCT_OTHERS: 2145 ret = xmlUCSIsCatPo(codepoint); 2146 break; 2147 case XML_REGEXP_SEPAR: 2148 ret = xmlUCSIsCatZ(codepoint); 2149 break; 2150 case XML_REGEXP_SEPAR_SPACE: 2151 ret = xmlUCSIsCatZs(codepoint); 2152 break; 2153 case XML_REGEXP_SEPAR_LINE: 2154 ret = xmlUCSIsCatZl(codepoint); 2155 break; 2156 case XML_REGEXP_SEPAR_PARA: 2157 ret = xmlUCSIsCatZp(codepoint); 2158 break; 2159 case XML_REGEXP_SYMBOL: 2160 ret = xmlUCSIsCatS(codepoint); 2161 break; 2162 case XML_REGEXP_SYMBOL_MATH: 2163 ret = xmlUCSIsCatSm(codepoint); 2164 break; 2165 case XML_REGEXP_SYMBOL_CURRENCY: 2166 ret = xmlUCSIsCatSc(codepoint); 2167 break; 2168 case XML_REGEXP_SYMBOL_MODIFIER: 2169 ret = xmlUCSIsCatSk(codepoint); 2170 break; 2171 case XML_REGEXP_SYMBOL_OTHERS: 2172 ret = xmlUCSIsCatSo(codepoint); 2173 break; 2174 case XML_REGEXP_OTHER: 2175 ret = xmlUCSIsCatC(codepoint); 2176 break; 2177 case XML_REGEXP_OTHER_CONTROL: 2178 ret = xmlUCSIsCatCc(codepoint); 2179 break; 2180 case XML_REGEXP_OTHER_FORMAT: 2181 ret = xmlUCSIsCatCf(codepoint); 2182 break; 2183 case XML_REGEXP_OTHER_PRIVATE: 2184 ret = xmlUCSIsCatCo(codepoint); 2185 break; 2186 case XML_REGEXP_OTHER_NA: 2187 /* ret = xmlUCSIsCatCn(codepoint); */ 2188 /* Seems it doesn't exist anymore in recent Unicode releases */ 2189 ret = 0; 2190 break; 2191 case XML_REGEXP_BLOCK_NAME: 2192 ret = xmlUCSIsBlock(codepoint, (const char *) blockName); 2193 break; 2194 } 2195 if (neg) 2196 return(!ret); 2197 return(ret); 2198} 2199 2200static int 2201xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { 2202 int i, ret = 0; 2203 xmlRegRangePtr range; 2204 2205 if ((atom == NULL) || (!IS_CHAR(codepoint))) 2206 return(-1); 2207 2208 switch (atom->type) { 2209 case XML_REGEXP_SUBREG: 2210 case XML_REGEXP_EPSILON: 2211 return(-1); 2212 case XML_REGEXP_CHARVAL: 2213 return(codepoint == atom->codepoint); 2214 case XML_REGEXP_RANGES: { 2215 int accept = 0; 2216 2217 for (i = 0;i < atom->nbRanges;i++) { 2218 range = atom->ranges[i]; 2219 if (range->neg == 2) { 2220 ret = xmlRegCheckCharacterRange(range->type, codepoint, 2221 0, range->start, range->end, 2222 range->blockName); 2223 if (ret != 0) 2224 return(0); /* excluded char */ 2225 } else if (range->neg) { 2226 ret = xmlRegCheckCharacterRange(range->type, codepoint, 2227 0, range->start, range->end, 2228 range->blockName); 2229 if (ret == 0) 2230 accept = 1; 2231 else 2232 return(0); 2233 } else { 2234 ret = xmlRegCheckCharacterRange(range->type, codepoint, 2235 0, range->start, range->end, 2236 range->blockName); 2237 if (ret != 0) 2238 accept = 1; /* might still be excluded */ 2239 } 2240 } 2241 return(accept); 2242 } 2243 case XML_REGEXP_STRING: 2244 printf("TODO: XML_REGEXP_STRING\n"); 2245 return(-1); 2246 case XML_REGEXP_ANYCHAR: 2247 case XML_REGEXP_ANYSPACE: 2248 case XML_REGEXP_NOTSPACE: 2249 case XML_REGEXP_INITNAME: 2250 case XML_REGEXP_NOTINITNAME: 2251 case XML_REGEXP_NAMECHAR: 2252 case XML_REGEXP_NOTNAMECHAR: 2253 case XML_REGEXP_DECIMAL: 2254 case XML_REGEXP_NOTDECIMAL: 2255 case XML_REGEXP_REALCHAR: 2256 case XML_REGEXP_NOTREALCHAR: 2257 case XML_REGEXP_LETTER: 2258 case XML_REGEXP_LETTER_UPPERCASE: 2259 case XML_REGEXP_LETTER_LOWERCASE: 2260 case XML_REGEXP_LETTER_TITLECASE: 2261 case XML_REGEXP_LETTER_MODIFIER: 2262 case XML_REGEXP_LETTER_OTHERS: 2263 case XML_REGEXP_MARK: 2264 case XML_REGEXP_MARK_NONSPACING: 2265 case XML_REGEXP_MARK_SPACECOMBINING: 2266 case XML_REGEXP_MARK_ENCLOSING: 2267 case XML_REGEXP_NUMBER: 2268 case XML_REGEXP_NUMBER_DECIMAL: 2269 case XML_REGEXP_NUMBER_LETTER: 2270 case XML_REGEXP_NUMBER_OTHERS: 2271 case XML_REGEXP_PUNCT: 2272 case XML_REGEXP_PUNCT_CONNECTOR: 2273 case XML_REGEXP_PUNCT_DASH: 2274 case XML_REGEXP_PUNCT_OPEN: 2275 case XML_REGEXP_PUNCT_CLOSE: 2276 case XML_REGEXP_PUNCT_INITQUOTE: 2277 case XML_REGEXP_PUNCT_FINQUOTE: 2278 case XML_REGEXP_PUNCT_OTHERS: 2279 case XML_REGEXP_SEPAR: 2280 case XML_REGEXP_SEPAR_SPACE: 2281 case XML_REGEXP_SEPAR_LINE: 2282 case XML_REGEXP_SEPAR_PARA: 2283 case XML_REGEXP_SYMBOL: 2284 case XML_REGEXP_SYMBOL_MATH: 2285 case XML_REGEXP_SYMBOL_CURRENCY: 2286 case XML_REGEXP_SYMBOL_MODIFIER: 2287 case XML_REGEXP_SYMBOL_OTHERS: 2288 case XML_REGEXP_OTHER: 2289 case XML_REGEXP_OTHER_CONTROL: 2290 case XML_REGEXP_OTHER_FORMAT: 2291 case XML_REGEXP_OTHER_PRIVATE: 2292 case XML_REGEXP_OTHER_NA: 2293 case XML_REGEXP_BLOCK_NAME: 2294 ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, 2295 (const xmlChar *)atom->valuep); 2296 if (atom->neg) 2297 ret = !ret; 2298 break; 2299 } 2300 return(ret); 2301} 2302 2303/************************************************************************ 2304 * * 2305 * Saving and restoring state of an execution context * 2306 * * 2307 ************************************************************************/ 2308 2309#ifdef DEBUG_REGEXP_EXEC 2310static void 2311xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { 2312 printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); 2313 if (exec->inputStack != NULL) { 2314 int i; 2315 printf(": "); 2316 for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) 2317 printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); 2318 } else { 2319 printf(": %s", &(exec->inputString[exec->index])); 2320 } 2321 printf("\n"); 2322} 2323#endif 2324 2325static void 2326xmlFARegExecSave(xmlRegExecCtxtPtr exec) { 2327#ifdef DEBUG_REGEXP_EXEC 2328 printf("saving "); 2329 exec->transno++; 2330 xmlFARegDebugExec(exec); 2331 exec->transno--; 2332#endif 2333 2334 if (exec->maxRollbacks == 0) { 2335 exec->maxRollbacks = 4; 2336 exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * 2337 sizeof(xmlRegExecRollback)); 2338 if (exec->rollbacks == NULL) { 2339 xmlRegexpErrMemory(NULL, "saving regexp"); 2340 exec->maxRollbacks = 0; 2341 return; 2342 } 2343 memset(exec->rollbacks, 0, 2344 exec->maxRollbacks * sizeof(xmlRegExecRollback)); 2345 } else if (exec->nbRollbacks >= exec->maxRollbacks) { 2346 xmlRegExecRollback *tmp; 2347 int len = exec->maxRollbacks; 2348 2349 exec->maxRollbacks *= 2; 2350 tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, 2351 exec->maxRollbacks * sizeof(xmlRegExecRollback)); 2352 if (tmp == NULL) { 2353 xmlRegexpErrMemory(NULL, "saving regexp"); 2354 exec->maxRollbacks /= 2; 2355 return; 2356 } 2357 exec->rollbacks = tmp; 2358 tmp = &exec->rollbacks[len]; 2359 memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); 2360 } 2361 exec->rollbacks[exec->nbRollbacks].state = exec->state; 2362 exec->rollbacks[exec->nbRollbacks].index = exec->index; 2363 exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; 2364 if (exec->comp->nbCounters > 0) { 2365 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 2366 exec->rollbacks[exec->nbRollbacks].counts = (int *) 2367 xmlMalloc(exec->comp->nbCounters * sizeof(int)); 2368 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 2369 xmlRegexpErrMemory(NULL, "saving regexp"); 2370 exec->status = -5; 2371 return; 2372 } 2373 } 2374 memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, 2375 exec->comp->nbCounters * sizeof(int)); 2376 } 2377 exec->nbRollbacks++; 2378} 2379 2380static void 2381xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { 2382 if (exec->nbRollbacks <= 0) { 2383 exec->status = -1; 2384#ifdef DEBUG_REGEXP_EXEC 2385 printf("rollback failed on empty stack\n"); 2386#endif 2387 return; 2388 } 2389 exec->nbRollbacks--; 2390 exec->state = exec->rollbacks[exec->nbRollbacks].state; 2391 exec->index = exec->rollbacks[exec->nbRollbacks].index; 2392 exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; 2393 if (exec->comp->nbCounters > 0) { 2394 if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 2395 fprintf(stderr, "exec save: allocation failed"); 2396 exec->status = -6; 2397 return; 2398 } 2399 memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, 2400 exec->comp->nbCounters * sizeof(int)); 2401 } 2402 2403#ifdef DEBUG_REGEXP_EXEC 2404 printf("restored "); 2405 xmlFARegDebugExec(exec); 2406#endif 2407} 2408 2409/************************************************************************ 2410 * * 2411 * Verifier, running an input against a compiled regexp * 2412 * * 2413 ************************************************************************/ 2414 2415static int 2416xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { 2417 xmlRegExecCtxt execval; 2418 xmlRegExecCtxtPtr exec = &execval; 2419 int ret, codepoint = 0, len; 2420 2421 exec->inputString = content; 2422 exec->index = 0; 2423 exec->determinist = 1; 2424 exec->maxRollbacks = 0; 2425 exec->nbRollbacks = 0; 2426 exec->rollbacks = NULL; 2427 exec->status = 0; 2428 exec->comp = comp; 2429 exec->state = comp->states[0]; 2430 exec->transno = 0; 2431 exec->transcount = 0; 2432 exec->inputStack = NULL; 2433 exec->inputStackMax = 0; 2434 if (comp->nbCounters > 0) { 2435 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); 2436 if (exec->counts == NULL) { 2437 xmlRegexpErrMemory(NULL, "running regexp"); 2438 return(-1); 2439 } 2440 memset(exec->counts, 0, comp->nbCounters * sizeof(int)); 2441 } else 2442 exec->counts = NULL; 2443 while ((exec->status == 0) && 2444 ((exec->inputString[exec->index] != 0) || 2445 (exec->state->type != XML_REGEXP_FINAL_STATE))) { 2446 xmlRegTransPtr trans; 2447 xmlRegAtomPtr atom; 2448 2449 /* 2450 * If end of input on non-terminal state, rollback, however we may 2451 * still have epsilon like transition for counted transitions 2452 * on counters, in that case don't break too early. Additionally, 2453 * if we are working on a range like "AB{0,2}", where B is not present, 2454 * we don't want to break. 2455 */ 2456 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { 2457 /* 2458 * if there is a transition, we must check if 2459 * atom allows minOccurs of 0 2460 */ 2461 if (exec->transno < exec->state->nbTrans) { 2462 trans = &exec->state->trans[exec->transno]; 2463 if (trans->to >=0) { 2464 atom = trans->atom; 2465 if (!((atom->min == 0) && (atom->max > 0))) 2466 goto rollback; 2467 } 2468 } else 2469 goto rollback; 2470 } 2471 2472 exec->transcount = 0; 2473 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 2474 trans = &exec->state->trans[exec->transno]; 2475 if (trans->to < 0) 2476 continue; 2477 atom = trans->atom; 2478 ret = 0; 2479 if (trans->count >= 0) { 2480 int count; 2481 xmlRegCounterPtr counter; 2482 2483 /* 2484 * A counted transition. 2485 */ 2486 2487 count = exec->counts[trans->count]; 2488 counter = &exec->comp->counters[trans->count]; 2489#ifdef DEBUG_REGEXP_EXEC 2490 printf("testing count %d: val %d, min %d, max %d\n", 2491 trans->count, count, counter->min, counter->max); 2492#endif 2493 ret = ((count >= counter->min) && (count <= counter->max)); 2494 } else if (atom == NULL) { 2495 fprintf(stderr, "epsilon transition left at runtime\n"); 2496 exec->status = -2; 2497 break; 2498 } else if (exec->inputString[exec->index] != 0) { 2499 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); 2500 ret = xmlRegCheckCharacter(atom, codepoint); 2501 if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { 2502 xmlRegStatePtr to = comp->states[trans->to]; 2503 2504 /* 2505 * this is a multiple input sequence 2506 */ 2507 if (exec->state->nbTrans > exec->transno + 1) { 2508 xmlFARegExecSave(exec); 2509 } 2510 exec->transcount = 1; 2511 do { 2512 /* 2513 * Try to progress as much as possible on the input 2514 */ 2515 if (exec->transcount == atom->max) { 2516 break; 2517 } 2518 exec->index += len; 2519 /* 2520 * End of input: stop here 2521 */ 2522 if (exec->inputString[exec->index] == 0) { 2523 exec->index -= len; 2524 break; 2525 } 2526 if (exec->transcount >= atom->min) { 2527 int transno = exec->transno; 2528 xmlRegStatePtr state = exec->state; 2529 2530 /* 2531 * The transition is acceptable save it 2532 */ 2533 exec->transno = -1; /* trick */ 2534 exec->state = to; 2535 xmlFARegExecSave(exec); 2536 exec->transno = transno; 2537 exec->state = state; 2538 } 2539 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), 2540 len); 2541 ret = xmlRegCheckCharacter(atom, codepoint); 2542 exec->transcount++; 2543 } while (ret == 1); 2544 if (exec->transcount < atom->min) 2545 ret = 0; 2546 2547 /* 2548 * If the last check failed but one transition was found 2549 * possible, rollback 2550 */ 2551 if (ret < 0) 2552 ret = 0; 2553 if (ret == 0) { 2554 goto rollback; 2555 } 2556 } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) { 2557 /* 2558 * we don't match on the codepoint, but minOccurs of 0 2559 * says that's ok. Setting len to 0 inhibits stepping 2560 * over the codepoint. 2561 */ 2562 exec->transcount = 1; 2563 len = 0; 2564 ret = 1; 2565 } 2566 } else if ((atom->min == 0) && (atom->max > 0)) { 2567 /* another spot to match when minOccurs is 0 */ 2568 exec->transcount = 1; 2569 len = 0; 2570 ret = 1; 2571 } 2572 if (ret == 1) { 2573 if (exec->state->nbTrans > exec->transno + 1) { 2574 xmlFARegExecSave(exec); 2575 } 2576 if (trans->counter >= 0) { 2577#ifdef DEBUG_REGEXP_EXEC 2578 printf("Increasing count %d\n", trans->counter); 2579#endif 2580 exec->counts[trans->counter]++; 2581 } 2582#ifdef DEBUG_REGEXP_EXEC 2583 printf("entering state %d\n", trans->to); 2584#endif 2585 exec->state = comp->states[trans->to]; 2586 exec->transno = 0; 2587 if (trans->atom != NULL) { 2588 exec->index += len; 2589 } 2590 goto progress; 2591 } else if (ret < 0) { 2592 exec->status = -4; 2593 break; 2594 } 2595 } 2596 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 2597rollback: 2598 /* 2599 * Failed to find a way out 2600 */ 2601 exec->determinist = 0; 2602 xmlFARegExecRollBack(exec); 2603 } 2604progress: 2605 continue; 2606 } 2607 if (exec->rollbacks != NULL) { 2608 if (exec->counts != NULL) { 2609 int i; 2610 2611 for (i = 0;i < exec->maxRollbacks;i++) 2612 if (exec->rollbacks[i].counts != NULL) 2613 xmlFree(exec->rollbacks[i].counts); 2614 } 2615 xmlFree(exec->rollbacks); 2616 } 2617 if (exec->counts != NULL) 2618 xmlFree(exec->counts); 2619 if (exec->status == 0) 2620 return(1); 2621 if (exec->status == -1) 2622 return(0); 2623 return(exec->status); 2624} 2625 2626/************************************************************************ 2627 * * 2628 * Progressive interface to the verifier one atom at a time * 2629 * * 2630 ************************************************************************/ 2631#ifdef DEBUG_ERR 2632static void testerr(xmlRegExecCtxtPtr exec); 2633#endif 2634 2635/** 2636 * xmlRegNewExecCtxt: 2637 * @comp: a precompiled regular expression 2638 * @callback: a callback function used for handling progresses in the 2639 * automata matching phase 2640 * @data: the context data associated to the callback in this context 2641 * 2642 * Build a context used for progressive evaluation of a regexp. 2643 * 2644 * Returns the new context 2645 */ 2646xmlRegExecCtxtPtr 2647xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { 2648 xmlRegExecCtxtPtr exec; 2649 2650 if (comp == NULL) 2651 return(NULL); 2652 if ((comp->compact == NULL) && (comp->states == NULL)) 2653 return(NULL); 2654 exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); 2655 if (exec == NULL) { 2656 xmlRegexpErrMemory(NULL, "creating execution context"); 2657 return(NULL); 2658 } 2659 memset(exec, 0, sizeof(xmlRegExecCtxt)); 2660 exec->inputString = NULL; 2661 exec->index = 0; 2662 exec->determinist = 1; 2663 exec->maxRollbacks = 0; 2664 exec->nbRollbacks = 0; 2665 exec->rollbacks = NULL; 2666 exec->status = 0; 2667 exec->comp = comp; 2668 if (comp->compact == NULL) 2669 exec->state = comp->states[0]; 2670 exec->transno = 0; 2671 exec->transcount = 0; 2672 exec->callback = callback; 2673 exec->data = data; 2674 if (comp->nbCounters > 0) { 2675 /* 2676 * For error handling, exec->counts is allocated twice the size 2677 * the second half is used to store the data in case of rollback 2678 */ 2679 exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int) 2680 * 2); 2681 if (exec->counts == NULL) { 2682 xmlRegexpErrMemory(NULL, "creating execution context"); 2683 xmlFree(exec); 2684 return(NULL); 2685 } 2686 memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2); 2687 exec->errCounts = &exec->counts[comp->nbCounters]; 2688 } else { 2689 exec->counts = NULL; 2690 exec->errCounts = NULL; 2691 } 2692 exec->inputStackMax = 0; 2693 exec->inputStackNr = 0; 2694 exec->inputStack = NULL; 2695 exec->errStateNo = -1; 2696 exec->errString = NULL; 2697 return(exec); 2698} 2699 2700/** 2701 * xmlRegFreeExecCtxt: 2702 * @exec: a regular expression evaulation context 2703 * 2704 * Free the structures associated to a regular expression evaulation context. 2705 */ 2706void 2707xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { 2708 if (exec == NULL) 2709 return; 2710 2711 if (exec->rollbacks != NULL) { 2712 if (exec->counts != NULL) { 2713 int i; 2714 2715 for (i = 0;i < exec->maxRollbacks;i++) 2716 if (exec->rollbacks[i].counts != NULL) 2717 xmlFree(exec->rollbacks[i].counts); 2718 } 2719 xmlFree(exec->rollbacks); 2720 } 2721 if (exec->counts != NULL) 2722 xmlFree(exec->counts); 2723 if (exec->inputStack != NULL) { 2724 int i; 2725 2726 for (i = 0;i < exec->inputStackNr;i++) { 2727 if (exec->inputStack[i].value != NULL) 2728 xmlFree(exec->inputStack[i].value); 2729 } 2730 xmlFree(exec->inputStack); 2731 } 2732 if (exec->errString != NULL) 2733 xmlFree(exec->errString); 2734 xmlFree(exec); 2735} 2736 2737static void 2738xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, 2739 void *data) { 2740#ifdef DEBUG_PUSH 2741 printf("saving value: %d:%s\n", exec->inputStackNr, value); 2742#endif 2743 if (exec->inputStackMax == 0) { 2744 exec->inputStackMax = 4; 2745 exec->inputStack = (xmlRegInputTokenPtr) 2746 xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); 2747 if (exec->inputStack == NULL) { 2748 xmlRegexpErrMemory(NULL, "pushing input string"); 2749 exec->inputStackMax = 0; 2750 return; 2751 } 2752 } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { 2753 xmlRegInputTokenPtr tmp; 2754 2755 exec->inputStackMax *= 2; 2756 tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, 2757 exec->inputStackMax * sizeof(xmlRegInputToken)); 2758 if (tmp == NULL) { 2759 xmlRegexpErrMemory(NULL, "pushing input string"); 2760 exec->inputStackMax /= 2; 2761 return; 2762 } 2763 exec->inputStack = tmp; 2764 } 2765 exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); 2766 exec->inputStack[exec->inputStackNr].data = data; 2767 exec->inputStackNr++; 2768 exec->inputStack[exec->inputStackNr].value = NULL; 2769 exec->inputStack[exec->inputStackNr].data = NULL; 2770} 2771 2772/** 2773 * xmlRegStrEqualWildcard: 2774 * @expStr: the string to be evaluated 2775 * @valStr: the validation string 2776 * 2777 * Checks if both strings are equal or have the same content. "*" 2778 * can be used as a wildcard in @valStr; "|" is used as a seperator of 2779 * substrings in both @expStr and @valStr. 2780 * 2781 * Returns 1 if the comparison is satisfied and the number of substrings 2782 * is equal, 0 otherwise. 2783 */ 2784 2785static int 2786xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { 2787 if (expStr == valStr) return(1); 2788 if (expStr == NULL) return(0); 2789 if (valStr == NULL) return(0); 2790 do { 2791 /* 2792 * Eval if we have a wildcard for the current item. 2793 */ 2794 if (*expStr != *valStr) { 2795 if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) { 2796 do { 2797 if (*valStr == XML_REG_STRING_SEPARATOR) 2798 break; 2799 valStr++; 2800 } while (*valStr != 0); 2801 continue; 2802 } else 2803 return(0); 2804 } 2805 expStr++; 2806 valStr++; 2807 } while (*valStr != 0); 2808 if (*expStr != 0) 2809 return (0); 2810 else 2811 return (1); 2812} 2813 2814/** 2815 * xmlRegCompactPushString: 2816 * @exec: a regexp execution context 2817 * @comp: the precompiled exec with a compact table 2818 * @value: a string token input 2819 * @data: data associated to the token to reuse in callbacks 2820 * 2821 * Push one input token in the execution context 2822 * 2823 * Returns: 1 if the regexp reached a final state, 0 if non-final, and 2824 * a negative value in case of error. 2825 */ 2826static int 2827xmlRegCompactPushString(xmlRegExecCtxtPtr exec, 2828 xmlRegexpPtr comp, 2829 const xmlChar *value, 2830 void *data) { 2831 int state = exec->index; 2832 int i, target; 2833 2834 if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) 2835 return(-1); 2836 2837 if (value == NULL) { 2838 /* 2839 * are we at a final state ? 2840 */ 2841 if (comp->compact[state * (comp->nbstrings + 1)] == 2842 XML_REGEXP_FINAL_STATE) 2843 return(1); 2844 return(0); 2845 } 2846 2847#ifdef DEBUG_PUSH 2848 printf("value pushed: %s\n", value); 2849#endif 2850 2851 /* 2852 * Examine all outside transitions from current state 2853 */ 2854 for (i = 0;i < comp->nbstrings;i++) { 2855 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 2856 if ((target > 0) && (target <= comp->nbstates)) { 2857 target--; /* to avoid 0 */ 2858 if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { 2859 exec->index = target; 2860 if ((exec->callback != NULL) && (comp->transdata != NULL)) { 2861 exec->callback(exec->data, value, 2862 comp->transdata[state * comp->nbstrings + i], data); 2863 } 2864#ifdef DEBUG_PUSH 2865 printf("entering state %d\n", target); 2866#endif 2867 if (comp->compact[target * (comp->nbstrings + 1)] == 2868 XML_REGEXP_SINK_STATE) 2869 goto error; 2870 2871 if (comp->compact[target * (comp->nbstrings + 1)] == 2872 XML_REGEXP_FINAL_STATE) 2873 return(1); 2874 return(0); 2875 } 2876 } 2877 } 2878 /* 2879 * Failed to find an exit transition out from current state for the 2880 * current token 2881 */ 2882#ifdef DEBUG_PUSH 2883 printf("failed to find a transition for %s on state %d\n", value, state); 2884#endif 2885error: 2886 if (exec->errString != NULL) 2887 xmlFree(exec->errString); 2888 exec->errString = xmlStrdup(value); 2889 exec->errStateNo = state; 2890 exec->status = -1; 2891#ifdef DEBUG_ERR 2892 testerr(exec); 2893#endif 2894 return(-1); 2895} 2896 2897/** 2898 * xmlRegExecPushString: 2899 * @exec: a regexp execution context or NULL to indicate the end 2900 * @value: a string token input 2901 * @data: data associated to the token to reuse in callbacks 2902 * 2903 * Push one input token in the execution context 2904 * 2905 * Returns: 1 if the regexp reached a final state, 0 if non-final, and 2906 * a negative value in case of error. 2907 */ 2908int 2909xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, 2910 void *data) { 2911 xmlRegTransPtr trans; 2912 xmlRegAtomPtr atom; 2913 int ret; 2914 int final = 0; 2915 int progress = 1; 2916 2917 if (exec == NULL) 2918 return(-1); 2919 if (exec->comp == NULL) 2920 return(-1); 2921 if (exec->status != 0) 2922 return(exec->status); 2923 2924 if (exec->comp->compact != NULL) 2925 return(xmlRegCompactPushString(exec, exec->comp, value, data)); 2926 2927 if (value == NULL) { 2928 if (exec->state->type == XML_REGEXP_FINAL_STATE) 2929 return(1); 2930 final = 1; 2931 } 2932 2933#ifdef DEBUG_PUSH 2934 printf("value pushed: %s\n", value); 2935#endif 2936 /* 2937 * If we have an active rollback stack push the new value there 2938 * and get back to where we were left 2939 */ 2940 if ((value != NULL) && (exec->inputStackNr > 0)) { 2941 xmlFARegExecSaveInputString(exec, value, data); 2942 value = exec->inputStack[exec->index].value; 2943 data = exec->inputStack[exec->index].data; 2944#ifdef DEBUG_PUSH 2945 printf("value loaded: %s\n", value); 2946#endif 2947 } 2948 2949 while ((exec->status == 0) && 2950 ((value != NULL) || 2951 ((final == 1) && 2952 (exec->state->type != XML_REGEXP_FINAL_STATE)))) { 2953 2954 /* 2955 * End of input on non-terminal state, rollback, however we may 2956 * still have epsilon like transition for counted transitions 2957 * on counters, in that case don't break too early. 2958 */ 2959 if ((value == NULL) && (exec->counts == NULL)) 2960 goto rollback; 2961 2962 exec->transcount = 0; 2963 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 2964 trans = &exec->state->trans[exec->transno]; 2965 if (trans->to < 0) 2966 continue; 2967 atom = trans->atom; 2968 ret = 0; 2969 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 2970 int i; 2971 int count; 2972 xmlRegTransPtr t; 2973 xmlRegCounterPtr counter; 2974 2975 ret = 0; 2976 2977#ifdef DEBUG_PUSH 2978 printf("testing all lax %d\n", trans->count); 2979#endif 2980 /* 2981 * Check all counted transitions from the current state 2982 */ 2983 if ((value == NULL) && (final)) { 2984 ret = 1; 2985 } else if (value != NULL) { 2986 for (i = 0;i < exec->state->nbTrans;i++) { 2987 t = &exec->state->trans[i]; 2988 if ((t->counter < 0) || (t == trans)) 2989 continue; 2990 counter = &exec->comp->counters[t->counter]; 2991 count = exec->counts[t->counter]; 2992 if ((count < counter->max) && 2993 (t->atom != NULL) && 2994 (xmlStrEqual(value, t->atom->valuep))) { 2995 ret = 0; 2996 break; 2997 } 2998 if ((count >= counter->min) && 2999 (count < counter->max) && 3000 (xmlStrEqual(value, t->atom->valuep))) { 3001 ret = 1; 3002 break; 3003 } 3004 } 3005 } 3006 } else if (trans->count == REGEXP_ALL_COUNTER) { 3007 int i; 3008 int count; 3009 xmlRegTransPtr t; 3010 xmlRegCounterPtr counter; 3011 3012 ret = 1; 3013 3014#ifdef DEBUG_PUSH 3015 printf("testing all %d\n", trans->count); 3016#endif 3017 /* 3018 * Check all counted transitions from the current state 3019 */ 3020 for (i = 0;i < exec->state->nbTrans;i++) { 3021 t = &exec->state->trans[i]; 3022 if ((t->counter < 0) || (t == trans)) 3023 continue; 3024 counter = &exec->comp->counters[t->counter]; 3025 count = exec->counts[t->counter]; 3026 if ((count < counter->min) || (count > counter->max)) { 3027 ret = 0; 3028 break; 3029 } 3030 } 3031 } else if (trans->count >= 0) { 3032 int count; 3033 xmlRegCounterPtr counter; 3034 3035 /* 3036 * A counted transition. 3037 */ 3038 3039 count = exec->counts[trans->count]; 3040 counter = &exec->comp->counters[trans->count]; 3041#ifdef DEBUG_PUSH 3042 printf("testing count %d: val %d, min %d, max %d\n", 3043 trans->count, count, counter->min, counter->max); 3044#endif 3045 ret = ((count >= counter->min) && (count <= counter->max)); 3046 } else if (atom == NULL) { 3047 fprintf(stderr, "epsilon transition left at runtime\n"); 3048 exec->status = -2; 3049 break; 3050 } else if (value != NULL) { 3051 ret = xmlRegStrEqualWildcard(atom->valuep, value); 3052 if (atom->neg) 3053 ret = !ret; 3054 if ((ret == 1) && (trans->counter >= 0)) { 3055 xmlRegCounterPtr counter; 3056 int count; 3057 3058 count = exec->counts[trans->counter]; 3059 counter = &exec->comp->counters[trans->counter]; 3060 if (count >= counter->max) 3061 ret = 0; 3062 } 3063 3064 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { 3065 xmlRegStatePtr to = exec->comp->states[trans->to]; 3066 3067 /* 3068 * this is a multiple input sequence 3069 */ 3070 if (exec->state->nbTrans > exec->transno + 1) { 3071 if (exec->inputStackNr <= 0) { 3072 xmlFARegExecSaveInputString(exec, value, data); 3073 } 3074 xmlFARegExecSave(exec); 3075 } 3076 exec->transcount = 1; 3077 do { 3078 /* 3079 * Try to progress as much as possible on the input 3080 */ 3081 if (exec->transcount == atom->max) { 3082 break; 3083 } 3084 exec->index++; 3085 value = exec->inputStack[exec->index].value; 3086 data = exec->inputStack[exec->index].data; 3087#ifdef DEBUG_PUSH 3088 printf("value loaded: %s\n", value); 3089#endif 3090 3091 /* 3092 * End of input: stop here 3093 */ 3094 if (value == NULL) { 3095 exec->index --; 3096 break; 3097 } 3098 if (exec->transcount >= atom->min) { 3099 int transno = exec->transno; 3100 xmlRegStatePtr state = exec->state; 3101 3102 /* 3103 * The transition is acceptable save it 3104 */ 3105 exec->transno = -1; /* trick */ 3106 exec->state = to; 3107 if (exec->inputStackNr <= 0) { 3108 xmlFARegExecSaveInputString(exec, value, data); 3109 } 3110 xmlFARegExecSave(exec); 3111 exec->transno = transno; 3112 exec->state = state; 3113 } 3114 ret = xmlStrEqual(value, atom->valuep); 3115 exec->transcount++; 3116 } while (ret == 1); 3117 if (exec->transcount < atom->min) 3118 ret = 0; 3119 3120 /* 3121 * If the last check failed but one transition was found 3122 * possible, rollback 3123 */ 3124 if (ret < 0) 3125 ret = 0; 3126 if (ret == 0) { 3127 goto rollback; 3128 } 3129 } 3130 } 3131 if (ret == 1) { 3132 if ((exec->callback != NULL) && (atom != NULL) && 3133 (data != NULL)) { 3134 exec->callback(exec->data, atom->valuep, 3135 atom->data, data); 3136 } 3137 if (exec->state->nbTrans > exec->transno + 1) { 3138 if (exec->inputStackNr <= 0) { 3139 xmlFARegExecSaveInputString(exec, value, data); 3140 } 3141 xmlFARegExecSave(exec); 3142 } 3143 if (trans->counter >= 0) { 3144#ifdef DEBUG_PUSH 3145 printf("Increasing count %d\n", trans->counter); 3146#endif 3147 exec->counts[trans->counter]++; 3148 } 3149#ifdef DEBUG_PUSH 3150 printf("entering state %d\n", trans->to); 3151#endif 3152 if ((exec->comp->states[trans->to] != NULL) && 3153 (exec->comp->states[trans->to]->type == 3154 XML_REGEXP_SINK_STATE)) { 3155 /* 3156 * entering a sink state, save the current state as error 3157 * state. 3158 */ 3159 if (exec->errString != NULL) 3160 xmlFree(exec->errString); 3161 exec->errString = xmlStrdup(value); 3162 exec->errState = exec->state; 3163 memcpy(exec->errCounts, exec->counts, 3164 exec->comp->nbCounters * sizeof(int)); 3165 } 3166 exec->state = exec->comp->states[trans->to]; 3167 exec->transno = 0; 3168 if (trans->atom != NULL) { 3169 if (exec->inputStack != NULL) { 3170 exec->index++; 3171 if (exec->index < exec->inputStackNr) { 3172 value = exec->inputStack[exec->index].value; 3173 data = exec->inputStack[exec->index].data; 3174#ifdef DEBUG_PUSH 3175 printf("value loaded: %s\n", value); 3176#endif 3177 } else { 3178 value = NULL; 3179 data = NULL; 3180#ifdef DEBUG_PUSH 3181 printf("end of input\n"); 3182#endif 3183 } 3184 } else { 3185 value = NULL; 3186 data = NULL; 3187#ifdef DEBUG_PUSH 3188 printf("end of input\n"); 3189#endif 3190 } 3191 } 3192 goto progress; 3193 } else if (ret < 0) { 3194 exec->status = -4; 3195 break; 3196 } 3197 } 3198 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 3199rollback: 3200 /* 3201 * if we didn't yet rollback on the current input 3202 * store the current state as the error state. 3203 */ 3204 if ((progress) && (exec->state != NULL) && 3205 (exec->state->type != XML_REGEXP_SINK_STATE)) { 3206 progress = 0; 3207 if (exec->errString != NULL) 3208 xmlFree(exec->errString); 3209 exec->errString = xmlStrdup(value); 3210 exec->errState = exec->state; 3211 memcpy(exec->errCounts, exec->counts, 3212 exec->comp->nbCounters * sizeof(int)); 3213 } 3214 3215 /* 3216 * Failed to find a way out 3217 */ 3218 exec->determinist = 0; 3219 xmlFARegExecRollBack(exec); 3220 if (exec->status == 0) { 3221 value = exec->inputStack[exec->index].value; 3222 data = exec->inputStack[exec->index].data; 3223#ifdef DEBUG_PUSH 3224 printf("value loaded: %s\n", value); 3225#endif 3226 } 3227 } 3228 continue; 3229progress: 3230 progress = 1; 3231 continue; 3232 } 3233 if (exec->status == 0) { 3234 return(exec->state->type == XML_REGEXP_FINAL_STATE); 3235 } 3236#ifdef DEBUG_ERR 3237 if (exec->status < 0) { 3238 testerr(exec); 3239 } 3240#endif 3241 return(exec->status); 3242} 3243 3244/** 3245 * xmlRegExecPushString2: 3246 * @exec: a regexp execution context or NULL to indicate the end 3247 * @value: the first string token input 3248 * @value2: the second string token input 3249 * @data: data associated to the token to reuse in callbacks 3250 * 3251 * Push one input token in the execution context 3252 * 3253 * Returns: 1 if the regexp reached a final state, 0 if non-final, and 3254 * a negative value in case of error. 3255 */ 3256int 3257xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value, 3258 const xmlChar *value2, void *data) { 3259 xmlChar buf[150]; 3260 int lenn, lenp, ret; 3261 xmlChar *str; 3262 3263 if (exec == NULL) 3264 return(-1); 3265 if (exec->comp == NULL) 3266 return(-1); 3267 if (exec->status != 0) 3268 return(exec->status); 3269 3270 if (value2 == NULL) 3271 return(xmlRegExecPushString(exec, value, data)); 3272 3273 lenn = strlen((char *) value2); 3274 lenp = strlen((char *) value); 3275 3276 if (150 < lenn + lenp + 2) { 3277 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 3278 if (str == NULL) { 3279 exec->status = -1; 3280 return(-1); 3281 } 3282 } else { 3283 str = buf; 3284 } 3285 memcpy(&str[0], value, lenp); 3286 str[lenp] = XML_REG_STRING_SEPARATOR; 3287 memcpy(&str[lenp + 1], value2, lenn); 3288 str[lenn + lenp + 1] = 0; 3289 3290 if (exec->comp->compact != NULL) 3291 ret = xmlRegCompactPushString(exec, exec->comp, str, data); 3292 else 3293 ret = xmlRegExecPushString(exec, str, data); 3294 3295 if (str != buf) 3296 xmlFree(buf); 3297 return(ret); 3298} 3299 3300/** 3301 * xmlRegExecGetValues: 3302 * @exec: a regexp execution context 3303 * @err: error extraction or normal one 3304 * @nbval: pointer to the number of accepted values IN/OUT 3305 * @nbneg: return number of negative transitions 3306 * @values: pointer to the array of acceptable values 3307 * @terminal: return value if this was a terminal state 3308 * 3309 * Extract informations from the regexp execution, internal routine to 3310 * implement xmlRegExecNextValues() and xmlRegExecErrInfo() 3311 * 3312 * Returns: 0 in case of success or -1 in case of error. 3313 */ 3314static int 3315xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, 3316 int *nbval, int *nbneg, 3317 xmlChar **values, int *terminal) { 3318 int maxval; 3319 int nb = 0; 3320 3321 if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 3322 (values == NULL) || (*nbval <= 0)) 3323 return(-1); 3324 3325 maxval = *nbval; 3326 *nbval = 0; 3327 *nbneg = 0; 3328 if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { 3329 xmlRegexpPtr comp; 3330 int target, i, state; 3331 3332 comp = exec->comp; 3333 3334 if (err) { 3335 if (exec->errStateNo == -1) return(-1); 3336 state = exec->errStateNo; 3337 } else { 3338 state = exec->index; 3339 } 3340 if (terminal != NULL) { 3341 if (comp->compact[state * (comp->nbstrings + 1)] == 3342 XML_REGEXP_FINAL_STATE) 3343 *terminal = 1; 3344 else 3345 *terminal = 0; 3346 } 3347 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 3348 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 3349 if ((target > 0) && (target <= comp->nbstates) && 3350 (comp->compact[(target - 1) * (comp->nbstrings + 1)] != 3351 XML_REGEXP_SINK_STATE)) { 3352 values[nb++] = comp->stringMap[i]; 3353 (*nbval)++; 3354 } 3355 } 3356 for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 3357 target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 3358 if ((target > 0) && (target <= comp->nbstates) && 3359 (comp->compact[(target - 1) * (comp->nbstrings + 1)] == 3360 XML_REGEXP_SINK_STATE)) { 3361 values[nb++] = comp->stringMap[i]; 3362 (*nbneg)++; 3363 } 3364 } 3365 } else { 3366 int transno; 3367 xmlRegTransPtr trans; 3368 xmlRegAtomPtr atom; 3369 xmlRegStatePtr state; 3370 3371 if (terminal != NULL) { 3372 if (exec->state->type == XML_REGEXP_FINAL_STATE) 3373 *terminal = 1; 3374 else 3375 *terminal = 0; 3376 } 3377 3378 if (err) { 3379 if (exec->errState == NULL) return(-1); 3380 state = exec->errState; 3381 } else { 3382 if (exec->state == NULL) return(-1); 3383 state = exec->state; 3384 } 3385 for (transno = 0; 3386 (transno < state->nbTrans) && (nb < maxval); 3387 transno++) { 3388 trans = &state->trans[transno]; 3389 if (trans->to < 0) 3390 continue; 3391 atom = trans->atom; 3392 if ((atom == NULL) || (atom->valuep == NULL)) 3393 continue; 3394 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 3395 /* this should not be reached but ... */ 3396 TODO; 3397 } else if (trans->count == REGEXP_ALL_COUNTER) { 3398 /* this should not be reached but ... */ 3399 TODO; 3400 } else if (trans->counter >= 0) { 3401 xmlRegCounterPtr counter; 3402 int count; 3403 3404 if (err) 3405 count = exec->errCounts[trans->counter]; 3406 else 3407 count = exec->counts[trans->counter]; 3408 counter = &exec->comp->counters[trans->counter]; 3409 if (count < counter->max) { 3410 if (atom->neg) 3411 values[nb++] = (xmlChar *) atom->valuep2; 3412 else 3413 values[nb++] = (xmlChar *) atom->valuep; 3414 (*nbval)++; 3415 } 3416 } else { 3417 if ((exec->comp->states[trans->to] != NULL) && 3418 (exec->comp->states[trans->to]->type != 3419 XML_REGEXP_SINK_STATE)) { 3420 if (atom->neg) 3421 values[nb++] = (xmlChar *) atom->valuep2; 3422 else 3423 values[nb++] = (xmlChar *) atom->valuep; 3424 (*nbval)++; 3425 } 3426 } 3427 } 3428 for (transno = 0; 3429 (transno < state->nbTrans) && (nb < maxval); 3430 transno++) { 3431 trans = &state->trans[transno]; 3432 if (trans->to < 0) 3433 continue; 3434 atom = trans->atom; 3435 if ((atom == NULL) || (atom->valuep == NULL)) 3436 continue; 3437 if (trans->count == REGEXP_ALL_LAX_COUNTER) { 3438 continue; 3439 } else if (trans->count == REGEXP_ALL_COUNTER) { 3440 continue; 3441 } else if (trans->counter >= 0) { 3442 continue; 3443 } else { 3444 if ((exec->comp->states[trans->to] != NULL) && 3445 (exec->comp->states[trans->to]->type == 3446 XML_REGEXP_SINK_STATE)) { 3447 if (atom->neg) 3448 values[nb++] = (xmlChar *) atom->valuep2; 3449 else 3450 values[nb++] = (xmlChar *) atom->valuep; 3451 (*nbneg)++; 3452 } 3453 } 3454 } 3455 } 3456 return(0); 3457} 3458 3459/** 3460 * xmlRegExecNextValues: 3461 * @exec: a regexp execution context 3462 * @nbval: pointer to the number of accepted values IN/OUT 3463 * @nbneg: return number of negative transitions 3464 * @values: pointer to the array of acceptable values 3465 * @terminal: return value if this was a terminal state 3466 * 3467 * Extract informations from the regexp execution, 3468 * the parameter @values must point to an array of @nbval string pointers 3469 * on return nbval will contain the number of possible strings in that 3470 * state and the @values array will be updated with them. The string values 3471 * returned will be freed with the @exec context and don't need to be 3472 * deallocated. 3473 * 3474 * Returns: 0 in case of success or -1 in case of error. 3475 */ 3476int 3477xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg, 3478 xmlChar **values, int *terminal) { 3479 return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal)); 3480} 3481 3482/** 3483 * xmlRegExecErrInfo: 3484 * @exec: a regexp execution context generating an error 3485 * @string: return value for the error string 3486 * @nbval: pointer to the number of accepted values IN/OUT 3487 * @nbneg: return number of negative transitions 3488 * @values: pointer to the array of acceptable values 3489 * @terminal: return value if this was a terminal state 3490 * 3491 * Extract error informations from the regexp execution, the parameter 3492 * @string will be updated with the value pushed and not accepted, 3493 * the parameter @values must point to an array of @nbval string pointers 3494 * on return nbval will contain the number of possible strings in that 3495 * state and the @values array will be updated with them. The string values 3496 * returned will be freed with the @exec context and don't need to be 3497 * deallocated. 3498 * 3499 * Returns: 0 in case of success or -1 in case of error. 3500 */ 3501int 3502xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string, 3503 int *nbval, int *nbneg, xmlChar **values, int *terminal) { 3504 if (exec == NULL) 3505 return(-1); 3506 if (string != NULL) { 3507 if (exec->status != 0) 3508 *string = exec->errString; 3509 else 3510 *string = NULL; 3511 } 3512 return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal)); 3513} 3514 3515#ifdef DEBUG_ERR 3516static void testerr(xmlRegExecCtxtPtr exec) { 3517 const xmlChar *string; 3518 xmlChar *values[5]; 3519 int nb = 5; 3520 int nbneg; 3521 int terminal; 3522 xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal); 3523} 3524#endif 3525 3526#if 0 3527static int 3528xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { 3529 xmlRegTransPtr trans; 3530 xmlRegAtomPtr atom; 3531 int ret; 3532 int codepoint, len; 3533 3534 if (exec == NULL) 3535 return(-1); 3536 if (exec->status != 0) 3537 return(exec->status); 3538 3539 while ((exec->status == 0) && 3540 ((exec->inputString[exec->index] != 0) || 3541 (exec->state->type != XML_REGEXP_FINAL_STATE))) { 3542 3543 /* 3544 * End of input on non-terminal state, rollback, however we may 3545 * still have epsilon like transition for counted transitions 3546 * on counters, in that case don't break too early. 3547 */ 3548 if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) 3549 goto rollback; 3550 3551 exec->transcount = 0; 3552 for (;exec->transno < exec->state->nbTrans;exec->transno++) { 3553 trans = &exec->state->trans[exec->transno]; 3554 if (trans->to < 0) 3555 continue; 3556 atom = trans->atom; 3557 ret = 0; 3558 if (trans->count >= 0) { 3559 int count; 3560 xmlRegCounterPtr counter; 3561 3562 /* 3563 * A counted transition. 3564 */ 3565 3566 count = exec->counts[trans->count]; 3567 counter = &exec->comp->counters[trans->count]; 3568#ifdef DEBUG_REGEXP_EXEC 3569 printf("testing count %d: val %d, min %d, max %d\n", 3570 trans->count, count, counter->min, counter->max); 3571#endif 3572 ret = ((count >= counter->min) && (count <= counter->max)); 3573 } else if (atom == NULL) { 3574 fprintf(stderr, "epsilon transition left at runtime\n"); 3575 exec->status = -2; 3576 break; 3577 } else if (exec->inputString[exec->index] != 0) { 3578 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); 3579 ret = xmlRegCheckCharacter(atom, codepoint); 3580 if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { 3581 xmlRegStatePtr to = exec->comp->states[trans->to]; 3582 3583 /* 3584 * this is a multiple input sequence 3585 */ 3586 if (exec->state->nbTrans > exec->transno + 1) { 3587 xmlFARegExecSave(exec); 3588 } 3589 exec->transcount = 1; 3590 do { 3591 /* 3592 * Try to progress as much as possible on the input 3593 */ 3594 if (exec->transcount == atom->max) { 3595 break; 3596 } 3597 exec->index += len; 3598 /* 3599 * End of input: stop here 3600 */ 3601 if (exec->inputString[exec->index] == 0) { 3602 exec->index -= len; 3603 break; 3604 } 3605 if (exec->transcount >= atom->min) { 3606 int transno = exec->transno; 3607 xmlRegStatePtr state = exec->state; 3608 3609 /* 3610 * The transition is acceptable save it 3611 */ 3612 exec->transno = -1; /* trick */ 3613 exec->state = to; 3614 xmlFARegExecSave(exec); 3615 exec->transno = transno; 3616 exec->state = state; 3617 } 3618 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), 3619 len); 3620 ret = xmlRegCheckCharacter(atom, codepoint); 3621 exec->transcount++; 3622 } while (ret == 1); 3623 if (exec->transcount < atom->min) 3624 ret = 0; 3625 3626 /* 3627 * If the last check failed but one transition was found 3628 * possible, rollback 3629 */ 3630 if (ret < 0) 3631 ret = 0; 3632 if (ret == 0) { 3633 goto rollback; 3634 } 3635 } 3636 } 3637 if (ret == 1) { 3638 if (exec->state->nbTrans > exec->transno + 1) { 3639 xmlFARegExecSave(exec); 3640 } 3641 if (trans->counter >= 0) { 3642#ifdef DEBUG_REGEXP_EXEC 3643 printf("Increasing count %d\n", trans->counter); 3644#endif 3645 exec->counts[trans->counter]++; 3646 } 3647#ifdef DEBUG_REGEXP_EXEC 3648 printf("entering state %d\n", trans->to); 3649#endif 3650 exec->state = exec->comp->states[trans->to]; 3651 exec->transno = 0; 3652 if (trans->atom != NULL) { 3653 exec->index += len; 3654 } 3655 goto progress; 3656 } else if (ret < 0) { 3657 exec->status = -4; 3658 break; 3659 } 3660 } 3661 if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 3662rollback: 3663 /* 3664 * Failed to find a way out 3665 */ 3666 exec->determinist = 0; 3667 xmlFARegExecRollBack(exec); 3668 } 3669progress: 3670 continue; 3671 } 3672} 3673#endif 3674/************************************************************************ 3675 * * 3676 * Parser for the Schemas Datatype Regular Expressions * 3677 * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * 3678 * * 3679 ************************************************************************/ 3680 3681/** 3682 * xmlFAIsChar: 3683 * @ctxt: a regexp parser context 3684 * 3685 * [10] Char ::= [^.\?*+()|#x5B#x5D] 3686 */ 3687static int 3688xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { 3689 int cur; 3690 int len; 3691 3692 cur = CUR_SCHAR(ctxt->cur, len); 3693 if ((cur == '.') || (cur == '\\') || (cur == '?') || 3694 (cur == '*') || (cur == '+') || (cur == '(') || 3695 (cur == ')') || (cur == '|') || (cur == 0x5B) || 3696 (cur == 0x5D) || (cur == 0)) 3697 return(-1); 3698 return(cur); 3699} 3700 3701/** 3702 * xmlFAParseCharProp: 3703 * @ctxt: a regexp parser context 3704 * 3705 * [27] charProp ::= IsCategory | IsBlock 3706 * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | 3707 * Separators | Symbols | Others 3708 * [29] Letters ::= 'L' [ultmo]? 3709 * [30] Marks ::= 'M' [nce]? 3710 * [31] Numbers ::= 'N' [dlo]? 3711 * [32] Punctuation ::= 'P' [cdseifo]? 3712 * [33] Separators ::= 'Z' [slp]? 3713 * [34] Symbols ::= 'S' [mcko]? 3714 * [35] Others ::= 'C' [cfon]? 3715 * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ 3716 */ 3717static void 3718xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { 3719 int cur; 3720 xmlRegAtomType type = (xmlRegAtomType) 0; 3721 xmlChar *blockName = NULL; 3722 3723 cur = CUR; 3724 if (cur == 'L') { 3725 NEXT; 3726 cur = CUR; 3727 if (cur == 'u') { 3728 NEXT; 3729 type = XML_REGEXP_LETTER_UPPERCASE; 3730 } else if (cur == 'l') { 3731 NEXT; 3732 type = XML_REGEXP_LETTER_LOWERCASE; 3733 } else if (cur == 't') { 3734 NEXT; 3735 type = XML_REGEXP_LETTER_TITLECASE; 3736 } else if (cur == 'm') { 3737 NEXT; 3738 type = XML_REGEXP_LETTER_MODIFIER; 3739 } else if (cur == 'o') { 3740 NEXT; 3741 type = XML_REGEXP_LETTER_OTHERS; 3742 } else { 3743 type = XML_REGEXP_LETTER; 3744 } 3745 } else if (cur == 'M') { 3746 NEXT; 3747 cur = CUR; 3748 if (cur == 'n') { 3749 NEXT; 3750 /* nonspacing */ 3751 type = XML_REGEXP_MARK_NONSPACING; 3752 } else if (cur == 'c') { 3753 NEXT; 3754 /* spacing combining */ 3755 type = XML_REGEXP_MARK_SPACECOMBINING; 3756 } else if (cur == 'e') { 3757 NEXT; 3758 /* enclosing */ 3759 type = XML_REGEXP_MARK_ENCLOSING; 3760 } else { 3761 /* all marks */ 3762 type = XML_REGEXP_MARK; 3763 } 3764 } else if (cur == 'N') { 3765 NEXT; 3766 cur = CUR; 3767 if (cur == 'd') { 3768 NEXT; 3769 /* digital */ 3770 type = XML_REGEXP_NUMBER_DECIMAL; 3771 } else if (cur == 'l') { 3772 NEXT; 3773 /* letter */ 3774 type = XML_REGEXP_NUMBER_LETTER; 3775 } else if (cur == 'o') { 3776 NEXT; 3777 /* other */ 3778 type = XML_REGEXP_NUMBER_OTHERS; 3779 } else { 3780 /* all numbers */ 3781 type = XML_REGEXP_NUMBER; 3782 } 3783 } else if (cur == 'P') { 3784 NEXT; 3785 cur = CUR; 3786 if (cur == 'c') { 3787 NEXT; 3788 /* connector */ 3789 type = XML_REGEXP_PUNCT_CONNECTOR; 3790 } else if (cur == 'd') { 3791 NEXT; 3792 /* dash */ 3793 type = XML_REGEXP_PUNCT_DASH; 3794 } else if (cur == 's') { 3795 NEXT; 3796 /* open */ 3797 type = XML_REGEXP_PUNCT_OPEN; 3798 } else if (cur == 'e') { 3799 NEXT; 3800 /* close */ 3801 type = XML_REGEXP_PUNCT_CLOSE; 3802 } else if (cur == 'i') { 3803 NEXT; 3804 /* initial quote */ 3805 type = XML_REGEXP_PUNCT_INITQUOTE; 3806 } else if (cur == 'f') { 3807 NEXT; 3808 /* final quote */ 3809 type = XML_REGEXP_PUNCT_FINQUOTE; 3810 } else if (cur == 'o') { 3811 NEXT; 3812 /* other */ 3813 type = XML_REGEXP_PUNCT_OTHERS; 3814 } else { 3815 /* all punctuation */ 3816 type = XML_REGEXP_PUNCT; 3817 } 3818 } else if (cur == 'Z') { 3819 NEXT; 3820 cur = CUR; 3821 if (cur == 's') { 3822 NEXT; 3823 /* space */ 3824 type = XML_REGEXP_SEPAR_SPACE; 3825 } else if (cur == 'l') { 3826 NEXT; 3827 /* line */ 3828 type = XML_REGEXP_SEPAR_LINE; 3829 } else if (cur == 'p') { 3830 NEXT; 3831 /* paragraph */ 3832 type = XML_REGEXP_SEPAR_PARA; 3833 } else { 3834 /* all separators */ 3835 type = XML_REGEXP_SEPAR; 3836 } 3837 } else if (cur == 'S') { 3838 NEXT; 3839 cur = CUR; 3840 if (cur == 'm') { 3841 NEXT; 3842 type = XML_REGEXP_SYMBOL_MATH; 3843 /* math */ 3844 } else if (cur == 'c') { 3845 NEXT; 3846 type = XML_REGEXP_SYMBOL_CURRENCY; 3847 /* currency */ 3848 } else if (cur == 'k') { 3849 NEXT; 3850 type = XML_REGEXP_SYMBOL_MODIFIER; 3851 /* modifiers */ 3852 } else if (cur == 'o') { 3853 NEXT; 3854 type = XML_REGEXP_SYMBOL_OTHERS; 3855 /* other */ 3856 } else { 3857 /* all symbols */ 3858 type = XML_REGEXP_SYMBOL; 3859 } 3860 } else if (cur == 'C') { 3861 NEXT; 3862 cur = CUR; 3863 if (cur == 'c') { 3864 NEXT; 3865 /* control */ 3866 type = XML_REGEXP_OTHER_CONTROL; 3867 } else if (cur == 'f') { 3868 NEXT; 3869 /* format */ 3870 type = XML_REGEXP_OTHER_FORMAT; 3871 } else if (cur == 'o') { 3872 NEXT; 3873 /* private use */ 3874 type = XML_REGEXP_OTHER_PRIVATE; 3875 } else if (cur == 'n') { 3876 NEXT; 3877 /* not assigned */ 3878 type = XML_REGEXP_OTHER_NA; 3879 } else { 3880 /* all others */ 3881 type = XML_REGEXP_OTHER; 3882 } 3883 } else if (cur == 'I') { 3884 const xmlChar *start; 3885 NEXT; 3886 cur = CUR; 3887 if (cur != 's') { 3888 ERROR("IsXXXX expected"); 3889 return; 3890 } 3891 NEXT; 3892 start = ctxt->cur; 3893 cur = CUR; 3894 if (((cur >= 'a') && (cur <= 'z')) || 3895 ((cur >= 'A') && (cur <= 'Z')) || 3896 ((cur >= '0') && (cur <= '9')) || 3897 (cur == 0x2D)) { 3898 NEXT; 3899 cur = CUR; 3900 while (((cur >= 'a') && (cur <= 'z')) || 3901 ((cur >= 'A') && (cur <= 'Z')) || 3902 ((cur >= '0') && (cur <= '9')) || 3903 (cur == 0x2D)) { 3904 NEXT; 3905 cur = CUR; 3906 } 3907 } 3908 type = XML_REGEXP_BLOCK_NAME; 3909 blockName = xmlStrndup(start, ctxt->cur - start); 3910 } else { 3911 ERROR("Unknown char property"); 3912 return; 3913 } 3914 if (ctxt->atom == NULL) { 3915 ctxt->atom = xmlRegNewAtom(ctxt, type); 3916 if (ctxt->atom != NULL) 3917 ctxt->atom->valuep = blockName; 3918 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 3919 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 3920 type, 0, 0, blockName); 3921 } 3922} 3923 3924/** 3925 * xmlFAParseCharClassEsc: 3926 * @ctxt: a regexp parser context 3927 * 3928 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 3929 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] 3930 * [25] catEsc ::= '\p{' charProp '}' 3931 * [26] complEsc ::= '\P{' charProp '}' 3932 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) 3933 */ 3934static void 3935xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { 3936 int cur; 3937 3938 if (CUR == '.') { 3939 if (ctxt->atom == NULL) { 3940 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); 3941 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 3942 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 3943 XML_REGEXP_ANYCHAR, 0, 0, NULL); 3944 } 3945 NEXT; 3946 return; 3947 } 3948 if (CUR != '\\') { 3949 ERROR("Escaped sequence: expecting \\"); 3950 return; 3951 } 3952 NEXT; 3953 cur = CUR; 3954 if (cur == 'p') { 3955 NEXT; 3956 if (CUR != '{') { 3957 ERROR("Expecting '{'"); 3958 return; 3959 } 3960 NEXT; 3961 xmlFAParseCharProp(ctxt); 3962 if (CUR != '}') { 3963 ERROR("Expecting '}'"); 3964 return; 3965 } 3966 NEXT; 3967 } else if (cur == 'P') { 3968 NEXT; 3969 if (CUR != '{') { 3970 ERROR("Expecting '{'"); 3971 return; 3972 } 3973 NEXT; 3974 xmlFAParseCharProp(ctxt); 3975 ctxt->atom->neg = 1; 3976 if (CUR != '}') { 3977 ERROR("Expecting '}'"); 3978 return; 3979 } 3980 NEXT; 3981 } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || 3982 (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || 3983 (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || 3984 (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || 3985 (cur == 0x5E)) { 3986 if (ctxt->atom == NULL) { 3987 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 3988 if (ctxt->atom != NULL) { 3989 switch (cur) { 3990 case 'n': 3991 ctxt->atom->codepoint = '\n'; 3992 break; 3993 case 'r': 3994 ctxt->atom->codepoint = '\r'; 3995 break; 3996 case 't': 3997 ctxt->atom->codepoint = '\t'; 3998 break; 3999 default: 4000 ctxt->atom->codepoint = cur; 4001 } 4002 } 4003 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 4004 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4005 XML_REGEXP_CHARVAL, cur, cur, NULL); 4006 } 4007 NEXT; 4008 } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || 4009 (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || 4010 (cur == 'w') || (cur == 'W')) { 4011 xmlRegAtomType type = XML_REGEXP_ANYSPACE; 4012 4013 switch (cur) { 4014 case 's': 4015 type = XML_REGEXP_ANYSPACE; 4016 break; 4017 case 'S': 4018 type = XML_REGEXP_NOTSPACE; 4019 break; 4020 case 'i': 4021 type = XML_REGEXP_INITNAME; 4022 break; 4023 case 'I': 4024 type = XML_REGEXP_NOTINITNAME; 4025 break; 4026 case 'c': 4027 type = XML_REGEXP_NAMECHAR; 4028 break; 4029 case 'C': 4030 type = XML_REGEXP_NOTNAMECHAR; 4031 break; 4032 case 'd': 4033 type = XML_REGEXP_DECIMAL; 4034 break; 4035 case 'D': 4036 type = XML_REGEXP_NOTDECIMAL; 4037 break; 4038 case 'w': 4039 type = XML_REGEXP_REALCHAR; 4040 break; 4041 case 'W': 4042 type = XML_REGEXP_NOTREALCHAR; 4043 break; 4044 } 4045 NEXT; 4046 if (ctxt->atom == NULL) { 4047 ctxt->atom = xmlRegNewAtom(ctxt, type); 4048 } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 4049 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4050 type, 0, 0, NULL); 4051 } 4052 } 4053} 4054 4055/** 4056 * xmlFAParseCharRef: 4057 * @ctxt: a regexp parser context 4058 * 4059 * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' ) 4060 */ 4061static int 4062xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) { 4063 int ret = 0, cur; 4064 4065 if ((CUR != '&') || (NXT(1) != '#')) 4066 return(-1); 4067 NEXT; 4068 NEXT; 4069 cur = CUR; 4070 if (cur == 'x') { 4071 NEXT; 4072 cur = CUR; 4073 if (((cur >= '0') && (cur <= '9')) || 4074 ((cur >= 'a') && (cur <= 'f')) || 4075 ((cur >= 'A') && (cur <= 'F'))) { 4076 while (((cur >= '0') && (cur <= '9')) || 4077 ((cur >= 'A') && (cur <= 'F'))) { 4078 if ((cur >= '0') && (cur <= '9')) 4079 ret = ret * 16 + cur - '0'; 4080 else if ((cur >= 'a') && (cur <= 'f')) 4081 ret = ret * 16 + 10 + (cur - 'a'); 4082 else 4083 ret = ret * 16 + 10 + (cur - 'A'); 4084 NEXT; 4085 cur = CUR; 4086 } 4087 } else { 4088 ERROR("Char ref: expecting [0-9A-F]"); 4089 return(-1); 4090 } 4091 } else { 4092 if ((cur >= '0') && (cur <= '9')) { 4093 while ((cur >= '0') && (cur <= '9')) { 4094 ret = ret * 10 + cur - '0'; 4095 NEXT; 4096 cur = CUR; 4097 } 4098 } else { 4099 ERROR("Char ref: expecting [0-9]"); 4100 return(-1); 4101 } 4102 } 4103 if (cur != ';') { 4104 ERROR("Char ref: expecting ';'"); 4105 return(-1); 4106 } else { 4107 NEXT; 4108 } 4109 return(ret); 4110} 4111 4112/** 4113 * xmlFAParseCharRange: 4114 * @ctxt: a regexp parser context 4115 * 4116 * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash 4117 * [18] seRange ::= charOrEsc '-' charOrEsc 4118 * [20] charOrEsc ::= XmlChar | SingleCharEsc 4119 * [21] XmlChar ::= [^\#x2D#x5B#x5D] 4120 * [22] XmlCharIncDash ::= [^\#x5B#x5D] 4121 */ 4122static void 4123xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { 4124 int cur, len; 4125 int start = -1; 4126 int end = -1; 4127 4128 if ((CUR == '&') && (NXT(1) == '#')) { 4129 end = start = xmlFAParseCharRef(ctxt); 4130 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4131 XML_REGEXP_CHARVAL, start, end, NULL); 4132 return; 4133 } 4134 cur = CUR; 4135 if (cur == '\\') { 4136 NEXT; 4137 cur = CUR; 4138 switch (cur) { 4139 case 'n': start = 0xA; break; 4140 case 'r': start = 0xD; break; 4141 case 't': start = 0x9; break; 4142 case '\\': case '|': case '.': case '-': case '^': case '?': 4143 case '*': case '+': case '{': case '}': case '(': case ')': 4144 case '[': case ']': 4145 start = cur; break; 4146 default: 4147 ERROR("Invalid escape value"); 4148 return; 4149 } 4150 end = start; 4151 len = 1; 4152 } else if ((cur != 0x5B) && (cur != 0x5D)) { 4153 end = start = CUR_SCHAR(ctxt->cur, len); 4154 } else { 4155 ERROR("Expecting a char range"); 4156 return; 4157 } 4158 NEXTL(len); 4159 if (start == '-') { 4160 return; 4161 } 4162 cur = CUR; 4163 if ((cur != '-') || (NXT(1) == ']')) { 4164 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4165 XML_REGEXP_CHARVAL, start, end, NULL); 4166 return; 4167 } 4168 NEXT; 4169 cur = CUR; 4170 if (cur == '\\') { 4171 NEXT; 4172 cur = CUR; 4173 switch (cur) { 4174 case 'n': end = 0xA; break; 4175 case 'r': end = 0xD; break; 4176 case 't': end = 0x9; break; 4177 case '\\': case '|': case '.': case '-': case '^': case '?': 4178 case '*': case '+': case '{': case '}': case '(': case ')': 4179 case '[': case ']': 4180 end = cur; break; 4181 default: 4182 ERROR("Invalid escape value"); 4183 return; 4184 } 4185 len = 1; 4186 } else if ((cur != 0x5B) && (cur != 0x5D)) { 4187 end = CUR_SCHAR(ctxt->cur, len); 4188 } else { 4189 ERROR("Expecting the end of a char range"); 4190 return; 4191 } 4192 NEXTL(len); 4193 /* TODO check that the values are acceptable character ranges for XML */ 4194 if (end < start) { 4195 ERROR("End of range is before start of range"); 4196 } else { 4197 xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 4198 XML_REGEXP_CHARVAL, start, end, NULL); 4199 } 4200 return; 4201} 4202 4203/** 4204 * xmlFAParsePosCharGroup: 4205 * @ctxt: a regexp parser context 4206 * 4207 * [14] posCharGroup ::= ( charRange | charClassEsc )+ 4208 */ 4209static void 4210xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { 4211 do { 4212 if ((CUR == '\\') || (CUR == '.')) { 4213 xmlFAParseCharClassEsc(ctxt); 4214 } else { 4215 xmlFAParseCharRange(ctxt); 4216 } 4217 } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && 4218 (ctxt->error == 0)); 4219} 4220 4221/** 4222 * xmlFAParseCharGroup: 4223 * @ctxt: a regexp parser context 4224 * 4225 * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub 4226 * [15] negCharGroup ::= '^' posCharGroup 4227 * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr 4228 * [12] charClassExpr ::= '[' charGroup ']' 4229 */ 4230static void 4231xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { 4232 int n = ctxt->neg; 4233 while ((CUR != ']') && (ctxt->error == 0)) { 4234 if (CUR == '^') { 4235 int neg = ctxt->neg; 4236 4237 NEXT; 4238 ctxt->neg = !ctxt->neg; 4239 xmlFAParsePosCharGroup(ctxt); 4240 ctxt->neg = neg; 4241 } else if ((CUR == '-') && (NXT(1) == '[')) { 4242 int neg = ctxt->neg; 4243 ctxt->neg = 2; 4244 NEXT; /* eat the '-' */ 4245 NEXT; /* eat the '[' */ 4246 xmlFAParseCharGroup(ctxt); 4247 if (CUR == ']') { 4248 NEXT; 4249 } else { 4250 ERROR("charClassExpr: ']' expected"); 4251 break; 4252 } 4253 ctxt->neg = neg; 4254 break; 4255 } else if (CUR != ']') { 4256 xmlFAParsePosCharGroup(ctxt); 4257 } 4258 } 4259 ctxt->neg = n; 4260} 4261 4262/** 4263 * xmlFAParseCharClass: 4264 * @ctxt: a regexp parser context 4265 * 4266 * [11] charClass ::= charClassEsc | charClassExpr 4267 * [12] charClassExpr ::= '[' charGroup ']' 4268 */ 4269static void 4270xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { 4271 if (CUR == '[') { 4272 NEXT; 4273 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); 4274 if (ctxt->atom == NULL) 4275 return; 4276 xmlFAParseCharGroup(ctxt); 4277 if (CUR == ']') { 4278 NEXT; 4279 } else { 4280 ERROR("xmlFAParseCharClass: ']' expected"); 4281 } 4282 } else { 4283 xmlFAParseCharClassEsc(ctxt); 4284 } 4285} 4286 4287/** 4288 * xmlFAParseQuantExact: 4289 * @ctxt: a regexp parser context 4290 * 4291 * [8] QuantExact ::= [0-9]+ 4292 * 4293 * Returns 0 if success or -1 in case of error 4294 */ 4295static int 4296xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { 4297 int ret = 0; 4298 int ok = 0; 4299 4300 while ((CUR >= '0') && (CUR <= '9')) { 4301 ret = ret * 10 + (CUR - '0'); 4302 ok = 1; 4303 NEXT; 4304 } 4305 if (ok != 1) { 4306 return(-1); 4307 } 4308 return(ret); 4309} 4310 4311/** 4312 * xmlFAParseQuantifier: 4313 * @ctxt: a regexp parser context 4314 * 4315 * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) 4316 * [5] quantity ::= quantRange | quantMin | QuantExact 4317 * [6] quantRange ::= QuantExact ',' QuantExact 4318 * [7] quantMin ::= QuantExact ',' 4319 * [8] QuantExact ::= [0-9]+ 4320 */ 4321static int 4322xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { 4323 int cur; 4324 4325 cur = CUR; 4326 if ((cur == '?') || (cur == '*') || (cur == '+')) { 4327 if (ctxt->atom != NULL) { 4328 if (cur == '?') 4329 ctxt->atom->quant = XML_REGEXP_QUANT_OPT; 4330 else if (cur == '*') 4331 ctxt->atom->quant = XML_REGEXP_QUANT_MULT; 4332 else if (cur == '+') 4333 ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; 4334 } 4335 NEXT; 4336 return(1); 4337 } 4338 if (cur == '{') { 4339 int min = 0, max = 0; 4340 4341 NEXT; 4342 cur = xmlFAParseQuantExact(ctxt); 4343 if (cur >= 0) 4344 min = cur; 4345 if (CUR == ',') { 4346 NEXT; 4347 if (CUR == '}') 4348 max = INT_MAX; 4349 else { 4350 cur = xmlFAParseQuantExact(ctxt); 4351 if (cur >= 0) 4352 max = cur; 4353 else { 4354 ERROR("Improper quantifier"); 4355 } 4356 } 4357 } 4358 if (CUR == '}') { 4359 NEXT; 4360 } else { 4361 ERROR("Unterminated quantifier"); 4362 } 4363 if (max == 0) 4364 max = min; 4365 if (ctxt->atom != NULL) { 4366 ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; 4367 ctxt->atom->min = min; 4368 ctxt->atom->max = max; 4369 } 4370 return(1); 4371 } 4372 return(0); 4373} 4374 4375/** 4376 * xmlFAParseAtom: 4377 * @ctxt: a regexp parser context 4378 * 4379 * [9] atom ::= Char | charClass | ( '(' regExp ')' ) 4380 */ 4381static int 4382xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { 4383 int codepoint, len; 4384 4385 codepoint = xmlFAIsChar(ctxt); 4386 if (codepoint > 0) { 4387 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 4388 if (ctxt->atom == NULL) 4389 return(-1); 4390 codepoint = CUR_SCHAR(ctxt->cur, len); 4391 ctxt->atom->codepoint = codepoint; 4392 NEXTL(len); 4393 return(1); 4394 } else if (CUR == '|') { 4395 return(0); 4396 } else if (CUR == 0) { 4397 return(0); 4398 } else if (CUR == ')') { 4399 return(0); 4400 } else if (CUR == '(') { 4401 xmlRegStatePtr start, oldend; 4402 4403 NEXT; 4404 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); 4405 start = ctxt->state; 4406 oldend = ctxt->end; 4407 ctxt->end = NULL; 4408 ctxt->atom = NULL; 4409 xmlFAParseRegExp(ctxt, 0); 4410 if (CUR == ')') { 4411 NEXT; 4412 } else { 4413 ERROR("xmlFAParseAtom: expecting ')'"); 4414 } 4415 ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); 4416 if (ctxt->atom == NULL) 4417 return(-1); 4418 ctxt->atom->start = start; 4419 ctxt->atom->stop = ctxt->state; 4420 ctxt->end = oldend; 4421 return(1); 4422 } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { 4423 xmlFAParseCharClass(ctxt); 4424 return(1); 4425 } 4426 return(0); 4427} 4428 4429/** 4430 * xmlFAParsePiece: 4431 * @ctxt: a regexp parser context 4432 * 4433 * [3] piece ::= atom quantifier? 4434 */ 4435static int 4436xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { 4437 int ret; 4438 4439 ctxt->atom = NULL; 4440 ret = xmlFAParseAtom(ctxt); 4441 if (ret == 0) 4442 return(0); 4443 if (ctxt->atom == NULL) { 4444 ERROR("internal: no atom generated"); 4445 } 4446 xmlFAParseQuantifier(ctxt); 4447 return(1); 4448} 4449 4450/** 4451 * xmlFAParseBranch: 4452 * @ctxt: a regexp parser context 4453 * 4454 * [2] branch ::= piece* 4455 8 4456 */ 4457static int 4458xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) { 4459 xmlRegStatePtr previous; 4460 int ret; 4461 4462 previous = ctxt->state; 4463 ret = xmlFAParsePiece(ctxt); 4464 if (ret != 0) { 4465 if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0) 4466 return(-1); 4467 previous = ctxt->state; 4468 ctxt->atom = NULL; 4469 } 4470 while ((ret != 0) && (ctxt->error == 0)) { 4471 ret = xmlFAParsePiece(ctxt); 4472 if (ret != 0) { 4473 if (xmlFAGenerateTransitions(ctxt, previous, NULL, 4474 ctxt->atom) < 0) 4475 return(-1); 4476 previous = ctxt->state; 4477 ctxt->atom = NULL; 4478 } 4479 } 4480 return(0); 4481} 4482 4483/** 4484 * xmlFAParseRegExp: 4485 * @ctxt: a regexp parser context 4486 * @top: is this the top-level expression ? 4487 * 4488 * [1] regExp ::= branch ( '|' branch )* 4489 */ 4490static void 4491xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { 4492 xmlRegStatePtr start, end; 4493 4494 /* if not top start should have been generated by an epsilon trans */ 4495 start = ctxt->state; 4496 ctxt->end = NULL; 4497 xmlFAParseBranch(ctxt); 4498 if (top) { 4499#ifdef DEBUG_REGEXP_GRAPH 4500 printf("State %d is final\n", ctxt->state->no); 4501#endif 4502 ctxt->state->type = XML_REGEXP_FINAL_STATE; 4503 } 4504 if (CUR != '|') { 4505 ctxt->end = ctxt->state; 4506 return; 4507 } 4508 end = ctxt->state; 4509 while ((CUR == '|') && (ctxt->error == 0)) { 4510 NEXT; 4511 ctxt->state = start; 4512 ctxt->end = NULL; 4513 xmlFAParseBranch(ctxt); 4514 if (top) { 4515 ctxt->state->type = XML_REGEXP_FINAL_STATE; 4516#ifdef DEBUG_REGEXP_GRAPH 4517 printf("State %d is final\n", ctxt->state->no); 4518#endif 4519 } else { 4520 xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end); 4521 } 4522 } 4523 if (!top) { 4524 ctxt->state = end; 4525 ctxt->end = end; 4526 } 4527} 4528 4529/************************************************************************ 4530 * * 4531 * The basic API * 4532 * * 4533 ************************************************************************/ 4534 4535/** 4536 * xmlRegexpPrint: 4537 * @output: the file for the output debug 4538 * @regexp: the compiled regexp 4539 * 4540 * Print the content of the compiled regular expression 4541 */ 4542void 4543xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { 4544 int i; 4545 4546 if (output == NULL) 4547 return; 4548 fprintf(output, " regexp: "); 4549 if (regexp == NULL) { 4550 fprintf(output, "NULL\n"); 4551 return; 4552 } 4553 fprintf(output, "'%s' ", regexp->string); 4554 fprintf(output, "\n"); 4555 fprintf(output, "%d atoms:\n", regexp->nbAtoms); 4556 for (i = 0;i < regexp->nbAtoms; i++) { 4557 fprintf(output, " %02d ", i); 4558 xmlRegPrintAtom(output, regexp->atoms[i]); 4559 } 4560 fprintf(output, "%d states:", regexp->nbStates); 4561 fprintf(output, "\n"); 4562 for (i = 0;i < regexp->nbStates; i++) { 4563 xmlRegPrintState(output, regexp->states[i]); 4564 } 4565 fprintf(output, "%d counters:\n", regexp->nbCounters); 4566 for (i = 0;i < regexp->nbCounters; i++) { 4567 fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, 4568 regexp->counters[i].max); 4569 } 4570} 4571 4572/** 4573 * xmlRegexpCompile: 4574 * @regexp: a regular expression string 4575 * 4576 * Parses a regular expression conforming to XML Schemas Part 2 Datatype 4577 * Appendix F and builds an automata suitable for testing strings against 4578 * that regular expression 4579 * 4580 * Returns the compiled expression or NULL in case of error 4581 */ 4582xmlRegexpPtr 4583xmlRegexpCompile(const xmlChar *regexp) { 4584 xmlRegexpPtr ret; 4585 xmlRegParserCtxtPtr ctxt; 4586 4587 ctxt = xmlRegNewParserCtxt(regexp); 4588 if (ctxt == NULL) 4589 return(NULL); 4590 4591 /* initialize the parser */ 4592 ctxt->end = NULL; 4593 ctxt->start = ctxt->state = xmlRegNewState(ctxt); 4594 xmlRegStatePush(ctxt, ctxt->start); 4595 4596 /* parse the expression building an automata */ 4597 xmlFAParseRegExp(ctxt, 1); 4598 if (CUR != 0) { 4599 ERROR("xmlFAParseRegExp: extra characters"); 4600 } 4601 ctxt->end = ctxt->state; 4602 ctxt->start->type = XML_REGEXP_START_STATE; 4603 ctxt->end->type = XML_REGEXP_FINAL_STATE; 4604 4605 /* remove the Epsilon except for counted transitions */ 4606 xmlFAEliminateEpsilonTransitions(ctxt); 4607 4608 4609 if (ctxt->error != 0) { 4610 xmlRegFreeParserCtxt(ctxt); 4611 return(NULL); 4612 } 4613 ret = xmlRegEpxFromParse(ctxt); 4614 xmlRegFreeParserCtxt(ctxt); 4615 return(ret); 4616} 4617 4618/** 4619 * xmlRegexpExec: 4620 * @comp: the compiled regular expression 4621 * @content: the value to check against the regular expression 4622 * 4623 * Check if the regular expression generates the value 4624 * 4625 * Returns 1 if it matches, 0 if not and a negative value in case of error 4626 */ 4627int 4628xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { 4629 if ((comp == NULL) || (content == NULL)) 4630 return(-1); 4631 return(xmlFARegExec(comp, content)); 4632} 4633 4634/** 4635 * xmlRegexpIsDeterminist: 4636 * @comp: the compiled regular expression 4637 * 4638 * Check if the regular expression is determinist 4639 * 4640 * Returns 1 if it yes, 0 if not and a negative value in case of error 4641 */ 4642int 4643xmlRegexpIsDeterminist(xmlRegexpPtr comp) { 4644 xmlAutomataPtr am; 4645 int ret; 4646 4647 if (comp == NULL) 4648 return(-1); 4649 if (comp->determinist != -1) 4650 return(comp->determinist); 4651 4652 am = xmlNewAutomata(); 4653 if (am->states != NULL) { 4654 int i; 4655 4656 for (i = 0;i < am->nbStates;i++) 4657 xmlRegFreeState(am->states[i]); 4658 xmlFree(am->states); 4659 } 4660 am->nbAtoms = comp->nbAtoms; 4661 am->atoms = comp->atoms; 4662 am->nbStates = comp->nbStates; 4663 am->states = comp->states; 4664 am->determinist = -1; 4665 ret = xmlFAComputesDeterminism(am); 4666 am->atoms = NULL; 4667 am->states = NULL; 4668 xmlFreeAutomata(am); 4669 return(ret); 4670} 4671 4672/** 4673 * xmlRegFreeRegexp: 4674 * @regexp: the regexp 4675 * 4676 * Free a regexp 4677 */ 4678void 4679xmlRegFreeRegexp(xmlRegexpPtr regexp) { 4680 int i; 4681 if (regexp == NULL) 4682 return; 4683 4684 if (regexp->string != NULL) 4685 xmlFree(regexp->string); 4686 if (regexp->states != NULL) { 4687 for (i = 0;i < regexp->nbStates;i++) 4688 xmlRegFreeState(regexp->states[i]); 4689 xmlFree(regexp->states); 4690 } 4691 if (regexp->atoms != NULL) { 4692 for (i = 0;i < regexp->nbAtoms;i++) 4693 xmlRegFreeAtom(regexp->atoms[i]); 4694 xmlFree(regexp->atoms); 4695 } 4696 if (regexp->counters != NULL) 4697 xmlFree(regexp->counters); 4698 if (regexp->compact != NULL) 4699 xmlFree(regexp->compact); 4700 if (regexp->transdata != NULL) 4701 xmlFree(regexp->transdata); 4702 if (regexp->stringMap != NULL) { 4703 for (i = 0; i < regexp->nbstrings;i++) 4704 xmlFree(regexp->stringMap[i]); 4705 xmlFree(regexp->stringMap); 4706 } 4707 4708 xmlFree(regexp); 4709} 4710 4711#ifdef LIBXML_AUTOMATA_ENABLED 4712/************************************************************************ 4713 * * 4714 * The Automata interface * 4715 * * 4716 ************************************************************************/ 4717 4718/** 4719 * xmlNewAutomata: 4720 * 4721 * Create a new automata 4722 * 4723 * Returns the new object or NULL in case of failure 4724 */ 4725xmlAutomataPtr 4726xmlNewAutomata(void) { 4727 xmlAutomataPtr ctxt; 4728 4729 ctxt = xmlRegNewParserCtxt(NULL); 4730 if (ctxt == NULL) 4731 return(NULL); 4732 4733 /* initialize the parser */ 4734 ctxt->end = NULL; 4735 ctxt->start = ctxt->state = xmlRegNewState(ctxt); 4736 ctxt->start->type = XML_REGEXP_START_STATE; 4737 if (ctxt->start == NULL) { 4738 xmlFreeAutomata(ctxt); 4739 return(NULL); 4740 } 4741 if (xmlRegStatePush(ctxt, ctxt->start) < 0) { 4742 xmlRegFreeState(ctxt->start); 4743 xmlFreeAutomata(ctxt); 4744 return(NULL); 4745 } 4746 4747 return(ctxt); 4748} 4749 4750/** 4751 * xmlFreeAutomata: 4752 * @am: an automata 4753 * 4754 * Free an automata 4755 */ 4756void 4757xmlFreeAutomata(xmlAutomataPtr am) { 4758 if (am == NULL) 4759 return; 4760 xmlRegFreeParserCtxt(am); 4761} 4762 4763/** 4764 * xmlAutomataGetInitState: 4765 * @am: an automata 4766 * 4767 * Initial state lookup 4768 * 4769 * Returns the initial state of the automata 4770 */ 4771xmlAutomataStatePtr 4772xmlAutomataGetInitState(xmlAutomataPtr am) { 4773 if (am == NULL) 4774 return(NULL); 4775 return(am->start); 4776} 4777 4778/** 4779 * xmlAutomataSetFinalState: 4780 * @am: an automata 4781 * @state: a state in this automata 4782 * 4783 * Makes that state a final state 4784 * 4785 * Returns 0 or -1 in case of error 4786 */ 4787int 4788xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { 4789 if ((am == NULL) || (state == NULL)) 4790 return(-1); 4791 state->type = XML_REGEXP_FINAL_STATE; 4792 return(0); 4793} 4794 4795/** 4796 * xmlAutomataNewTransition: 4797 * @am: an automata 4798 * @from: the starting point of the transition 4799 * @to: the target point of the transition or NULL 4800 * @token: the input string associated to that transition 4801 * @data: data passed to the callback function if the transition is activated 4802 * 4803 * If @to is NULL, this creates first a new target state in the automata 4804 * and then adds a transition from the @from state to the target state 4805 * activated by the value of @token 4806 * 4807 * Returns the target state or NULL in case of error 4808 */ 4809xmlAutomataStatePtr 4810xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, 4811 xmlAutomataStatePtr to, const xmlChar *token, 4812 void *data) { 4813 xmlRegAtomPtr atom; 4814 4815 if ((am == NULL) || (from == NULL) || (token == NULL)) 4816 return(NULL); 4817 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 4818 if (atom == NULL) 4819 return(NULL); 4820 atom->data = data; 4821 if (atom == NULL) 4822 return(NULL); 4823 atom->valuep = xmlStrdup(token); 4824 4825 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 4826 xmlRegFreeAtom(atom); 4827 return(NULL); 4828 } 4829 if (to == NULL) 4830 return(am->state); 4831 return(to); 4832} 4833 4834/** 4835 * xmlAutomataNewTransition2: 4836 * @am: an automata 4837 * @from: the starting point of the transition 4838 * @to: the target point of the transition or NULL 4839 * @token: the first input string associated to that transition 4840 * @token2: the second input string associated to that transition 4841 * @data: data passed to the callback function if the transition is activated 4842 * 4843 * If @to is NULL, this creates first a new target state in the automata 4844 * and then adds a transition from the @from state to the target state 4845 * activated by the value of @token 4846 * 4847 * Returns the target state or NULL in case of error 4848 */ 4849xmlAutomataStatePtr 4850xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from, 4851 xmlAutomataStatePtr to, const xmlChar *token, 4852 const xmlChar *token2, void *data) { 4853 xmlRegAtomPtr atom; 4854 4855 if ((am == NULL) || (from == NULL) || (token == NULL)) 4856 return(NULL); 4857 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 4858 atom->data = data; 4859 if (atom == NULL) 4860 return(NULL); 4861 if ((token2 == NULL) || (*token2 == 0)) { 4862 atom->valuep = xmlStrdup(token); 4863 } else { 4864 int lenn, lenp; 4865 xmlChar *str; 4866 4867 lenn = strlen((char *) token2); 4868 lenp = strlen((char *) token); 4869 4870 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 4871 if (str == NULL) { 4872 xmlRegFreeAtom(atom); 4873 return(NULL); 4874 } 4875 memcpy(&str[0], token, lenp); 4876 str[lenp] = '|'; 4877 memcpy(&str[lenp + 1], token2, lenn); 4878 str[lenn + lenp + 1] = 0; 4879 4880 atom->valuep = str; 4881 } 4882 4883 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 4884 xmlRegFreeAtom(atom); 4885 return(NULL); 4886 } 4887 if (to == NULL) 4888 return(am->state); 4889 return(to); 4890} 4891 4892/** 4893 * xmlAutomataNewNegTrans: 4894 * @am: an automata 4895 * @from: the starting point of the transition 4896 * @to: the target point of the transition or NULL 4897 * @token: the first input string associated to that transition 4898 * @token2: the second input string associated to that transition 4899 * @data: data passed to the callback function if the transition is activated 4900 * 4901 * If @to is NULL, this creates first a new target state in the automata 4902 * and then adds a transition from the @from state to the target state 4903 * activated by any value except (@token,@token2) 4904 * 4905 * Returns the target state or NULL in case of error 4906 */ 4907xmlAutomataStatePtr 4908xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 4909 xmlAutomataStatePtr to, const xmlChar *token, 4910 const xmlChar *token2, void *data) { 4911 xmlRegAtomPtr atom; 4912 xmlChar err_msg[200]; 4913 4914 if ((am == NULL) || (from == NULL) || (token == NULL)) 4915 return(NULL); 4916 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 4917 if (atom == NULL) 4918 return(NULL); 4919 atom->data = data; 4920 atom->neg = 1; 4921 if ((token2 == NULL) || (*token2 == 0)) { 4922 atom->valuep = xmlStrdup(token); 4923 } else { 4924 int lenn, lenp; 4925 xmlChar *str; 4926 4927 lenn = strlen((char *) token2); 4928 lenp = strlen((char *) token); 4929 4930 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 4931 if (str == NULL) { 4932 xmlRegFreeAtom(atom); 4933 return(NULL); 4934 } 4935 memcpy(&str[0], token, lenp); 4936 str[lenp] = '|'; 4937 memcpy(&str[lenp + 1], token2, lenn); 4938 str[lenn + lenp + 1] = 0; 4939 4940 atom->valuep = str; 4941 } 4942 snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep); 4943 err_msg[199] = 0; 4944 atom->valuep2 = xmlStrdup(err_msg); 4945 4946 if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 4947 xmlRegFreeAtom(atom); 4948 return(NULL); 4949 } 4950 if (to == NULL) 4951 return(am->state); 4952 return(to); 4953} 4954 4955/** 4956 * xmlAutomataNewCountTrans2: 4957 * @am: an automata 4958 * @from: the starting point of the transition 4959 * @to: the target point of the transition or NULL 4960 * @token: the input string associated to that transition 4961 * @token2: the second input string associated to that transition 4962 * @min: the minimum successive occurences of token 4963 * @max: the maximum successive occurences of token 4964 * @data: data associated to the transition 4965 * 4966 * If @to is NULL, this creates first a new target state in the automata 4967 * and then adds a transition from the @from state to the target state 4968 * activated by a succession of input of value @token and @token2 and 4969 * whose number is between @min and @max 4970 * 4971 * Returns the target state or NULL in case of error 4972 */ 4973xmlAutomataStatePtr 4974xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 4975 xmlAutomataStatePtr to, const xmlChar *token, 4976 const xmlChar *token2, 4977 int min, int max, void *data) { 4978 xmlRegAtomPtr atom; 4979 int counter; 4980 4981 if ((am == NULL) || (from == NULL) || (token == NULL)) 4982 return(NULL); 4983 if (min < 0) 4984 return(NULL); 4985 if ((max < min) || (max < 1)) 4986 return(NULL); 4987 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 4988 if (atom == NULL) 4989 return(NULL); 4990 if ((token2 == NULL) || (*token2 == 0)) { 4991 atom->valuep = xmlStrdup(token); 4992 } else { 4993 int lenn, lenp; 4994 xmlChar *str; 4995 4996 lenn = strlen((char *) token2); 4997 lenp = strlen((char *) token); 4998 4999 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 5000 if (str == NULL) { 5001 xmlRegFreeAtom(atom); 5002 return(NULL); 5003 } 5004 memcpy(&str[0], token, lenp); 5005 str[lenp] = '|'; 5006 memcpy(&str[lenp + 1], token2, lenn); 5007 str[lenn + lenp + 1] = 0; 5008 5009 atom->valuep = str; 5010 } 5011 atom->data = data; 5012 if (min == 0) 5013 atom->min = 1; 5014 else 5015 atom->min = min; 5016 atom->max = max; 5017 5018 /* 5019 * associate a counter to the transition. 5020 */ 5021 counter = xmlRegGetCounter(am); 5022 am->counters[counter].min = min; 5023 am->counters[counter].max = max; 5024 5025 /* xmlFAGenerateTransitions(am, from, to, atom); */ 5026 if (to == NULL) { 5027 to = xmlRegNewState(am); 5028 xmlRegStatePush(am, to); 5029 } 5030 xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 5031 xmlRegAtomPush(am, atom); 5032 am->state = to; 5033 5034 if (to == NULL) 5035 to = am->state; 5036 if (to == NULL) 5037 return(NULL); 5038 if (min == 0) 5039 xmlFAGenerateEpsilonTransition(am, from, to); 5040 return(to); 5041} 5042 5043/** 5044 * xmlAutomataNewCountTrans: 5045 * @am: an automata 5046 * @from: the starting point of the transition 5047 * @to: the target point of the transition or NULL 5048 * @token: the input string associated to that transition 5049 * @min: the minimum successive occurences of token 5050 * @max: the maximum successive occurences of token 5051 * @data: data associated to the transition 5052 * 5053 * If @to is NULL, this creates first a new target state in the automata 5054 * and then adds a transition from the @from state to the target state 5055 * activated by a succession of input of value @token and whose number 5056 * is between @min and @max 5057 * 5058 * Returns the target state or NULL in case of error 5059 */ 5060xmlAutomataStatePtr 5061xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5062 xmlAutomataStatePtr to, const xmlChar *token, 5063 int min, int max, void *data) { 5064 xmlRegAtomPtr atom; 5065 int counter; 5066 5067 if ((am == NULL) || (from == NULL) || (token == NULL)) 5068 return(NULL); 5069 if (min < 0) 5070 return(NULL); 5071 if ((max < min) || (max < 1)) 5072 return(NULL); 5073 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 5074 if (atom == NULL) 5075 return(NULL); 5076 atom->valuep = xmlStrdup(token); 5077 atom->data = data; 5078 if (min == 0) 5079 atom->min = 1; 5080 else 5081 atom->min = min; 5082 atom->max = max; 5083 5084 /* 5085 * associate a counter to the transition. 5086 */ 5087 counter = xmlRegGetCounter(am); 5088 am->counters[counter].min = min; 5089 am->counters[counter].max = max; 5090 5091 /* xmlFAGenerateTransitions(am, from, to, atom); */ 5092 if (to == NULL) { 5093 to = xmlRegNewState(am); 5094 xmlRegStatePush(am, to); 5095 } 5096 xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 5097 xmlRegAtomPush(am, atom); 5098 am->state = to; 5099 5100 if (to == NULL) 5101 to = am->state; 5102 if (to == NULL) 5103 return(NULL); 5104 if (min == 0) 5105 xmlFAGenerateEpsilonTransition(am, from, to); 5106 return(to); 5107} 5108 5109/** 5110 * xmlAutomataNewOnceTrans2: 5111 * @am: an automata 5112 * @from: the starting point of the transition 5113 * @to: the target point of the transition or NULL 5114 * @token: the input string associated to that transition 5115 * @token2: the second input string associated to that transition 5116 * @min: the minimum successive occurences of token 5117 * @max: the maximum successive occurences of token 5118 * @data: data associated to the transition 5119 * 5120 * If @to is NULL, this creates first a new target state in the automata 5121 * and then adds a transition from the @from state to the target state 5122 * activated by a succession of input of value @token and @token2 and whose 5123 * number is between @min and @max, moreover that transition can only be 5124 * crossed once. 5125 * 5126 * Returns the target state or NULL in case of error 5127 */ 5128xmlAutomataStatePtr 5129xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 5130 xmlAutomataStatePtr to, const xmlChar *token, 5131 const xmlChar *token2, 5132 int min, int max, void *data) { 5133 xmlRegAtomPtr atom; 5134 int counter; 5135 5136 if ((am == NULL) || (from == NULL) || (token == NULL)) 5137 return(NULL); 5138 if (min < 1) 5139 return(NULL); 5140 if ((max < min) || (max < 1)) 5141 return(NULL); 5142 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 5143 if (atom == NULL) 5144 return(NULL); 5145 if ((token2 == NULL) || (*token2 == 0)) { 5146 atom->valuep = xmlStrdup(token); 5147 } else { 5148 int lenn, lenp; 5149 xmlChar *str; 5150 5151 lenn = strlen((char *) token2); 5152 lenp = strlen((char *) token); 5153 5154 str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 5155 if (str == NULL) { 5156 xmlRegFreeAtom(atom); 5157 return(NULL); 5158 } 5159 memcpy(&str[0], token, lenp); 5160 str[lenp] = '|'; 5161 memcpy(&str[lenp + 1], token2, lenn); 5162 str[lenn + lenp + 1] = 0; 5163 5164 atom->valuep = str; 5165 } 5166 atom->data = data; 5167 atom->quant = XML_REGEXP_QUANT_ONCEONLY; 5168 if (min == 0) 5169 atom->min = 1; 5170 else 5171 atom->min = min; 5172 atom->max = max; 5173 /* 5174 * associate a counter to the transition. 5175 */ 5176 counter = xmlRegGetCounter(am); 5177 am->counters[counter].min = 1; 5178 am->counters[counter].max = 1; 5179 5180 /* xmlFAGenerateTransitions(am, from, to, atom); */ 5181 if (to == NULL) { 5182 to = xmlRegNewState(am); 5183 xmlRegStatePush(am, to); 5184 } 5185 xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 5186 xmlRegAtomPush(am, atom); 5187 am->state = to; 5188 return(to); 5189} 5190 5191 5192 5193/** 5194 * xmlAutomataNewOnceTrans: 5195 * @am: an automata 5196 * @from: the starting point of the transition 5197 * @to: the target point of the transition or NULL 5198 * @token: the input string associated to that transition 5199 * @min: the minimum successive occurences of token 5200 * @max: the maximum successive occurences of token 5201 * @data: data associated to the transition 5202 * 5203 * If @to is NULL, this creates first a new target state in the automata 5204 * and then adds a transition from the @from state to the target state 5205 * activated by a succession of input of value @token and whose number 5206 * is between @min and @max, moreover that transition can only be crossed 5207 * once. 5208 * 5209 * Returns the target state or NULL in case of error 5210 */ 5211xmlAutomataStatePtr 5212xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5213 xmlAutomataStatePtr to, const xmlChar *token, 5214 int min, int max, void *data) { 5215 xmlRegAtomPtr atom; 5216 int counter; 5217 5218 if ((am == NULL) || (from == NULL) || (token == NULL)) 5219 return(NULL); 5220 if (min < 1) 5221 return(NULL); 5222 if ((max < min) || (max < 1)) 5223 return(NULL); 5224 atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 5225 if (atom == NULL) 5226 return(NULL); 5227 atom->valuep = xmlStrdup(token); 5228 atom->data = data; 5229 atom->quant = XML_REGEXP_QUANT_ONCEONLY; 5230 if (min == 0) 5231 atom->min = 1; 5232 else 5233 atom->min = min; 5234 atom->max = max; 5235 /* 5236 * associate a counter to the transition. 5237 */ 5238 counter = xmlRegGetCounter(am); 5239 am->counters[counter].min = 1; 5240 am->counters[counter].max = 1; 5241 5242 /* xmlFAGenerateTransitions(am, from, to, atom); */ 5243 if (to == NULL) { 5244 to = xmlRegNewState(am); 5245 xmlRegStatePush(am, to); 5246 } 5247 xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 5248 xmlRegAtomPush(am, atom); 5249 am->state = to; 5250 return(to); 5251} 5252 5253/** 5254 * xmlAutomataNewState: 5255 * @am: an automata 5256 * 5257 * Create a new disconnected state in the automata 5258 * 5259 * Returns the new state or NULL in case of error 5260 */ 5261xmlAutomataStatePtr 5262xmlAutomataNewState(xmlAutomataPtr am) { 5263 xmlAutomataStatePtr to; 5264 5265 if (am == NULL) 5266 return(NULL); 5267 to = xmlRegNewState(am); 5268 xmlRegStatePush(am, to); 5269 return(to); 5270} 5271 5272/** 5273 * xmlAutomataNewEpsilon: 5274 * @am: an automata 5275 * @from: the starting point of the transition 5276 * @to: the target point of the transition or NULL 5277 * 5278 * If @to is NULL, this creates first a new target state in the automata 5279 * and then adds an epsilon transition from the @from state to the 5280 * target state 5281 * 5282 * Returns the target state or NULL in case of error 5283 */ 5284xmlAutomataStatePtr 5285xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, 5286 xmlAutomataStatePtr to) { 5287 if ((am == NULL) || (from == NULL)) 5288 return(NULL); 5289 xmlFAGenerateEpsilonTransition(am, from, to); 5290 if (to == NULL) 5291 return(am->state); 5292 return(to); 5293} 5294 5295/** 5296 * xmlAutomataNewAllTrans: 5297 * @am: an automata 5298 * @from: the starting point of the transition 5299 * @to: the target point of the transition or NULL 5300 * @lax: allow to transition if not all all transitions have been activated 5301 * 5302 * If @to is NULL, this creates first a new target state in the automata 5303 * and then adds a an ALL transition from the @from state to the 5304 * target state. That transition is an epsilon transition allowed only when 5305 * all transitions from the @from node have been activated. 5306 * 5307 * Returns the target state or NULL in case of error 5308 */ 5309xmlAutomataStatePtr 5310xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5311 xmlAutomataStatePtr to, int lax) { 5312 if ((am == NULL) || (from == NULL)) 5313 return(NULL); 5314 xmlFAGenerateAllTransition(am, from, to, lax); 5315 if (to == NULL) 5316 return(am->state); 5317 return(to); 5318} 5319 5320/** 5321 * xmlAutomataNewCounter: 5322 * @am: an automata 5323 * @min: the minimal value on the counter 5324 * @max: the maximal value on the counter 5325 * 5326 * Create a new counter 5327 * 5328 * Returns the counter number or -1 in case of error 5329 */ 5330int 5331xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { 5332 int ret; 5333 5334 if (am == NULL) 5335 return(-1); 5336 5337 ret = xmlRegGetCounter(am); 5338 if (ret < 0) 5339 return(-1); 5340 am->counters[ret].min = min; 5341 am->counters[ret].max = max; 5342 return(ret); 5343} 5344 5345/** 5346 * xmlAutomataNewCountedTrans: 5347 * @am: an automata 5348 * @from: the starting point of the transition 5349 * @to: the target point of the transition or NULL 5350 * @counter: the counter associated to that transition 5351 * 5352 * If @to is NULL, this creates first a new target state in the automata 5353 * and then adds an epsilon transition from the @from state to the target state 5354 * which will increment the counter provided 5355 * 5356 * Returns the target state or NULL in case of error 5357 */ 5358xmlAutomataStatePtr 5359xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5360 xmlAutomataStatePtr to, int counter) { 5361 if ((am == NULL) || (from == NULL) || (counter < 0)) 5362 return(NULL); 5363 xmlFAGenerateCountedEpsilonTransition(am, from, to, counter); 5364 if (to == NULL) 5365 return(am->state); 5366 return(to); 5367} 5368 5369/** 5370 * xmlAutomataNewCounterTrans: 5371 * @am: an automata 5372 * @from: the starting point of the transition 5373 * @to: the target point of the transition or NULL 5374 * @counter: the counter associated to that transition 5375 * 5376 * If @to is NULL, this creates first a new target state in the automata 5377 * and then adds an epsilon transition from the @from state to the target state 5378 * which will be allowed only if the counter is within the right range. 5379 * 5380 * Returns the target state or NULL in case of error 5381 */ 5382xmlAutomataStatePtr 5383xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5384 xmlAutomataStatePtr to, int counter) { 5385 if ((am == NULL) || (from == NULL) || (counter < 0)) 5386 return(NULL); 5387 xmlFAGenerateCountedTransition(am, from, to, counter); 5388 if (to == NULL) 5389 return(am->state); 5390 return(to); 5391} 5392 5393/** 5394 * xmlAutomataCompile: 5395 * @am: an automata 5396 * 5397 * Compile the automata into a Reg Exp ready for being executed. 5398 * The automata should be free after this point. 5399 * 5400 * Returns the compiled regexp or NULL in case of error 5401 */ 5402xmlRegexpPtr 5403xmlAutomataCompile(xmlAutomataPtr am) { 5404 xmlRegexpPtr ret; 5405 5406 if ((am == NULL) || (am->error != 0)) return(NULL); 5407 xmlFAEliminateEpsilonTransitions(am); 5408 /* xmlFAComputesDeterminism(am); */ 5409 ret = xmlRegEpxFromParse(am); 5410 5411 return(ret); 5412} 5413 5414/** 5415 * xmlAutomataIsDeterminist: 5416 * @am: an automata 5417 * 5418 * Checks if an automata is determinist. 5419 * 5420 * Returns 1 if true, 0 if not, and -1 in case of error 5421 */ 5422int 5423xmlAutomataIsDeterminist(xmlAutomataPtr am) { 5424 int ret; 5425 5426 if (am == NULL) 5427 return(-1); 5428 5429 ret = xmlFAComputesDeterminism(am); 5430 return(ret); 5431} 5432#endif /* LIBXML_AUTOMATA_ENABLED */ 5433#define bottom_xmlregexp 5434#include "elfgcchack.h" 5435#endif /* LIBXML_REGEXP_ENABLED */ 5436