1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications these include:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22/* #define DEBUG_ERR */
23
24#include <stdio.h>
25#include <string.h>
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <libxml/tree.h>
31#include <libxml/parserInternals.h>
32#include <libxml/xmlregexp.h>
33#include <libxml/xmlautomata.h>
34#include <libxml/xmlunicode.h>
35
36#ifndef INT_MAX
37#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38#endif
39
40/* #define DEBUG_REGEXP_GRAPH */
41/* #define DEBUG_REGEXP_EXEC */
42/* #define DEBUG_PUSH */
43/* #define DEBUG_COMPACTION */
44
45#define MAX_PUSH 10000000
46
47#define ERROR(str)							\
48    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
49    xmlRegexpErrCompile(ctxt, str);
50#define NEXT ctxt->cur++
51#define CUR (*(ctxt->cur))
52#define NXT(index) (ctxt->cur[index])
53
54#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
55#define NEXTL(l) ctxt->cur += l;
56#define XML_REG_STRING_SEPARATOR '|'
57/*
58 * Need PREV to check on a '-' within a Character Group. May only be used
59 * when it's guaranteed that cur is not at the beginning of ctxt->string!
60 */
61#define PREV (ctxt->cur[-1])
62
63/**
64 * TODO:
65 *
66 * macro to flag unimplemented blocks
67 */
68#define TODO 								\
69    xmlGenericError(xmlGenericErrorContext,				\
70	    "Unimplemented block at %s:%d\n",				\
71            __FILE__, __LINE__);
72
73/************************************************************************
74 * 									*
75 * 			Datatypes and structures			*
76 * 									*
77 ************************************************************************/
78
79/*
80 * Note: the order of the enums below is significant, do not shuffle
81 */
82typedef enum {
83    XML_REGEXP_EPSILON = 1,
84    XML_REGEXP_CHARVAL,
85    XML_REGEXP_RANGES,
86    XML_REGEXP_SUBREG,  /* used for () sub regexps */
87    XML_REGEXP_STRING,
88    XML_REGEXP_ANYCHAR, /* . */
89    XML_REGEXP_ANYSPACE, /* \s */
90    XML_REGEXP_NOTSPACE, /* \S */
91    XML_REGEXP_INITNAME, /* \l */
92    XML_REGEXP_NOTINITNAME, /* \L */
93    XML_REGEXP_NAMECHAR, /* \c */
94    XML_REGEXP_NOTNAMECHAR, /* \C */
95    XML_REGEXP_DECIMAL, /* \d */
96    XML_REGEXP_NOTDECIMAL, /* \D */
97    XML_REGEXP_REALCHAR, /* \w */
98    XML_REGEXP_NOTREALCHAR, /* \W */
99    XML_REGEXP_LETTER = 100,
100    XML_REGEXP_LETTER_UPPERCASE,
101    XML_REGEXP_LETTER_LOWERCASE,
102    XML_REGEXP_LETTER_TITLECASE,
103    XML_REGEXP_LETTER_MODIFIER,
104    XML_REGEXP_LETTER_OTHERS,
105    XML_REGEXP_MARK,
106    XML_REGEXP_MARK_NONSPACING,
107    XML_REGEXP_MARK_SPACECOMBINING,
108    XML_REGEXP_MARK_ENCLOSING,
109    XML_REGEXP_NUMBER,
110    XML_REGEXP_NUMBER_DECIMAL,
111    XML_REGEXP_NUMBER_LETTER,
112    XML_REGEXP_NUMBER_OTHERS,
113    XML_REGEXP_PUNCT,
114    XML_REGEXP_PUNCT_CONNECTOR,
115    XML_REGEXP_PUNCT_DASH,
116    XML_REGEXP_PUNCT_OPEN,
117    XML_REGEXP_PUNCT_CLOSE,
118    XML_REGEXP_PUNCT_INITQUOTE,
119    XML_REGEXP_PUNCT_FINQUOTE,
120    XML_REGEXP_PUNCT_OTHERS,
121    XML_REGEXP_SEPAR,
122    XML_REGEXP_SEPAR_SPACE,
123    XML_REGEXP_SEPAR_LINE,
124    XML_REGEXP_SEPAR_PARA,
125    XML_REGEXP_SYMBOL,
126    XML_REGEXP_SYMBOL_MATH,
127    XML_REGEXP_SYMBOL_CURRENCY,
128    XML_REGEXP_SYMBOL_MODIFIER,
129    XML_REGEXP_SYMBOL_OTHERS,
130    XML_REGEXP_OTHER,
131    XML_REGEXP_OTHER_CONTROL,
132    XML_REGEXP_OTHER_FORMAT,
133    XML_REGEXP_OTHER_PRIVATE,
134    XML_REGEXP_OTHER_NA,
135    XML_REGEXP_BLOCK_NAME
136} xmlRegAtomType;
137
138typedef enum {
139    XML_REGEXP_QUANT_EPSILON = 1,
140    XML_REGEXP_QUANT_ONCE,
141    XML_REGEXP_QUANT_OPT,
142    XML_REGEXP_QUANT_MULT,
143    XML_REGEXP_QUANT_PLUS,
144    XML_REGEXP_QUANT_ONCEONLY,
145    XML_REGEXP_QUANT_ALL,
146    XML_REGEXP_QUANT_RANGE
147} xmlRegQuantType;
148
149typedef enum {
150    XML_REGEXP_START_STATE = 1,
151    XML_REGEXP_FINAL_STATE,
152    XML_REGEXP_TRANS_STATE,
153    XML_REGEXP_SINK_STATE,
154    XML_REGEXP_UNREACH_STATE
155} xmlRegStateType;
156
157typedef enum {
158    XML_REGEXP_MARK_NORMAL = 0,
159    XML_REGEXP_MARK_START,
160    XML_REGEXP_MARK_VISITED
161} xmlRegMarkedType;
162
163typedef struct _xmlRegRange xmlRegRange;
164typedef xmlRegRange *xmlRegRangePtr;
165
166struct _xmlRegRange {
167    int neg;		/* 0 normal, 1 not, 2 exclude */
168    xmlRegAtomType type;
169    int start;
170    int end;
171    xmlChar *blockName;
172};
173
174typedef struct _xmlRegAtom xmlRegAtom;
175typedef xmlRegAtom *xmlRegAtomPtr;
176
177typedef struct _xmlAutomataState xmlRegState;
178typedef xmlRegState *xmlRegStatePtr;
179
180struct _xmlRegAtom {
181    int no;
182    xmlRegAtomType type;
183    xmlRegQuantType quant;
184    int min;
185    int max;
186
187    void *valuep;
188    void *valuep2;
189    int neg;
190    int codepoint;
191    xmlRegStatePtr start;
192    xmlRegStatePtr start0;
193    xmlRegStatePtr stop;
194    int maxRanges;
195    int nbRanges;
196    xmlRegRangePtr *ranges;
197    void *data;
198};
199
200typedef struct _xmlRegCounter xmlRegCounter;
201typedef xmlRegCounter *xmlRegCounterPtr;
202
203struct _xmlRegCounter {
204    int min;
205    int max;
206};
207
208typedef struct _xmlRegTrans xmlRegTrans;
209typedef xmlRegTrans *xmlRegTransPtr;
210
211struct _xmlRegTrans {
212    xmlRegAtomPtr atom;
213    int to;
214    int counter;
215    int count;
216    int nd;
217};
218
219struct _xmlAutomataState {
220    xmlRegStateType type;
221    xmlRegMarkedType mark;
222    xmlRegMarkedType reached;
223    int no;
224    int maxTrans;
225    int nbTrans;
226    xmlRegTrans *trans;
227    /*  knowing states ponting to us can speed things up */
228    int maxTransTo;
229    int nbTransTo;
230    int *transTo;
231};
232
233typedef struct _xmlAutomata xmlRegParserCtxt;
234typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
235
236#define AM_AUTOMATA_RNG 1
237
238struct _xmlAutomata {
239    xmlChar *string;
240    xmlChar *cur;
241
242    int error;
243    int neg;
244
245    xmlRegStatePtr start;
246    xmlRegStatePtr end;
247    xmlRegStatePtr state;
248
249    xmlRegAtomPtr atom;
250
251    int maxAtoms;
252    int nbAtoms;
253    xmlRegAtomPtr *atoms;
254
255    int maxStates;
256    int nbStates;
257    xmlRegStatePtr *states;
258
259    int maxCounters;
260    int nbCounters;
261    xmlRegCounter *counters;
262
263    int determinist;
264    int negs;
265    int flags;
266};
267
268struct _xmlRegexp {
269    xmlChar *string;
270    int nbStates;
271    xmlRegStatePtr *states;
272    int nbAtoms;
273    xmlRegAtomPtr *atoms;
274    int nbCounters;
275    xmlRegCounter *counters;
276    int determinist;
277    int flags;
278    /*
279     * That's the compact form for determinists automatas
280     */
281    int nbstates;
282    int *compact;
283    void **transdata;
284    int nbstrings;
285    xmlChar **stringMap;
286};
287
288typedef struct _xmlRegExecRollback xmlRegExecRollback;
289typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
290
291struct _xmlRegExecRollback {
292    xmlRegStatePtr state;/* the current state */
293    int index;		/* the index in the input stack */
294    int nextbranch;	/* the next transition to explore in that state */
295    int *counts;	/* save the automata state if it has some */
296};
297
298typedef struct _xmlRegInputToken xmlRegInputToken;
299typedef xmlRegInputToken *xmlRegInputTokenPtr;
300
301struct _xmlRegInputToken {
302    xmlChar *value;
303    void *data;
304};
305
306struct _xmlRegExecCtxt {
307    int status;		/* execution status != 0 indicate an error */
308    int determinist;	/* did we find an indeterministic behaviour */
309    xmlRegexpPtr comp;	/* the compiled regexp */
310    xmlRegExecCallbacks callback;
311    void *data;
312
313    xmlRegStatePtr state;/* the current state */
314    int transno;	/* the current transition on that state */
315    int transcount;	/* the number of chars in char counted transitions */
316
317    /*
318     * A stack of rollback states
319     */
320    int maxRollbacks;
321    int nbRollbacks;
322    xmlRegExecRollback *rollbacks;
323
324    /*
325     * The state of the automata if any
326     */
327    int *counts;
328
329    /*
330     * The input stack
331     */
332    int inputStackMax;
333    int inputStackNr;
334    int index;
335    int *charStack;
336    const xmlChar *inputString; /* when operating on characters */
337    xmlRegInputTokenPtr inputStack;/* when operating on strings */
338
339    /*
340     * error handling
341     */
342    int errStateNo;		/* the error state number */
343    xmlRegStatePtr errState;    /* the error state */
344    xmlChar *errString;		/* the string raising the error */
345    int *errCounts;		/* counters at the error state */
346    int nbPush;
347};
348
349#define REGEXP_ALL_COUNTER	0x123456
350#define REGEXP_ALL_LAX_COUNTER	0x123457
351
352static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
353static void xmlRegFreeState(xmlRegStatePtr state);
354static void xmlRegFreeAtom(xmlRegAtomPtr atom);
355static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
356static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
357static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
358                  int neg, int start, int end, const xmlChar *blockName);
359
360void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
361
362/************************************************************************
363 *									*
364 * 		Regexp memory error handler				*
365 *									*
366 ************************************************************************/
367/**
368 * xmlRegexpErrMemory:
369 * @extra:  extra information
370 *
371 * Handle an out of memory condition
372 */
373static void
374xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
375{
376    const char *regexp = NULL;
377    if (ctxt != NULL) {
378        regexp = (const char *) ctxt->string;
379	ctxt->error = XML_ERR_NO_MEMORY;
380    }
381    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
382		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
383		    regexp, NULL, 0, 0,
384		    "Memory allocation failed : %s\n", extra);
385}
386
387/**
388 * xmlRegexpErrCompile:
389 * @extra:  extra information
390 *
391 * Handle a compilation failure
392 */
393static void
394xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
395{
396    const char *regexp = NULL;
397    int idx = 0;
398
399    if (ctxt != NULL) {
400        regexp = (const char *) ctxt->string;
401	idx = ctxt->cur - ctxt->string;
402	ctxt->error = XML_REGEXP_COMPILE_ERROR;
403    }
404    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
405		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
406		    regexp, NULL, idx, 0,
407		    "failed to compile: %s\n", extra);
408}
409
410/************************************************************************
411 * 									*
412 * 			Allocation/Deallocation				*
413 * 									*
414 ************************************************************************/
415
416static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
417/**
418 * xmlRegEpxFromParse:
419 * @ctxt:  the parser context used to build it
420 *
421 * Allocate a new regexp and fill it with the result from the parser
422 *
423 * Returns the new regexp or NULL in case of error
424 */
425static xmlRegexpPtr
426xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
427    xmlRegexpPtr ret;
428
429    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
430    if (ret == NULL) {
431	xmlRegexpErrMemory(ctxt, "compiling regexp");
432	return(NULL);
433    }
434    memset(ret, 0, sizeof(xmlRegexp));
435    ret->string = ctxt->string;
436    ret->nbStates = ctxt->nbStates;
437    ret->states = ctxt->states;
438    ret->nbAtoms = ctxt->nbAtoms;
439    ret->atoms = ctxt->atoms;
440    ret->nbCounters = ctxt->nbCounters;
441    ret->counters = ctxt->counters;
442    ret->determinist = ctxt->determinist;
443    ret->flags = ctxt->flags;
444    if (ret->determinist == -1) {
445        xmlRegexpIsDeterminist(ret);
446    }
447
448    if ((ret->determinist != 0) &&
449	(ret->nbCounters == 0) &&
450	(ctxt->negs == 0) &&
451	(ret->atoms != NULL) &&
452	(ret->atoms[0] != NULL) &&
453	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
454	int i, j, nbstates = 0, nbatoms = 0;
455	int *stateRemap;
456	int *stringRemap;
457	int *transitions;
458	void **transdata;
459	xmlChar **stringMap;
460        xmlChar *value;
461
462	/*
463	 * Switch to a compact representation
464	 * 1/ counting the effective number of states left
465	 * 2/ counting the unique number of atoms, and check that
466	 *    they are all of the string type
467	 * 3/ build a table state x atom for the transitions
468	 */
469
470	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
471	if (stateRemap == NULL) {
472	    xmlRegexpErrMemory(ctxt, "compiling regexp");
473	    xmlFree(ret);
474	    return(NULL);
475	}
476	for (i = 0;i < ret->nbStates;i++) {
477	    if (ret->states[i] != NULL) {
478		stateRemap[i] = nbstates;
479		nbstates++;
480	    } else {
481		stateRemap[i] = -1;
482	    }
483	}
484#ifdef DEBUG_COMPACTION
485	printf("Final: %d states\n", nbstates);
486#endif
487	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
488	if (stringMap == NULL) {
489	    xmlRegexpErrMemory(ctxt, "compiling regexp");
490	    xmlFree(stateRemap);
491	    xmlFree(ret);
492	    return(NULL);
493	}
494	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
495	if (stringRemap == NULL) {
496	    xmlRegexpErrMemory(ctxt, "compiling regexp");
497	    xmlFree(stringMap);
498	    xmlFree(stateRemap);
499	    xmlFree(ret);
500	    return(NULL);
501	}
502	for (i = 0;i < ret->nbAtoms;i++) {
503	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
504		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
505		value = ret->atoms[i]->valuep;
506                for (j = 0;j < nbatoms;j++) {
507		    if (xmlStrEqual(stringMap[j], value)) {
508			stringRemap[i] = j;
509			break;
510		    }
511		}
512		if (j >= nbatoms) {
513		    stringRemap[i] = nbatoms;
514		    stringMap[nbatoms] = xmlStrdup(value);
515		    if (stringMap[nbatoms] == NULL) {
516			for (i = 0;i < nbatoms;i++)
517			    xmlFree(stringMap[i]);
518			xmlFree(stringRemap);
519			xmlFree(stringMap);
520			xmlFree(stateRemap);
521			xmlFree(ret);
522			return(NULL);
523		    }
524		    nbatoms++;
525		}
526	    } else {
527		xmlFree(stateRemap);
528		xmlFree(stringRemap);
529		for (i = 0;i < nbatoms;i++)
530		    xmlFree(stringMap[i]);
531		xmlFree(stringMap);
532		xmlFree(ret);
533		return(NULL);
534	    }
535	}
536#ifdef DEBUG_COMPACTION
537	printf("Final: %d atoms\n", nbatoms);
538#endif
539	transitions = (int *) xmlMalloc((nbstates + 1) *
540	                                (nbatoms + 1) * sizeof(int));
541	if (transitions == NULL) {
542	    xmlFree(stateRemap);
543	    xmlFree(stringRemap);
544	    xmlFree(stringMap);
545	    xmlFree(ret);
546	    return(NULL);
547	}
548	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
549
550	/*
551	 * Allocate the transition table. The first entry for each
552	 * state corresponds to the state type.
553	 */
554	transdata = NULL;
555
556	for (i = 0;i < ret->nbStates;i++) {
557	    int stateno, atomno, targetno, prev;
558	    xmlRegStatePtr state;
559	    xmlRegTransPtr trans;
560
561	    stateno = stateRemap[i];
562	    if (stateno == -1)
563		continue;
564	    state = ret->states[i];
565
566	    transitions[stateno * (nbatoms + 1)] = state->type;
567
568	    for (j = 0;j < state->nbTrans;j++) {
569		trans = &(state->trans[j]);
570		if ((trans->to == -1) || (trans->atom == NULL))
571		    continue;
572                atomno = stringRemap[trans->atom->no];
573		if ((trans->atom->data != NULL) && (transdata == NULL)) {
574		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
575			                            sizeof(void *));
576		    if (transdata != NULL)
577			memset(transdata, 0,
578			       nbstates * nbatoms * sizeof(void *));
579		    else {
580			xmlRegexpErrMemory(ctxt, "compiling regexp");
581			break;
582		    }
583		}
584		targetno = stateRemap[trans->to];
585		/*
586		 * if the same atom can generate transitions to 2 different
587		 * states then it means the automata is not determinist and
588		 * the compact form can't be used !
589		 */
590		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
591		if (prev != 0) {
592		    if (prev != targetno + 1) {
593			ret->determinist = 0;
594#ifdef DEBUG_COMPACTION
595			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
596			       i, j, trans->atom->no, trans->to, atomno, targetno);
597			printf("       previous to is %d\n", prev);
598#endif
599			if (transdata != NULL)
600			    xmlFree(transdata);
601			xmlFree(transitions);
602			xmlFree(stateRemap);
603			xmlFree(stringRemap);
604			for (i = 0;i < nbatoms;i++)
605			    xmlFree(stringMap[i]);
606			xmlFree(stringMap);
607			goto not_determ;
608		    }
609		} else {
610#if 0
611		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
612			   i, j, trans->atom->no, trans->to, atomno, targetno);
613#endif
614		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
615			targetno + 1; /* to avoid 0 */
616		    if (transdata != NULL)
617			transdata[stateno * nbatoms + atomno] =
618			    trans->atom->data;
619		}
620	    }
621	}
622	ret->determinist = 1;
623#ifdef DEBUG_COMPACTION
624	/*
625	 * Debug
626	 */
627	for (i = 0;i < nbstates;i++) {
628	    for (j = 0;j < nbatoms + 1;j++) {
629                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
630	    }
631	    printf("\n");
632	}
633	printf("\n");
634#endif
635	/*
636	 * Cleanup of the old data
637	 */
638	if (ret->states != NULL) {
639	    for (i = 0;i < ret->nbStates;i++)
640		xmlRegFreeState(ret->states[i]);
641	    xmlFree(ret->states);
642	}
643	ret->states = NULL;
644	ret->nbStates = 0;
645	if (ret->atoms != NULL) {
646	    for (i = 0;i < ret->nbAtoms;i++)
647		xmlRegFreeAtom(ret->atoms[i]);
648	    xmlFree(ret->atoms);
649	}
650	ret->atoms = NULL;
651	ret->nbAtoms = 0;
652
653	ret->compact = transitions;
654	ret->transdata = transdata;
655	ret->stringMap = stringMap;
656	ret->nbstrings = nbatoms;
657	ret->nbstates = nbstates;
658	xmlFree(stateRemap);
659	xmlFree(stringRemap);
660    }
661not_determ:
662    ctxt->string = NULL;
663    ctxt->nbStates = 0;
664    ctxt->states = NULL;
665    ctxt->nbAtoms = 0;
666    ctxt->atoms = NULL;
667    ctxt->nbCounters = 0;
668    ctxt->counters = NULL;
669    return(ret);
670}
671
672/**
673 * xmlRegNewParserCtxt:
674 * @string:  the string to parse
675 *
676 * Allocate a new regexp parser context
677 *
678 * Returns the new context or NULL in case of error
679 */
680static xmlRegParserCtxtPtr
681xmlRegNewParserCtxt(const xmlChar *string) {
682    xmlRegParserCtxtPtr ret;
683
684    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
685    if (ret == NULL)
686	return(NULL);
687    memset(ret, 0, sizeof(xmlRegParserCtxt));
688    if (string != NULL)
689	ret->string = xmlStrdup(string);
690    ret->cur = ret->string;
691    ret->neg = 0;
692    ret->negs = 0;
693    ret->error = 0;
694    ret->determinist = -1;
695    return(ret);
696}
697
698/**
699 * xmlRegNewRange:
700 * @ctxt:  the regexp parser context
701 * @neg:  is that negative
702 * @type:  the type of range
703 * @start:  the start codepoint
704 * @end:  the end codepoint
705 *
706 * Allocate a new regexp range
707 *
708 * Returns the new range or NULL in case of error
709 */
710static xmlRegRangePtr
711xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
712	       int neg, xmlRegAtomType type, int start, int end) {
713    xmlRegRangePtr ret;
714
715    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
716    if (ret == NULL) {
717	xmlRegexpErrMemory(ctxt, "allocating range");
718	return(NULL);
719    }
720    ret->neg = neg;
721    ret->type = type;
722    ret->start = start;
723    ret->end = end;
724    return(ret);
725}
726
727/**
728 * xmlRegFreeRange:
729 * @range:  the regexp range
730 *
731 * Free a regexp range
732 */
733static void
734xmlRegFreeRange(xmlRegRangePtr range) {
735    if (range == NULL)
736	return;
737
738    if (range->blockName != NULL)
739	xmlFree(range->blockName);
740    xmlFree(range);
741}
742
743/**
744 * xmlRegCopyRange:
745 * @range:  the regexp range
746 *
747 * Copy a regexp range
748 *
749 * Returns the new copy or NULL in case of error.
750 */
751static xmlRegRangePtr
752xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
753    xmlRegRangePtr ret;
754
755    if (range == NULL)
756	return(NULL);
757
758    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
759                         range->end);
760    if (ret == NULL)
761        return(NULL);
762    if (range->blockName != NULL) {
763	ret->blockName = xmlStrdup(range->blockName);
764	if (ret->blockName == NULL) {
765	    xmlRegexpErrMemory(ctxt, "allocating range");
766	    xmlRegFreeRange(ret);
767	    return(NULL);
768	}
769    }
770    return(ret);
771}
772
773/**
774 * xmlRegNewAtom:
775 * @ctxt:  the regexp parser context
776 * @type:  the type of atom
777 *
778 * Allocate a new atom
779 *
780 * Returns the new atom or NULL in case of error
781 */
782static xmlRegAtomPtr
783xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
784    xmlRegAtomPtr ret;
785
786    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
787    if (ret == NULL) {
788	xmlRegexpErrMemory(ctxt, "allocating atom");
789	return(NULL);
790    }
791    memset(ret, 0, sizeof(xmlRegAtom));
792    ret->type = type;
793    ret->quant = XML_REGEXP_QUANT_ONCE;
794    ret->min = 0;
795    ret->max = 0;
796    return(ret);
797}
798
799/**
800 * xmlRegFreeAtom:
801 * @atom:  the regexp atom
802 *
803 * Free a regexp atom
804 */
805static void
806xmlRegFreeAtom(xmlRegAtomPtr atom) {
807    int i;
808
809    if (atom == NULL)
810	return;
811
812    for (i = 0;i < atom->nbRanges;i++)
813	xmlRegFreeRange(atom->ranges[i]);
814    if (atom->ranges != NULL)
815	xmlFree(atom->ranges);
816    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
817	xmlFree(atom->valuep);
818    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
819	xmlFree(atom->valuep2);
820    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
821	xmlFree(atom->valuep);
822    xmlFree(atom);
823}
824
825/**
826 * xmlRegCopyAtom:
827 * @ctxt:  the regexp parser context
828 * @atom:  the oiginal atom
829 *
830 * Allocate a new regexp range
831 *
832 * Returns the new atom or NULL in case of error
833 */
834static xmlRegAtomPtr
835xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
836    xmlRegAtomPtr ret;
837
838    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
839    if (ret == NULL) {
840	xmlRegexpErrMemory(ctxt, "copying atom");
841	return(NULL);
842    }
843    memset(ret, 0, sizeof(xmlRegAtom));
844    ret->type = atom->type;
845    ret->quant = atom->quant;
846    ret->min = atom->min;
847    ret->max = atom->max;
848    if (atom->nbRanges > 0) {
849        int i;
850
851        ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
852	                                           atom->nbRanges);
853	if (ret->ranges == NULL) {
854	    xmlRegexpErrMemory(ctxt, "copying atom");
855	    goto error;
856	}
857	for (i = 0;i < atom->nbRanges;i++) {
858	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
859	    if (ret->ranges[i] == NULL)
860	        goto error;
861	    ret->nbRanges = i + 1;
862	}
863    }
864    return(ret);
865
866error:
867    xmlRegFreeAtom(ret);
868    return(NULL);
869}
870
871static xmlRegStatePtr
872xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
873    xmlRegStatePtr ret;
874
875    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
876    if (ret == NULL) {
877	xmlRegexpErrMemory(ctxt, "allocating state");
878	return(NULL);
879    }
880    memset(ret, 0, sizeof(xmlRegState));
881    ret->type = XML_REGEXP_TRANS_STATE;
882    ret->mark = XML_REGEXP_MARK_NORMAL;
883    return(ret);
884}
885
886/**
887 * xmlRegFreeState:
888 * @state:  the regexp state
889 *
890 * Free a regexp state
891 */
892static void
893xmlRegFreeState(xmlRegStatePtr state) {
894    if (state == NULL)
895	return;
896
897    if (state->trans != NULL)
898	xmlFree(state->trans);
899    if (state->transTo != NULL)
900	xmlFree(state->transTo);
901    xmlFree(state);
902}
903
904/**
905 * xmlRegFreeParserCtxt:
906 * @ctxt:  the regexp parser context
907 *
908 * Free a regexp parser context
909 */
910static void
911xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
912    int i;
913    if (ctxt == NULL)
914	return;
915
916    if (ctxt->string != NULL)
917	xmlFree(ctxt->string);
918    if (ctxt->states != NULL) {
919	for (i = 0;i < ctxt->nbStates;i++)
920	    xmlRegFreeState(ctxt->states[i]);
921	xmlFree(ctxt->states);
922    }
923    if (ctxt->atoms != NULL) {
924	for (i = 0;i < ctxt->nbAtoms;i++)
925	    xmlRegFreeAtom(ctxt->atoms[i]);
926	xmlFree(ctxt->atoms);
927    }
928    if (ctxt->counters != NULL)
929	xmlFree(ctxt->counters);
930    xmlFree(ctxt);
931}
932
933/************************************************************************
934 * 									*
935 * 			Display of Data structures			*
936 * 									*
937 ************************************************************************/
938
939static void
940xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
941    switch (type) {
942        case XML_REGEXP_EPSILON:
943	    fprintf(output, "epsilon "); break;
944        case XML_REGEXP_CHARVAL:
945	    fprintf(output, "charval "); break;
946        case XML_REGEXP_RANGES:
947	    fprintf(output, "ranges "); break;
948        case XML_REGEXP_SUBREG:
949	    fprintf(output, "subexpr "); break;
950        case XML_REGEXP_STRING:
951	    fprintf(output, "string "); break;
952        case XML_REGEXP_ANYCHAR:
953	    fprintf(output, "anychar "); break;
954        case XML_REGEXP_ANYSPACE:
955	    fprintf(output, "anyspace "); break;
956        case XML_REGEXP_NOTSPACE:
957	    fprintf(output, "notspace "); break;
958        case XML_REGEXP_INITNAME:
959	    fprintf(output, "initname "); break;
960        case XML_REGEXP_NOTINITNAME:
961	    fprintf(output, "notinitname "); break;
962        case XML_REGEXP_NAMECHAR:
963	    fprintf(output, "namechar "); break;
964        case XML_REGEXP_NOTNAMECHAR:
965	    fprintf(output, "notnamechar "); break;
966        case XML_REGEXP_DECIMAL:
967	    fprintf(output, "decimal "); break;
968        case XML_REGEXP_NOTDECIMAL:
969	    fprintf(output, "notdecimal "); break;
970        case XML_REGEXP_REALCHAR:
971	    fprintf(output, "realchar "); break;
972        case XML_REGEXP_NOTREALCHAR:
973	    fprintf(output, "notrealchar "); break;
974        case XML_REGEXP_LETTER:
975            fprintf(output, "LETTER "); break;
976        case XML_REGEXP_LETTER_UPPERCASE:
977            fprintf(output, "LETTER_UPPERCASE "); break;
978        case XML_REGEXP_LETTER_LOWERCASE:
979            fprintf(output, "LETTER_LOWERCASE "); break;
980        case XML_REGEXP_LETTER_TITLECASE:
981            fprintf(output, "LETTER_TITLECASE "); break;
982        case XML_REGEXP_LETTER_MODIFIER:
983            fprintf(output, "LETTER_MODIFIER "); break;
984        case XML_REGEXP_LETTER_OTHERS:
985            fprintf(output, "LETTER_OTHERS "); break;
986        case XML_REGEXP_MARK:
987            fprintf(output, "MARK "); break;
988        case XML_REGEXP_MARK_NONSPACING:
989            fprintf(output, "MARK_NONSPACING "); break;
990        case XML_REGEXP_MARK_SPACECOMBINING:
991            fprintf(output, "MARK_SPACECOMBINING "); break;
992        case XML_REGEXP_MARK_ENCLOSING:
993            fprintf(output, "MARK_ENCLOSING "); break;
994        case XML_REGEXP_NUMBER:
995            fprintf(output, "NUMBER "); break;
996        case XML_REGEXP_NUMBER_DECIMAL:
997            fprintf(output, "NUMBER_DECIMAL "); break;
998        case XML_REGEXP_NUMBER_LETTER:
999            fprintf(output, "NUMBER_LETTER "); break;
1000        case XML_REGEXP_NUMBER_OTHERS:
1001            fprintf(output, "NUMBER_OTHERS "); break;
1002        case XML_REGEXP_PUNCT:
1003            fprintf(output, "PUNCT "); break;
1004        case XML_REGEXP_PUNCT_CONNECTOR:
1005            fprintf(output, "PUNCT_CONNECTOR "); break;
1006        case XML_REGEXP_PUNCT_DASH:
1007            fprintf(output, "PUNCT_DASH "); break;
1008        case XML_REGEXP_PUNCT_OPEN:
1009            fprintf(output, "PUNCT_OPEN "); break;
1010        case XML_REGEXP_PUNCT_CLOSE:
1011            fprintf(output, "PUNCT_CLOSE "); break;
1012        case XML_REGEXP_PUNCT_INITQUOTE:
1013            fprintf(output, "PUNCT_INITQUOTE "); break;
1014        case XML_REGEXP_PUNCT_FINQUOTE:
1015            fprintf(output, "PUNCT_FINQUOTE "); break;
1016        case XML_REGEXP_PUNCT_OTHERS:
1017            fprintf(output, "PUNCT_OTHERS "); break;
1018        case XML_REGEXP_SEPAR:
1019            fprintf(output, "SEPAR "); break;
1020        case XML_REGEXP_SEPAR_SPACE:
1021            fprintf(output, "SEPAR_SPACE "); break;
1022        case XML_REGEXP_SEPAR_LINE:
1023            fprintf(output, "SEPAR_LINE "); break;
1024        case XML_REGEXP_SEPAR_PARA:
1025            fprintf(output, "SEPAR_PARA "); break;
1026        case XML_REGEXP_SYMBOL:
1027            fprintf(output, "SYMBOL "); break;
1028        case XML_REGEXP_SYMBOL_MATH:
1029            fprintf(output, "SYMBOL_MATH "); break;
1030        case XML_REGEXP_SYMBOL_CURRENCY:
1031            fprintf(output, "SYMBOL_CURRENCY "); break;
1032        case XML_REGEXP_SYMBOL_MODIFIER:
1033            fprintf(output, "SYMBOL_MODIFIER "); break;
1034        case XML_REGEXP_SYMBOL_OTHERS:
1035            fprintf(output, "SYMBOL_OTHERS "); break;
1036        case XML_REGEXP_OTHER:
1037            fprintf(output, "OTHER "); break;
1038        case XML_REGEXP_OTHER_CONTROL:
1039            fprintf(output, "OTHER_CONTROL "); break;
1040        case XML_REGEXP_OTHER_FORMAT:
1041            fprintf(output, "OTHER_FORMAT "); break;
1042        case XML_REGEXP_OTHER_PRIVATE:
1043            fprintf(output, "OTHER_PRIVATE "); break;
1044        case XML_REGEXP_OTHER_NA:
1045            fprintf(output, "OTHER_NA "); break;
1046        case XML_REGEXP_BLOCK_NAME:
1047	    fprintf(output, "BLOCK "); break;
1048    }
1049}
1050
1051static void
1052xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
1053    switch (type) {
1054        case XML_REGEXP_QUANT_EPSILON:
1055	    fprintf(output, "epsilon "); break;
1056        case XML_REGEXP_QUANT_ONCE:
1057	    fprintf(output, "once "); break;
1058        case XML_REGEXP_QUANT_OPT:
1059	    fprintf(output, "? "); break;
1060        case XML_REGEXP_QUANT_MULT:
1061	    fprintf(output, "* "); break;
1062        case XML_REGEXP_QUANT_PLUS:
1063	    fprintf(output, "+ "); break;
1064	case XML_REGEXP_QUANT_RANGE:
1065	    fprintf(output, "range "); break;
1066	case XML_REGEXP_QUANT_ONCEONLY:
1067	    fprintf(output, "onceonly "); break;
1068	case XML_REGEXP_QUANT_ALL:
1069	    fprintf(output, "all "); break;
1070    }
1071}
1072static void
1073xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
1074    fprintf(output, "  range: ");
1075    if (range->neg)
1076	fprintf(output, "negative ");
1077    xmlRegPrintAtomType(output, range->type);
1078    fprintf(output, "%c - %c\n", range->start, range->end);
1079}
1080
1081static void
1082xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
1083    fprintf(output, " atom: ");
1084    if (atom == NULL) {
1085	fprintf(output, "NULL\n");
1086	return;
1087    }
1088    if (atom->neg)
1089        fprintf(output, "not ");
1090    xmlRegPrintAtomType(output, atom->type);
1091    xmlRegPrintQuantType(output, atom->quant);
1092    if (atom->quant == XML_REGEXP_QUANT_RANGE)
1093	fprintf(output, "%d-%d ", atom->min, atom->max);
1094    if (atom->type == XML_REGEXP_STRING)
1095	fprintf(output, "'%s' ", (char *) atom->valuep);
1096    if (atom->type == XML_REGEXP_CHARVAL)
1097	fprintf(output, "char %c\n", atom->codepoint);
1098    else if (atom->type == XML_REGEXP_RANGES) {
1099	int i;
1100	fprintf(output, "%d entries\n", atom->nbRanges);
1101	for (i = 0; i < atom->nbRanges;i++)
1102	    xmlRegPrintRange(output, atom->ranges[i]);
1103    } else if (atom->type == XML_REGEXP_SUBREG) {
1104	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1105    } else {
1106	fprintf(output, "\n");
1107    }
1108}
1109
1110static void
1111xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1112    fprintf(output, "  trans: ");
1113    if (trans == NULL) {
1114	fprintf(output, "NULL\n");
1115	return;
1116    }
1117    if (trans->to < 0) {
1118	fprintf(output, "removed\n");
1119	return;
1120    }
1121    if (trans->nd != 0) {
1122	if (trans->nd == 2)
1123	    fprintf(output, "last not determinist, ");
1124	else
1125	    fprintf(output, "not determinist, ");
1126    }
1127    if (trans->counter >= 0) {
1128	fprintf(output, "counted %d, ", trans->counter);
1129    }
1130    if (trans->count == REGEXP_ALL_COUNTER) {
1131	fprintf(output, "all transition, ");
1132    } else if (trans->count >= 0) {
1133	fprintf(output, "count based %d, ", trans->count);
1134    }
1135    if (trans->atom == NULL) {
1136	fprintf(output, "epsilon to %d\n", trans->to);
1137	return;
1138    }
1139    if (trans->atom->type == XML_REGEXP_CHARVAL)
1140	fprintf(output, "char %c ", trans->atom->codepoint);
1141    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1142}
1143
1144static void
1145xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1146    int i;
1147
1148    fprintf(output, " state: ");
1149    if (state == NULL) {
1150	fprintf(output, "NULL\n");
1151	return;
1152    }
1153    if (state->type == XML_REGEXP_START_STATE)
1154	fprintf(output, "START ");
1155    if (state->type == XML_REGEXP_FINAL_STATE)
1156	fprintf(output, "FINAL ");
1157
1158    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1159    for (i = 0;i < state->nbTrans; i++) {
1160	xmlRegPrintTrans(output, &(state->trans[i]));
1161    }
1162}
1163
1164#ifdef DEBUG_REGEXP_GRAPH
1165static void
1166xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1167    int i;
1168
1169    fprintf(output, " ctxt: ");
1170    if (ctxt == NULL) {
1171	fprintf(output, "NULL\n");
1172	return;
1173    }
1174    fprintf(output, "'%s' ", ctxt->string);
1175    if (ctxt->error)
1176	fprintf(output, "error ");
1177    if (ctxt->neg)
1178	fprintf(output, "neg ");
1179    fprintf(output, "\n");
1180    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1181    for (i = 0;i < ctxt->nbAtoms; i++) {
1182	fprintf(output, " %02d ", i);
1183	xmlRegPrintAtom(output, ctxt->atoms[i]);
1184    }
1185    if (ctxt->atom != NULL) {
1186	fprintf(output, "current atom:\n");
1187	xmlRegPrintAtom(output, ctxt->atom);
1188    }
1189    fprintf(output, "%d states:", ctxt->nbStates);
1190    if (ctxt->start != NULL)
1191	fprintf(output, " start: %d", ctxt->start->no);
1192    if (ctxt->end != NULL)
1193	fprintf(output, " end: %d", ctxt->end->no);
1194    fprintf(output, "\n");
1195    for (i = 0;i < ctxt->nbStates; i++) {
1196	xmlRegPrintState(output, ctxt->states[i]);
1197    }
1198    fprintf(output, "%d counters:\n", ctxt->nbCounters);
1199    for (i = 0;i < ctxt->nbCounters; i++) {
1200	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1201		                                ctxt->counters[i].max);
1202    }
1203}
1204#endif
1205
1206/************************************************************************
1207 * 									*
1208 *		 Finite Automata structures manipulations		*
1209 * 									*
1210 ************************************************************************/
1211
1212static void
1213xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1214	           int neg, xmlRegAtomType type, int start, int end,
1215		   xmlChar *blockName) {
1216    xmlRegRangePtr range;
1217
1218    if (atom == NULL) {
1219	ERROR("add range: atom is NULL");
1220	return;
1221    }
1222    if (atom->type != XML_REGEXP_RANGES) {
1223	ERROR("add range: atom is not ranges");
1224	return;
1225    }
1226    if (atom->maxRanges == 0) {
1227	atom->maxRanges = 4;
1228	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1229		                             sizeof(xmlRegRangePtr));
1230	if (atom->ranges == NULL) {
1231	    xmlRegexpErrMemory(ctxt, "adding ranges");
1232	    atom->maxRanges = 0;
1233	    return;
1234	}
1235    } else if (atom->nbRanges >= atom->maxRanges) {
1236	xmlRegRangePtr *tmp;
1237	atom->maxRanges *= 2;
1238	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1239		                             sizeof(xmlRegRangePtr));
1240	if (tmp == NULL) {
1241	    xmlRegexpErrMemory(ctxt, "adding ranges");
1242	    atom->maxRanges /= 2;
1243	    return;
1244	}
1245	atom->ranges = tmp;
1246    }
1247    range = xmlRegNewRange(ctxt, neg, type, start, end);
1248    if (range == NULL)
1249	return;
1250    range->blockName = blockName;
1251    atom->ranges[atom->nbRanges++] = range;
1252
1253}
1254
1255static int
1256xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1257    if (ctxt->maxCounters == 0) {
1258	ctxt->maxCounters = 4;
1259	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1260		                             sizeof(xmlRegCounter));
1261	if (ctxt->counters == NULL) {
1262	    xmlRegexpErrMemory(ctxt, "allocating counter");
1263	    ctxt->maxCounters = 0;
1264	    return(-1);
1265	}
1266    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1267	xmlRegCounter *tmp;
1268	ctxt->maxCounters *= 2;
1269	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1270		                           sizeof(xmlRegCounter));
1271	if (tmp == NULL) {
1272	    xmlRegexpErrMemory(ctxt, "allocating counter");
1273	    ctxt->maxCounters /= 2;
1274	    return(-1);
1275	}
1276	ctxt->counters = tmp;
1277    }
1278    ctxt->counters[ctxt->nbCounters].min = -1;
1279    ctxt->counters[ctxt->nbCounters].max = -1;
1280    return(ctxt->nbCounters++);
1281}
1282
1283static int
1284xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1285    if (atom == NULL) {
1286	ERROR("atom push: atom is NULL");
1287	return(-1);
1288    }
1289    if (ctxt->maxAtoms == 0) {
1290	ctxt->maxAtoms = 4;
1291	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1292		                             sizeof(xmlRegAtomPtr));
1293	if (ctxt->atoms == NULL) {
1294	    xmlRegexpErrMemory(ctxt, "pushing atom");
1295	    ctxt->maxAtoms = 0;
1296	    return(-1);
1297	}
1298    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1299	xmlRegAtomPtr *tmp;
1300	ctxt->maxAtoms *= 2;
1301	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1302		                             sizeof(xmlRegAtomPtr));
1303	if (tmp == NULL) {
1304	    xmlRegexpErrMemory(ctxt, "allocating counter");
1305	    ctxt->maxAtoms /= 2;
1306	    return(-1);
1307	}
1308	ctxt->atoms = tmp;
1309    }
1310    atom->no = ctxt->nbAtoms;
1311    ctxt->atoms[ctxt->nbAtoms++] = atom;
1312    return(0);
1313}
1314
1315static void
1316xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1317                      int from) {
1318    if (target->maxTransTo == 0) {
1319	target->maxTransTo = 8;
1320	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1321		                             sizeof(int));
1322	if (target->transTo == NULL) {
1323	    xmlRegexpErrMemory(ctxt, "adding transition");
1324	    target->maxTransTo = 0;
1325	    return;
1326	}
1327    } else if (target->nbTransTo >= target->maxTransTo) {
1328	int *tmp;
1329	target->maxTransTo *= 2;
1330	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1331		                             sizeof(int));
1332	if (tmp == NULL) {
1333	    xmlRegexpErrMemory(ctxt, "adding transition");
1334	    target->maxTransTo /= 2;
1335	    return;
1336	}
1337	target->transTo = tmp;
1338    }
1339    target->transTo[target->nbTransTo] = from;
1340    target->nbTransTo++;
1341}
1342
1343static void
1344xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1345	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1346		    int counter, int count) {
1347
1348    int nrtrans;
1349
1350    if (state == NULL) {
1351	ERROR("add state: state is NULL");
1352	return;
1353    }
1354    if (target == NULL) {
1355	ERROR("add state: target is NULL");
1356	return;
1357    }
1358    /*
1359     * Other routines follow the philosophy 'When in doubt, add a transition'
1360     * so we check here whether such a transition is already present and, if
1361     * so, silently ignore this request.
1362     */
1363
1364    for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1365	xmlRegTransPtr trans = &(state->trans[nrtrans]);
1366	if ((trans->atom == atom) &&
1367	    (trans->to == target->no) &&
1368	    (trans->counter == counter) &&
1369	    (trans->count == count)) {
1370#ifdef DEBUG_REGEXP_GRAPH
1371	    printf("Ignoring duplicate transition from %d to %d\n",
1372		    state->no, target->no);
1373#endif
1374	    return;
1375	}
1376    }
1377
1378    if (state->maxTrans == 0) {
1379	state->maxTrans = 8;
1380	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1381		                             sizeof(xmlRegTrans));
1382	if (state->trans == NULL) {
1383	    xmlRegexpErrMemory(ctxt, "adding transition");
1384	    state->maxTrans = 0;
1385	    return;
1386	}
1387    } else if (state->nbTrans >= state->maxTrans) {
1388	xmlRegTrans *tmp;
1389	state->maxTrans *= 2;
1390	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1391		                             sizeof(xmlRegTrans));
1392	if (tmp == NULL) {
1393	    xmlRegexpErrMemory(ctxt, "adding transition");
1394	    state->maxTrans /= 2;
1395	    return;
1396	}
1397	state->trans = tmp;
1398    }
1399#ifdef DEBUG_REGEXP_GRAPH
1400    printf("Add trans from %d to %d ", state->no, target->no);
1401    if (count == REGEXP_ALL_COUNTER)
1402	printf("all transition\n");
1403    else if (count >= 0)
1404	printf("count based %d\n", count);
1405    else if (counter >= 0)
1406	printf("counted %d\n", counter);
1407    else if (atom == NULL)
1408	printf("epsilon transition\n");
1409    else if (atom != NULL)
1410        xmlRegPrintAtom(stdout, atom);
1411#endif
1412
1413    state->trans[state->nbTrans].atom = atom;
1414    state->trans[state->nbTrans].to = target->no;
1415    state->trans[state->nbTrans].counter = counter;
1416    state->trans[state->nbTrans].count = count;
1417    state->trans[state->nbTrans].nd = 0;
1418    state->nbTrans++;
1419    xmlRegStateAddTransTo(ctxt, target, state->no);
1420}
1421
1422static int
1423xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1424    if (state == NULL) return(-1);
1425    if (ctxt->maxStates == 0) {
1426	ctxt->maxStates = 4;
1427	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1428		                             sizeof(xmlRegStatePtr));
1429	if (ctxt->states == NULL) {
1430	    xmlRegexpErrMemory(ctxt, "adding state");
1431	    ctxt->maxStates = 0;
1432	    return(-1);
1433	}
1434    } else if (ctxt->nbStates >= ctxt->maxStates) {
1435	xmlRegStatePtr *tmp;
1436	ctxt->maxStates *= 2;
1437	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1438		                             sizeof(xmlRegStatePtr));
1439	if (tmp == NULL) {
1440	    xmlRegexpErrMemory(ctxt, "adding state");
1441	    ctxt->maxStates /= 2;
1442	    return(-1);
1443	}
1444	ctxt->states = tmp;
1445    }
1446    state->no = ctxt->nbStates;
1447    ctxt->states[ctxt->nbStates++] = state;
1448    return(0);
1449}
1450
1451/**
1452 * xmlFAGenerateAllTransition:
1453 * @ctxt:  a regexp parser context
1454 * @from:  the from state
1455 * @to:  the target state or NULL for building a new one
1456 * @lax:
1457 *
1458 */
1459static void
1460xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1461			   xmlRegStatePtr from, xmlRegStatePtr to,
1462			   int lax) {
1463    if (to == NULL) {
1464	to = xmlRegNewState(ctxt);
1465	xmlRegStatePush(ctxt, to);
1466	ctxt->state = to;
1467    }
1468    if (lax)
1469	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1470    else
1471	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1472}
1473
1474/**
1475 * xmlFAGenerateEpsilonTransition:
1476 * @ctxt:  a regexp parser context
1477 * @from:  the from state
1478 * @to:  the target state or NULL for building a new one
1479 *
1480 */
1481static void
1482xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1483			       xmlRegStatePtr from, xmlRegStatePtr to) {
1484    if (to == NULL) {
1485	to = xmlRegNewState(ctxt);
1486	xmlRegStatePush(ctxt, to);
1487	ctxt->state = to;
1488    }
1489    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1490}
1491
1492/**
1493 * xmlFAGenerateCountedEpsilonTransition:
1494 * @ctxt:  a regexp parser context
1495 * @from:  the from state
1496 * @to:  the target state or NULL for building a new one
1497 * counter:  the counter for that transition
1498 *
1499 */
1500static void
1501xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1502	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1503    if (to == NULL) {
1504	to = xmlRegNewState(ctxt);
1505	xmlRegStatePush(ctxt, to);
1506	ctxt->state = to;
1507    }
1508    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1509}
1510
1511/**
1512 * xmlFAGenerateCountedTransition:
1513 * @ctxt:  a regexp parser context
1514 * @from:  the from state
1515 * @to:  the target state or NULL for building a new one
1516 * counter:  the counter for that transition
1517 *
1518 */
1519static void
1520xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1521	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1522    if (to == NULL) {
1523	to = xmlRegNewState(ctxt);
1524	xmlRegStatePush(ctxt, to);
1525	ctxt->state = to;
1526    }
1527    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1528}
1529
1530/**
1531 * xmlFAGenerateTransitions:
1532 * @ctxt:  a regexp parser context
1533 * @from:  the from state
1534 * @to:  the target state or NULL for building a new one
1535 * @atom:  the atom generating the transition
1536 *
1537 * Returns 0 if success and -1 in case of error.
1538 */
1539static int
1540xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1541	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1542    xmlRegStatePtr end;
1543
1544    if (atom == NULL) {
1545	ERROR("genrate transition: atom == NULL");
1546	return(-1);
1547    }
1548    if (atom->type == XML_REGEXP_SUBREG) {
1549	/*
1550	 * this is a subexpression handling one should not need to
1551	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1552	 */
1553	if (xmlRegAtomPush(ctxt, atom) < 0) {
1554	    return(-1);
1555	}
1556	if ((to != NULL) && (atom->stop != to) &&
1557	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1558	    /*
1559	     * Generate an epsilon transition to link to the target
1560	     */
1561	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1562#ifdef DV
1563	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1564		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1565	    to = xmlRegNewState(ctxt);
1566	    xmlRegStatePush(ctxt, to);
1567	    ctxt->state = to;
1568	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1569#endif
1570	}
1571	switch (atom->quant) {
1572	    case XML_REGEXP_QUANT_OPT:
1573		atom->quant = XML_REGEXP_QUANT_ONCE;
1574		/*
1575		 * transition done to the state after end of atom.
1576		 *      1. set transition from atom start to new state
1577		 *      2. set transition from atom end to this state.
1578		 */
1579                if (to == NULL) {
1580                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
1581                    xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
1582                                                   ctxt->state);
1583                } else {
1584                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
1585                }
1586		break;
1587	    case XML_REGEXP_QUANT_MULT:
1588		atom->quant = XML_REGEXP_QUANT_ONCE;
1589		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1590		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1591		break;
1592	    case XML_REGEXP_QUANT_PLUS:
1593		atom->quant = XML_REGEXP_QUANT_ONCE;
1594		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1595		break;
1596	    case XML_REGEXP_QUANT_RANGE: {
1597		int counter;
1598		xmlRegStatePtr inter, newstate;
1599
1600		/*
1601		 * create the final state now if needed
1602		 */
1603		if (to != NULL) {
1604		    newstate = to;
1605		} else {
1606		    newstate = xmlRegNewState(ctxt);
1607		    xmlRegStatePush(ctxt, newstate);
1608		}
1609
1610		/*
1611		 * The principle here is to use counted transition
1612		 * to avoid explosion in the number of states in the
1613		 * graph. This is clearly more complex but should not
1614		 * be exploitable at runtime.
1615		 */
1616		if ((atom->min == 0) && (atom->start0 == NULL)) {
1617		    xmlRegAtomPtr copy;
1618		    /*
1619		     * duplicate a transition based on atom to count next
1620		     * occurences after 1. We cannot loop to atom->start
1621		     * directly because we need an epsilon transition to
1622		     * newstate.
1623		     */
1624		     /* ???? For some reason it seems we never reach that
1625		        case, I suppose this got optimized out before when
1626			building the automata */
1627		    copy = xmlRegCopyAtom(ctxt, atom);
1628		    if (copy == NULL)
1629		        return(-1);
1630		    copy->quant = XML_REGEXP_QUANT_ONCE;
1631		    copy->min = 0;
1632		    copy->max = 0;
1633
1634		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
1635		        < 0)
1636			return(-1);
1637		    inter = ctxt->state;
1638		    counter = xmlRegGetCounter(ctxt);
1639		    ctxt->counters[counter].min = atom->min - 1;
1640		    ctxt->counters[counter].max = atom->max - 1;
1641		    /* count the number of times we see it again */
1642		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
1643						   atom->stop, counter);
1644		    /* allow a way out based on the count */
1645		    xmlFAGenerateCountedTransition(ctxt, inter,
1646			                           newstate, counter);
1647		    /* and also allow a direct exit for 0 */
1648		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1649		                                   newstate);
1650		} else {
1651		    /*
1652		     * either we need the atom at least once or there
1653		     * is an atom->start0 allowing to easilly plug the
1654		     * epsilon transition.
1655		     */
1656		    counter = xmlRegGetCounter(ctxt);
1657		    ctxt->counters[counter].min = atom->min - 1;
1658		    ctxt->counters[counter].max = atom->max - 1;
1659		    /* count the number of times we see it again */
1660		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1661						   atom->start, counter);
1662		    /* allow a way out based on the count */
1663		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
1664			                           newstate, counter);
1665		    /* and if needed allow a direct exit for 0 */
1666		    if (atom->min == 0)
1667			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
1668						       newstate);
1669
1670		}
1671		atom->min = 0;
1672		atom->max = 0;
1673		atom->quant = XML_REGEXP_QUANT_ONCE;
1674		ctxt->state = newstate;
1675	    }
1676	    default:
1677		break;
1678	}
1679	return(0);
1680    }
1681    if ((atom->min == 0) && (atom->max == 0) &&
1682               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1683        /*
1684	 * we can discard the atom and generate an epsilon transition instead
1685	 */
1686	if (to == NULL) {
1687	    to = xmlRegNewState(ctxt);
1688	    if (to != NULL)
1689		xmlRegStatePush(ctxt, to);
1690	    else {
1691		return(-1);
1692	    }
1693	}
1694	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1695	ctxt->state = to;
1696	xmlRegFreeAtom(atom);
1697	return(0);
1698    }
1699    if (to == NULL) {
1700	to = xmlRegNewState(ctxt);
1701	if (to != NULL)
1702	    xmlRegStatePush(ctxt, to);
1703	else {
1704	    return(-1);
1705	}
1706    }
1707    end = to;
1708    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
1709        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
1710	/*
1711	 * Do not pollute the target state by adding transitions from
1712	 * it as it is likely to be the shared target of multiple branches.
1713	 * So isolate with an epsilon transition.
1714	 */
1715        xmlRegStatePtr tmp;
1716
1717	tmp = xmlRegNewState(ctxt);
1718	if (tmp != NULL)
1719	    xmlRegStatePush(ctxt, tmp);
1720	else {
1721	    return(-1);
1722	}
1723	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
1724	to = tmp;
1725    }
1726    if (xmlRegAtomPush(ctxt, atom) < 0) {
1727	return(-1);
1728    }
1729    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1730    ctxt->state = end;
1731    switch (atom->quant) {
1732	case XML_REGEXP_QUANT_OPT:
1733	    atom->quant = XML_REGEXP_QUANT_ONCE;
1734	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1735	    break;
1736	case XML_REGEXP_QUANT_MULT:
1737	    atom->quant = XML_REGEXP_QUANT_ONCE;
1738	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1739	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1740	    break;
1741	case XML_REGEXP_QUANT_PLUS:
1742	    atom->quant = XML_REGEXP_QUANT_ONCE;
1743	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1744	    break;
1745	case XML_REGEXP_QUANT_RANGE:
1746#if DV_test
1747	    if (atom->min == 0) {
1748		xmlFAGenerateEpsilonTransition(ctxt, from, to);
1749	    }
1750#endif
1751	    break;
1752	default:
1753	    break;
1754    }
1755    return(0);
1756}
1757
1758/**
1759 * xmlFAReduceEpsilonTransitions:
1760 * @ctxt:  a regexp parser context
1761 * @fromnr:  the from state
1762 * @tonr:  the to state
1763 * @counter:  should that transition be associated to a counted
1764 *
1765 */
1766static void
1767xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1768	                      int tonr, int counter) {
1769    int transnr;
1770    xmlRegStatePtr from;
1771    xmlRegStatePtr to;
1772
1773#ifdef DEBUG_REGEXP_GRAPH
1774    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1775#endif
1776    from = ctxt->states[fromnr];
1777    if (from == NULL)
1778	return;
1779    to = ctxt->states[tonr];
1780    if (to == NULL)
1781	return;
1782    if ((to->mark == XML_REGEXP_MARK_START) ||
1783	(to->mark == XML_REGEXP_MARK_VISITED))
1784	return;
1785
1786    to->mark = XML_REGEXP_MARK_VISITED;
1787    if (to->type == XML_REGEXP_FINAL_STATE) {
1788#ifdef DEBUG_REGEXP_GRAPH
1789	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1790#endif
1791	from->type = XML_REGEXP_FINAL_STATE;
1792    }
1793    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1794        if (to->trans[transnr].to < 0)
1795	    continue;
1796	if (to->trans[transnr].atom == NULL) {
1797	    /*
1798	     * Don't remove counted transitions
1799	     * Don't loop either
1800	     */
1801	    if (to->trans[transnr].to != fromnr) {
1802		if (to->trans[transnr].count >= 0) {
1803		    int newto = to->trans[transnr].to;
1804
1805		    xmlRegStateAddTrans(ctxt, from, NULL,
1806					ctxt->states[newto],
1807					-1, to->trans[transnr].count);
1808		} else {
1809#ifdef DEBUG_REGEXP_GRAPH
1810		    printf("Found epsilon trans %d from %d to %d\n",
1811			   transnr, tonr, to->trans[transnr].to);
1812#endif
1813		    if (to->trans[transnr].counter >= 0) {
1814			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1815					      to->trans[transnr].to,
1816					      to->trans[transnr].counter);
1817		    } else {
1818			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1819					      to->trans[transnr].to,
1820					      counter);
1821		    }
1822		}
1823	    }
1824	} else {
1825	    int newto = to->trans[transnr].to;
1826
1827	    if (to->trans[transnr].counter >= 0) {
1828		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1829				    ctxt->states[newto],
1830				    to->trans[transnr].counter, -1);
1831	    } else {
1832		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1833				    ctxt->states[newto], counter, -1);
1834	    }
1835	}
1836    }
1837    to->mark = XML_REGEXP_MARK_NORMAL;
1838}
1839
1840/**
1841 * xmlFAEliminateSimpleEpsilonTransitions:
1842 * @ctxt:  a regexp parser context
1843 *
1844 * Eliminating general epsilon transitions can get costly in the general
1845 * algorithm due to the large amount of generated new transitions and
1846 * associated comparisons. However for simple epsilon transition used just
1847 * to separate building blocks when generating the automata this can be
1848 * reduced to state elimination:
1849 *    - if there exists an epsilon from X to Y
1850 *    - if there is no other transition from X
1851 * then X and Y are semantically equivalent and X can be eliminated
1852 * If X is the start state then make Y the start state, else replace the
1853 * target of all transitions to X by transitions to Y.
1854 */
1855static void
1856xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1857    int statenr, i, j, newto;
1858    xmlRegStatePtr state, tmp;
1859
1860    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1861	state = ctxt->states[statenr];
1862	if (state == NULL)
1863	    continue;
1864	if (state->nbTrans != 1)
1865	    continue;
1866	if (state->type == XML_REGEXP_UNREACH_STATE)
1867	    continue;
1868	/* is the only transition out a basic transition */
1869	if ((state->trans[0].atom == NULL) &&
1870	    (state->trans[0].to >= 0) &&
1871	    (state->trans[0].to != statenr) &&
1872	    (state->trans[0].counter < 0) &&
1873	    (state->trans[0].count < 0)) {
1874	    newto = state->trans[0].to;
1875
1876            if (state->type == XML_REGEXP_START_STATE) {
1877#ifdef DEBUG_REGEXP_GRAPH
1878		printf("Found simple epsilon trans from start %d to %d\n",
1879		       statenr, newto);
1880#endif
1881            } else {
1882#ifdef DEBUG_REGEXP_GRAPH
1883		printf("Found simple epsilon trans from %d to %d\n",
1884		       statenr, newto);
1885#endif
1886	        for (i = 0;i < state->nbTransTo;i++) {
1887		    tmp = ctxt->states[state->transTo[i]];
1888		    for (j = 0;j < tmp->nbTrans;j++) {
1889			if (tmp->trans[j].to == statenr) {
1890#ifdef DEBUG_REGEXP_GRAPH
1891			    printf("Changed transition %d on %d to go to %d\n",
1892				   j, tmp->no, newto);
1893#endif
1894			    tmp->trans[j].to = -1;
1895			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
1896			    			ctxt->states[newto],
1897					        tmp->trans[j].counter,
1898						tmp->trans[j].count);
1899			}
1900		    }
1901		}
1902		if (state->type == XML_REGEXP_FINAL_STATE)
1903		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1904		/* eliminate the transition completely */
1905		state->nbTrans = 0;
1906
1907                state->type = XML_REGEXP_UNREACH_STATE;
1908
1909	    }
1910
1911	}
1912    }
1913}
1914/**
1915 * xmlFAEliminateEpsilonTransitions:
1916 * @ctxt:  a regexp parser context
1917 *
1918 */
1919static void
1920xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1921    int statenr, transnr;
1922    xmlRegStatePtr state;
1923    int has_epsilon;
1924
1925    if (ctxt->states == NULL) return;
1926
1927    /*
1928     * Eliminate simple epsilon transition and the associated unreachable
1929     * states.
1930     */
1931    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1932    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1933	state = ctxt->states[statenr];
1934	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
1935#ifdef DEBUG_REGEXP_GRAPH
1936	    printf("Removed unreachable state %d\n", statenr);
1937#endif
1938	    xmlRegFreeState(state);
1939	    ctxt->states[statenr] = NULL;
1940	}
1941    }
1942
1943    has_epsilon = 0;
1944
1945    /*
1946     * Build the completed transitions bypassing the epsilons
1947     * Use a marking algorithm to avoid loops
1948     * Mark sink states too.
1949     * Process from the latests states backward to the start when
1950     * there is long cascading epsilon chains this minimize the
1951     * recursions and transition compares when adding the new ones
1952     */
1953    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
1954	state = ctxt->states[statenr];
1955	if (state == NULL)
1956	    continue;
1957	if ((state->nbTrans == 0) &&
1958	    (state->type != XML_REGEXP_FINAL_STATE)) {
1959	    state->type = XML_REGEXP_SINK_STATE;
1960	}
1961	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1962	    if ((state->trans[transnr].atom == NULL) &&
1963		(state->trans[transnr].to >= 0)) {
1964		if (state->trans[transnr].to == statenr) {
1965		    state->trans[transnr].to = -1;
1966#ifdef DEBUG_REGEXP_GRAPH
1967		    printf("Removed loopback epsilon trans %d on %d\n",
1968			   transnr, statenr);
1969#endif
1970		} else if (state->trans[transnr].count < 0) {
1971		    int newto = state->trans[transnr].to;
1972
1973#ifdef DEBUG_REGEXP_GRAPH
1974		    printf("Found epsilon trans %d from %d to %d\n",
1975			   transnr, statenr, newto);
1976#endif
1977		    has_epsilon = 1;
1978		    state->trans[transnr].to = -2;
1979		    state->mark = XML_REGEXP_MARK_START;
1980		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1981				      newto, state->trans[transnr].counter);
1982		    state->mark = XML_REGEXP_MARK_NORMAL;
1983#ifdef DEBUG_REGEXP_GRAPH
1984		} else {
1985		    printf("Found counted transition %d on %d\n",
1986			   transnr, statenr);
1987#endif
1988	        }
1989	    }
1990	}
1991    }
1992    /*
1993     * Eliminate the epsilon transitions
1994     */
1995    if (has_epsilon) {
1996	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1997	    state = ctxt->states[statenr];
1998	    if (state == NULL)
1999		continue;
2000	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
2001		xmlRegTransPtr trans = &(state->trans[transnr]);
2002		if ((trans->atom == NULL) &&
2003		    (trans->count < 0) &&
2004		    (trans->to >= 0)) {
2005		    trans->to = -1;
2006		}
2007	    }
2008	}
2009    }
2010
2011    /*
2012     * Use this pass to detect unreachable states too
2013     */
2014    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2015	state = ctxt->states[statenr];
2016	if (state != NULL)
2017	    state->reached = XML_REGEXP_MARK_NORMAL;
2018    }
2019    state = ctxt->states[0];
2020    if (state != NULL)
2021	state->reached = XML_REGEXP_MARK_START;
2022    while (state != NULL) {
2023	xmlRegStatePtr target = NULL;
2024	state->reached = XML_REGEXP_MARK_VISITED;
2025	/*
2026	 * Mark all states reachable from the current reachable state
2027	 */
2028	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2029	    if ((state->trans[transnr].to >= 0) &&
2030		((state->trans[transnr].atom != NULL) ||
2031		 (state->trans[transnr].count >= 0))) {
2032		int newto = state->trans[transnr].to;
2033
2034		if (ctxt->states[newto] == NULL)
2035		    continue;
2036		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
2037		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
2038		    target = ctxt->states[newto];
2039		}
2040	    }
2041	}
2042
2043	/*
2044	 * find the next accessible state not explored
2045	 */
2046	if (target == NULL) {
2047	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
2048		state = ctxt->states[statenr];
2049		if ((state != NULL) && (state->reached ==
2050			XML_REGEXP_MARK_START)) {
2051		    target = state;
2052		    break;
2053		}
2054	    }
2055	}
2056	state = target;
2057    }
2058    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2059	state = ctxt->states[statenr];
2060	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
2061#ifdef DEBUG_REGEXP_GRAPH
2062	    printf("Removed unreachable state %d\n", statenr);
2063#endif
2064	    xmlRegFreeState(state);
2065	    ctxt->states[statenr] = NULL;
2066	}
2067    }
2068
2069}
2070
2071static int
2072xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
2073    int ret = 0;
2074
2075    if ((range1->type == XML_REGEXP_RANGES) ||
2076        (range2->type == XML_REGEXP_RANGES) ||
2077        (range2->type == XML_REGEXP_SUBREG) ||
2078        (range1->type == XML_REGEXP_SUBREG) ||
2079        (range1->type == XML_REGEXP_STRING) ||
2080        (range2->type == XML_REGEXP_STRING))
2081	return(-1);
2082
2083    /* put them in order */
2084    if (range1->type > range2->type) {
2085        xmlRegRangePtr tmp;
2086
2087	tmp = range1;
2088	range1 = range2;
2089	range2 = tmp;
2090    }
2091    if ((range1->type == XML_REGEXP_ANYCHAR) ||
2092        (range2->type == XML_REGEXP_ANYCHAR)) {
2093	ret = 1;
2094    } else if ((range1->type == XML_REGEXP_EPSILON) ||
2095               (range2->type == XML_REGEXP_EPSILON)) {
2096	return(0);
2097    } else if (range1->type == range2->type) {
2098        if (range1->type != XML_REGEXP_CHARVAL)
2099            ret = 1;
2100        else if ((range1->end < range2->start) ||
2101	         (range2->end < range1->start))
2102	    ret = 0;
2103	else
2104	    ret = 1;
2105    } else if (range1->type == XML_REGEXP_CHARVAL) {
2106        int codepoint;
2107	int neg = 0;
2108
2109	/*
2110	 * just check all codepoints in the range for acceptance,
2111	 * this is usually way cheaper since done only once at
2112	 * compilation than testing over and over at runtime or
2113	 * pushing too many states when evaluating.
2114	 */
2115	if (((range1->neg == 0) && (range2->neg != 0)) ||
2116	    ((range1->neg != 0) && (range2->neg == 0)))
2117	    neg = 1;
2118
2119	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
2120	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
2121					    0, range2->start, range2->end,
2122					    range2->blockName);
2123	    if (ret < 0)
2124	        return(-1);
2125	    if (((neg == 1) && (ret == 0)) ||
2126	        ((neg == 0) && (ret == 1)))
2127		return(1);
2128	}
2129	return(0);
2130    } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
2131               (range2->type == XML_REGEXP_BLOCK_NAME)) {
2132	if (range1->type == range2->type) {
2133	    ret = xmlStrEqual(range1->blockName, range2->blockName);
2134	} else {
2135	    /*
2136	     * comparing a block range with anything else is way
2137	     * too costly, and maintining the table is like too much
2138	     * memory too, so let's force the automata to save state
2139	     * here.
2140	     */
2141	    return(1);
2142	}
2143    } else if ((range1->type < XML_REGEXP_LETTER) ||
2144               (range2->type < XML_REGEXP_LETTER)) {
2145	if ((range1->type == XML_REGEXP_ANYSPACE) &&
2146	    (range2->type == XML_REGEXP_NOTSPACE))
2147	    ret = 0;
2148	else if ((range1->type == XML_REGEXP_INITNAME) &&
2149	         (range2->type == XML_REGEXP_NOTINITNAME))
2150	    ret = 0;
2151	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
2152	         (range2->type == XML_REGEXP_NOTNAMECHAR))
2153	    ret = 0;
2154	else if ((range1->type == XML_REGEXP_DECIMAL) &&
2155	         (range2->type == XML_REGEXP_NOTDECIMAL))
2156	    ret = 0;
2157	else if ((range1->type == XML_REGEXP_REALCHAR) &&
2158	         (range2->type == XML_REGEXP_NOTREALCHAR))
2159	    ret = 0;
2160	else {
2161	    /* same thing to limit complexity */
2162	    return(1);
2163	}
2164    } else {
2165        ret = 0;
2166        /* range1->type < range2->type here */
2167        switch (range1->type) {
2168	    case XML_REGEXP_LETTER:
2169	         /* all disjoint except in the subgroups */
2170	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
2171		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
2172		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
2173		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
2174		     (range2->type == XML_REGEXP_LETTER_OTHERS))
2175		     ret = 1;
2176		 break;
2177	    case XML_REGEXP_MARK:
2178	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2179		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2180		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
2181		     ret = 1;
2182		 break;
2183	    case XML_REGEXP_NUMBER:
2184	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2185		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2186		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
2187		     ret = 1;
2188		 break;
2189	    case XML_REGEXP_PUNCT:
2190	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2191		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
2192		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2193		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2194		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2195		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2196		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
2197		     ret = 1;
2198		 break;
2199	    case XML_REGEXP_SEPAR:
2200	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2201		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
2202		     (range2->type == XML_REGEXP_SEPAR_PARA))
2203		     ret = 1;
2204		 break;
2205	    case XML_REGEXP_SYMBOL:
2206	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2207		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2208		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2209		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2210		     ret = 1;
2211		 break;
2212	    case XML_REGEXP_OTHER:
2213	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2214		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2215		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
2216		     ret = 1;
2217		 break;
2218            default:
2219	         if ((range2->type >= XML_REGEXP_LETTER) &&
2220		     (range2->type < XML_REGEXP_BLOCK_NAME))
2221		     ret = 0;
2222		 else {
2223		     /* safety net ! */
2224		     return(1);
2225		 }
2226	}
2227    }
2228    if (((range1->neg == 0) && (range2->neg != 0)) ||
2229        ((range1->neg != 0) && (range2->neg == 0)))
2230	ret = !ret;
2231    return(ret);
2232}
2233
2234/**
2235 * xmlFACompareAtomTypes:
2236 * @type1:  an atom type
2237 * @type2:  an atom type
2238 *
2239 * Compares two atoms type to check whether they intersect in some ways,
2240 * this is used by xmlFACompareAtoms only
2241 *
2242 * Returns 1 if they may intersect and 0 otherwise
2243 */
2244static int
2245xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
2246    if ((type1 == XML_REGEXP_EPSILON) ||
2247        (type1 == XML_REGEXP_CHARVAL) ||
2248	(type1 == XML_REGEXP_RANGES) ||
2249	(type1 == XML_REGEXP_SUBREG) ||
2250	(type1 == XML_REGEXP_STRING) ||
2251	(type1 == XML_REGEXP_ANYCHAR))
2252	return(1);
2253    if ((type2 == XML_REGEXP_EPSILON) ||
2254        (type2 == XML_REGEXP_CHARVAL) ||
2255	(type2 == XML_REGEXP_RANGES) ||
2256	(type2 == XML_REGEXP_SUBREG) ||
2257	(type2 == XML_REGEXP_STRING) ||
2258	(type2 == XML_REGEXP_ANYCHAR))
2259	return(1);
2260
2261    if (type1 == type2) return(1);
2262
2263    /* simplify subsequent compares by making sure type1 < type2 */
2264    if (type1 > type2) {
2265        xmlRegAtomType tmp = type1;
2266	type1 = type2;
2267	type2 = tmp;
2268    }
2269    switch (type1) {
2270        case XML_REGEXP_ANYSPACE: /* \s */
2271	    /* can't be a letter, number, mark, pontuation, symbol */
2272	    if ((type2 == XML_REGEXP_NOTSPACE) ||
2273		((type2 >= XML_REGEXP_LETTER) &&
2274		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2275	        ((type2 >= XML_REGEXP_NUMBER) &&
2276		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2277	        ((type2 >= XML_REGEXP_MARK) &&
2278		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2279	        ((type2 >= XML_REGEXP_PUNCT) &&
2280		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2281	        ((type2 >= XML_REGEXP_SYMBOL) &&
2282		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
2283	        ) return(0);
2284	    break;
2285        case XML_REGEXP_NOTSPACE: /* \S */
2286	    break;
2287        case XML_REGEXP_INITNAME: /* \l */
2288	    /* can't be a number, mark, separator, pontuation, symbol or other */
2289	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
2290	        ((type2 >= XML_REGEXP_NUMBER) &&
2291		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
2292	        ((type2 >= XML_REGEXP_MARK) &&
2293		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2294	        ((type2 >= XML_REGEXP_SEPAR) &&
2295		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2296	        ((type2 >= XML_REGEXP_PUNCT) &&
2297		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2298	        ((type2 >= XML_REGEXP_SYMBOL) &&
2299		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2300	        ((type2 >= XML_REGEXP_OTHER) &&
2301		 (type2 <= XML_REGEXP_OTHER_NA))
2302		) return(0);
2303	    break;
2304        case XML_REGEXP_NOTINITNAME: /* \L */
2305	    break;
2306        case XML_REGEXP_NAMECHAR: /* \c */
2307	    /* can't be a mark, separator, pontuation, symbol or other */
2308	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
2309	        ((type2 >= XML_REGEXP_MARK) &&
2310		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2311	        ((type2 >= XML_REGEXP_PUNCT) &&
2312		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2313	        ((type2 >= XML_REGEXP_SEPAR) &&
2314		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2315	        ((type2 >= XML_REGEXP_SYMBOL) &&
2316		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2317	        ((type2 >= XML_REGEXP_OTHER) &&
2318		 (type2 <= XML_REGEXP_OTHER_NA))
2319		) return(0);
2320	    break;
2321        case XML_REGEXP_NOTNAMECHAR: /* \C */
2322	    break;
2323        case XML_REGEXP_DECIMAL: /* \d */
2324	    /* can't be a letter, mark, separator, pontuation, symbol or other */
2325	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2326	        (type2 == XML_REGEXP_REALCHAR) ||
2327		((type2 >= XML_REGEXP_LETTER) &&
2328		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
2329	        ((type2 >= XML_REGEXP_MARK) &&
2330		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2331	        ((type2 >= XML_REGEXP_PUNCT) &&
2332		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2333	        ((type2 >= XML_REGEXP_SEPAR) &&
2334		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2335	        ((type2 >= XML_REGEXP_SYMBOL) &&
2336		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2337	        ((type2 >= XML_REGEXP_OTHER) &&
2338		 (type2 <= XML_REGEXP_OTHER_NA))
2339		)return(0);
2340	    break;
2341        case XML_REGEXP_NOTDECIMAL: /* \D */
2342	    break;
2343        case XML_REGEXP_REALCHAR: /* \w */
2344	    /* can't be a mark, separator, pontuation, symbol or other */
2345	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
2346	        ((type2 >= XML_REGEXP_MARK) &&
2347		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
2348	        ((type2 >= XML_REGEXP_PUNCT) &&
2349		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
2350	        ((type2 >= XML_REGEXP_SEPAR) &&
2351		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
2352	        ((type2 >= XML_REGEXP_SYMBOL) &&
2353		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
2354	        ((type2 >= XML_REGEXP_OTHER) &&
2355		 (type2 <= XML_REGEXP_OTHER_NA))
2356		)return(0);
2357	    break;
2358        case XML_REGEXP_NOTREALCHAR: /* \W */
2359	    break;
2360	/*
2361	 * at that point we know both type 1 and type2 are from
2362	 * character categories are ordered and are different,
2363	 * it becomes simple because this is a partition
2364	 */
2365        case XML_REGEXP_LETTER:
2366	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
2367	        return(1);
2368	    return(0);
2369        case XML_REGEXP_LETTER_UPPERCASE:
2370        case XML_REGEXP_LETTER_LOWERCASE:
2371        case XML_REGEXP_LETTER_TITLECASE:
2372        case XML_REGEXP_LETTER_MODIFIER:
2373        case XML_REGEXP_LETTER_OTHERS:
2374	    return(0);
2375        case XML_REGEXP_MARK:
2376	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
2377	        return(1);
2378	    return(0);
2379        case XML_REGEXP_MARK_NONSPACING:
2380        case XML_REGEXP_MARK_SPACECOMBINING:
2381        case XML_REGEXP_MARK_ENCLOSING:
2382	    return(0);
2383        case XML_REGEXP_NUMBER:
2384	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
2385	        return(1);
2386	    return(0);
2387        case XML_REGEXP_NUMBER_DECIMAL:
2388        case XML_REGEXP_NUMBER_LETTER:
2389        case XML_REGEXP_NUMBER_OTHERS:
2390	    return(0);
2391        case XML_REGEXP_PUNCT:
2392	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
2393	        return(1);
2394	    return(0);
2395        case XML_REGEXP_PUNCT_CONNECTOR:
2396        case XML_REGEXP_PUNCT_DASH:
2397        case XML_REGEXP_PUNCT_OPEN:
2398        case XML_REGEXP_PUNCT_CLOSE:
2399        case XML_REGEXP_PUNCT_INITQUOTE:
2400        case XML_REGEXP_PUNCT_FINQUOTE:
2401        case XML_REGEXP_PUNCT_OTHERS:
2402	    return(0);
2403        case XML_REGEXP_SEPAR:
2404	    if (type2 <= XML_REGEXP_SEPAR_PARA)
2405	        return(1);
2406	    return(0);
2407        case XML_REGEXP_SEPAR_SPACE:
2408        case XML_REGEXP_SEPAR_LINE:
2409        case XML_REGEXP_SEPAR_PARA:
2410	    return(0);
2411        case XML_REGEXP_SYMBOL:
2412	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
2413	        return(1);
2414	    return(0);
2415        case XML_REGEXP_SYMBOL_MATH:
2416        case XML_REGEXP_SYMBOL_CURRENCY:
2417        case XML_REGEXP_SYMBOL_MODIFIER:
2418        case XML_REGEXP_SYMBOL_OTHERS:
2419	    return(0);
2420        case XML_REGEXP_OTHER:
2421	    if (type2 <= XML_REGEXP_OTHER_NA)
2422	        return(1);
2423	    return(0);
2424        case XML_REGEXP_OTHER_CONTROL:
2425        case XML_REGEXP_OTHER_FORMAT:
2426        case XML_REGEXP_OTHER_PRIVATE:
2427        case XML_REGEXP_OTHER_NA:
2428	    return(0);
2429	default:
2430	    break;
2431    }
2432    return(1);
2433}
2434
2435/**
2436 * xmlFAEqualAtoms:
2437 * @atom1:  an atom
2438 * @atom2:  an atom
2439 * @deep: if not set only compare string pointers
2440 *
2441 * Compares two atoms to check whether they are the same exactly
2442 * this is used to remove equivalent transitions
2443 *
2444 * Returns 1 if same and 0 otherwise
2445 */
2446static int
2447xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2448    int ret = 0;
2449
2450    if (atom1 == atom2)
2451	return(1);
2452    if ((atom1 == NULL) || (atom2 == NULL))
2453	return(0);
2454
2455    if (atom1->type != atom2->type)
2456        return(0);
2457    switch (atom1->type) {
2458        case XML_REGEXP_EPSILON:
2459	    ret = 0;
2460	    break;
2461        case XML_REGEXP_STRING:
2462            if (!deep)
2463                ret = (atom1->valuep == atom2->valuep);
2464            else
2465                ret = xmlStrEqual((xmlChar *)atom1->valuep,
2466                                  (xmlChar *)atom2->valuep);
2467	    break;
2468        case XML_REGEXP_CHARVAL:
2469	    ret = (atom1->codepoint == atom2->codepoint);
2470	    break;
2471	case XML_REGEXP_RANGES:
2472	    /* too hard to do in the general case */
2473	    ret = 0;
2474	default:
2475	    break;
2476    }
2477    return(ret);
2478}
2479
2480/**
2481 * xmlFACompareAtoms:
2482 * @atom1:  an atom
2483 * @atom2:  an atom
2484 * @deep: if not set only compare string pointers
2485 *
2486 * Compares two atoms to check whether they intersect in some ways,
2487 * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
2488 *
2489 * Returns 1 if yes and 0 otherwise
2490 */
2491static int
2492xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
2493    int ret = 1;
2494
2495    if (atom1 == atom2)
2496	return(1);
2497    if ((atom1 == NULL) || (atom2 == NULL))
2498	return(0);
2499
2500    if ((atom1->type == XML_REGEXP_ANYCHAR) ||
2501        (atom2->type == XML_REGEXP_ANYCHAR))
2502	return(1);
2503
2504    if (atom1->type > atom2->type) {
2505	xmlRegAtomPtr tmp;
2506	tmp = atom1;
2507	atom1 = atom2;
2508	atom2 = tmp;
2509    }
2510    if (atom1->type != atom2->type) {
2511        ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
2512	/* if they can't intersect at the type level break now */
2513	if (ret == 0)
2514	    return(0);
2515    }
2516    switch (atom1->type) {
2517        case XML_REGEXP_STRING:
2518            if (!deep)
2519                ret = (atom1->valuep != atom2->valuep);
2520            else
2521                ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
2522                                             (xmlChar *)atom2->valuep);
2523	    break;
2524        case XML_REGEXP_EPSILON:
2525	    goto not_determinist;
2526        case XML_REGEXP_CHARVAL:
2527	    if (atom2->type == XML_REGEXP_CHARVAL) {
2528		ret = (atom1->codepoint == atom2->codepoint);
2529	    } else {
2530	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
2531		if (ret < 0)
2532		    ret = 1;
2533	    }
2534	    break;
2535        case XML_REGEXP_RANGES:
2536	    if (atom2->type == XML_REGEXP_RANGES) {
2537	        int i, j, res;
2538		xmlRegRangePtr r1, r2;
2539
2540		/*
2541		 * need to check that none of the ranges eventually matches
2542		 */
2543		for (i = 0;i < atom1->nbRanges;i++) {
2544		    for (j = 0;j < atom2->nbRanges;j++) {
2545			r1 = atom1->ranges[i];
2546			r2 = atom2->ranges[j];
2547			res = xmlFACompareRanges(r1, r2);
2548			if (res == 1) {
2549			    ret = 1;
2550			    goto done;
2551			}
2552		    }
2553		}
2554		ret = 0;
2555	    }
2556	    break;
2557	default:
2558	    goto not_determinist;
2559    }
2560done:
2561    if (atom1->neg != atom2->neg) {
2562        ret = !ret;
2563    }
2564    if (ret == 0)
2565        return(0);
2566not_determinist:
2567    return(1);
2568}
2569
2570/**
2571 * xmlFARecurseDeterminism:
2572 * @ctxt:  a regexp parser context
2573 *
2574 * Check whether the associated regexp is determinist,
2575 * should be called after xmlFAEliminateEpsilonTransitions()
2576 *
2577 */
2578static int
2579xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2580	                 int to, xmlRegAtomPtr atom) {
2581    int ret = 1;
2582    int res;
2583    int transnr, nbTrans;
2584    xmlRegTransPtr t1;
2585    int deep = 1;
2586
2587    if (state == NULL)
2588	return(ret);
2589
2590    if (ctxt->flags & AM_AUTOMATA_RNG)
2591        deep = 0;
2592
2593    /*
2594     * don't recurse on transitions potentially added in the course of
2595     * the elimination.
2596     */
2597    nbTrans = state->nbTrans;
2598    for (transnr = 0;transnr < nbTrans;transnr++) {
2599	t1 = &(state->trans[transnr]);
2600	/*
2601	 * check transitions conflicting with the one looked at
2602	 */
2603	if (t1->atom == NULL) {
2604	    if (t1->to < 0)
2605		continue;
2606	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2607		                           to, atom);
2608	    if (res == 0) {
2609	        ret = 0;
2610		/* t1->nd = 1; */
2611	    }
2612	    continue;
2613	}
2614	if (t1->to != to)
2615	    continue;
2616	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
2617	    ret = 0;
2618	    /* mark the transition as non-deterministic */
2619	    t1->nd = 1;
2620	}
2621    }
2622    return(ret);
2623}
2624
2625/**
2626 * xmlFAComputesDeterminism:
2627 * @ctxt:  a regexp parser context
2628 *
2629 * Check whether the associated regexp is determinist,
2630 * should be called after xmlFAEliminateEpsilonTransitions()
2631 *
2632 */
2633static int
2634xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2635    int statenr, transnr;
2636    xmlRegStatePtr state;
2637    xmlRegTransPtr t1, t2, last;
2638    int i;
2639    int ret = 1;
2640    int deep = 1;
2641
2642#ifdef DEBUG_REGEXP_GRAPH
2643    printf("xmlFAComputesDeterminism\n");
2644    xmlRegPrintCtxt(stdout, ctxt);
2645#endif
2646    if (ctxt->determinist != -1)
2647	return(ctxt->determinist);
2648
2649    if (ctxt->flags & AM_AUTOMATA_RNG)
2650        deep = 0;
2651
2652    /*
2653     * First cleanup the automata removing cancelled transitions
2654     */
2655    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2656	state = ctxt->states[statenr];
2657	if (state == NULL)
2658	    continue;
2659	if (state->nbTrans < 2)
2660	    continue;
2661	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2662	    t1 = &(state->trans[transnr]);
2663	    /*
2664	     * Determinism checks in case of counted or all transitions
2665	     * will have to be handled separately
2666	     */
2667	    if (t1->atom == NULL) {
2668		/* t1->nd = 1; */
2669		continue;
2670	    }
2671	    if (t1->to == -1) /* eliminated */
2672		continue;
2673	    for (i = 0;i < transnr;i++) {
2674		t2 = &(state->trans[i]);
2675		if (t2->to == -1) /* eliminated */
2676		    continue;
2677		if (t2->atom != NULL) {
2678		    if (t1->to == t2->to) {
2679                        /*
2680                         * Here we use deep because we want to keep the
2681                         * transitions which indicate a conflict
2682                         */
2683			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
2684                            (t1->counter == t2->counter) &&
2685                            (t1->count == t2->count))
2686			    t2->to = -1; /* eliminated */
2687		    }
2688		}
2689	    }
2690	}
2691    }
2692
2693    /*
2694     * Check for all states that there aren't 2 transitions
2695     * with the same atom and a different target.
2696     */
2697    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2698	state = ctxt->states[statenr];
2699	if (state == NULL)
2700	    continue;
2701	if (state->nbTrans < 2)
2702	    continue;
2703	last = NULL;
2704	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2705	    t1 = &(state->trans[transnr]);
2706	    /*
2707	     * Determinism checks in case of counted or all transitions
2708	     * will have to be handled separately
2709	     */
2710	    if (t1->atom == NULL) {
2711		continue;
2712	    }
2713	    if (t1->to == -1) /* eliminated */
2714		continue;
2715	    for (i = 0;i < transnr;i++) {
2716		t2 = &(state->trans[i]);
2717		if (t2->to == -1) /* eliminated */
2718		    continue;
2719		if (t2->atom != NULL) {
2720                    /*
2721                     * But here we don't use deep because we want to
2722                     * find transitions which indicate a conflict
2723                     */
2724		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
2725			ret = 0;
2726			/* mark the transitions as non-deterministic ones */
2727			t1->nd = 1;
2728			t2->nd = 1;
2729			last = t1;
2730		    }
2731		} else if (t1->to != -1) {
2732		    /*
2733		     * do the closure in case of remaining specific
2734		     * epsilon transitions like choices or all
2735		     */
2736		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2737						   t2->to, t2->atom);
2738		    /* don't shortcut the computation so all non deterministic
2739		       transition get marked down
2740		    if (ret == 0)
2741			return(0);
2742		     */
2743		    if (ret == 0) {
2744			t1->nd = 1;
2745			/* t2->nd = 1; */
2746			last = t1;
2747		    }
2748		}
2749	    }
2750	    /* don't shortcut the computation so all non deterministic
2751	       transition get marked down
2752	    if (ret == 0)
2753		break; */
2754	}
2755
2756	/*
2757	 * mark specifically the last non-deterministic transition
2758	 * from a state since there is no need to set-up rollback
2759	 * from it
2760	 */
2761	if (last != NULL) {
2762	    last->nd = 2;
2763	}
2764
2765	/* don't shortcut the computation so all non deterministic
2766	   transition get marked down
2767	if (ret == 0)
2768	    break; */
2769    }
2770
2771    ctxt->determinist = ret;
2772    return(ret);
2773}
2774
2775/************************************************************************
2776 * 									*
2777 *	Routines to check input against transition atoms		*
2778 * 									*
2779 ************************************************************************/
2780
2781static int
2782xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2783	                  int start, int end, const xmlChar *blockName) {
2784    int ret = 0;
2785
2786    switch (type) {
2787        case XML_REGEXP_STRING:
2788        case XML_REGEXP_SUBREG:
2789        case XML_REGEXP_RANGES:
2790        case XML_REGEXP_EPSILON:
2791	    return(-1);
2792        case XML_REGEXP_ANYCHAR:
2793	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2794	    break;
2795        case XML_REGEXP_CHARVAL:
2796	    ret = ((codepoint >= start) && (codepoint <= end));
2797	    break;
2798        case XML_REGEXP_NOTSPACE:
2799	    neg = !neg;
2800        case XML_REGEXP_ANYSPACE:
2801	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2802		   (codepoint == '\t') || (codepoint == ' '));
2803	    break;
2804        case XML_REGEXP_NOTINITNAME:
2805	    neg = !neg;
2806        case XML_REGEXP_INITNAME:
2807	    ret = (IS_LETTER(codepoint) ||
2808		   (codepoint == '_') || (codepoint == ':'));
2809	    break;
2810        case XML_REGEXP_NOTNAMECHAR:
2811	    neg = !neg;
2812        case XML_REGEXP_NAMECHAR:
2813	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2814		   (codepoint == '.') || (codepoint == '-') ||
2815		   (codepoint == '_') || (codepoint == ':') ||
2816		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2817	    break;
2818        case XML_REGEXP_NOTDECIMAL:
2819	    neg = !neg;
2820        case XML_REGEXP_DECIMAL:
2821	    ret = xmlUCSIsCatNd(codepoint);
2822	    break;
2823        case XML_REGEXP_REALCHAR:
2824	    neg = !neg;
2825        case XML_REGEXP_NOTREALCHAR:
2826	    ret = xmlUCSIsCatP(codepoint);
2827	    if (ret == 0)
2828		ret = xmlUCSIsCatZ(codepoint);
2829	    if (ret == 0)
2830		ret = xmlUCSIsCatC(codepoint);
2831	    break;
2832        case XML_REGEXP_LETTER:
2833	    ret = xmlUCSIsCatL(codepoint);
2834	    break;
2835        case XML_REGEXP_LETTER_UPPERCASE:
2836	    ret = xmlUCSIsCatLu(codepoint);
2837	    break;
2838        case XML_REGEXP_LETTER_LOWERCASE:
2839	    ret = xmlUCSIsCatLl(codepoint);
2840	    break;
2841        case XML_REGEXP_LETTER_TITLECASE:
2842	    ret = xmlUCSIsCatLt(codepoint);
2843	    break;
2844        case XML_REGEXP_LETTER_MODIFIER:
2845	    ret = xmlUCSIsCatLm(codepoint);
2846	    break;
2847        case XML_REGEXP_LETTER_OTHERS:
2848	    ret = xmlUCSIsCatLo(codepoint);
2849	    break;
2850        case XML_REGEXP_MARK:
2851	    ret = xmlUCSIsCatM(codepoint);
2852	    break;
2853        case XML_REGEXP_MARK_NONSPACING:
2854	    ret = xmlUCSIsCatMn(codepoint);
2855	    break;
2856        case XML_REGEXP_MARK_SPACECOMBINING:
2857	    ret = xmlUCSIsCatMc(codepoint);
2858	    break;
2859        case XML_REGEXP_MARK_ENCLOSING:
2860	    ret = xmlUCSIsCatMe(codepoint);
2861	    break;
2862        case XML_REGEXP_NUMBER:
2863	    ret = xmlUCSIsCatN(codepoint);
2864	    break;
2865        case XML_REGEXP_NUMBER_DECIMAL:
2866	    ret = xmlUCSIsCatNd(codepoint);
2867	    break;
2868        case XML_REGEXP_NUMBER_LETTER:
2869	    ret = xmlUCSIsCatNl(codepoint);
2870	    break;
2871        case XML_REGEXP_NUMBER_OTHERS:
2872	    ret = xmlUCSIsCatNo(codepoint);
2873	    break;
2874        case XML_REGEXP_PUNCT:
2875	    ret = xmlUCSIsCatP(codepoint);
2876	    break;
2877        case XML_REGEXP_PUNCT_CONNECTOR:
2878	    ret = xmlUCSIsCatPc(codepoint);
2879	    break;
2880        case XML_REGEXP_PUNCT_DASH:
2881	    ret = xmlUCSIsCatPd(codepoint);
2882	    break;
2883        case XML_REGEXP_PUNCT_OPEN:
2884	    ret = xmlUCSIsCatPs(codepoint);
2885	    break;
2886        case XML_REGEXP_PUNCT_CLOSE:
2887	    ret = xmlUCSIsCatPe(codepoint);
2888	    break;
2889        case XML_REGEXP_PUNCT_INITQUOTE:
2890	    ret = xmlUCSIsCatPi(codepoint);
2891	    break;
2892        case XML_REGEXP_PUNCT_FINQUOTE:
2893	    ret = xmlUCSIsCatPf(codepoint);
2894	    break;
2895        case XML_REGEXP_PUNCT_OTHERS:
2896	    ret = xmlUCSIsCatPo(codepoint);
2897	    break;
2898        case XML_REGEXP_SEPAR:
2899	    ret = xmlUCSIsCatZ(codepoint);
2900	    break;
2901        case XML_REGEXP_SEPAR_SPACE:
2902	    ret = xmlUCSIsCatZs(codepoint);
2903	    break;
2904        case XML_REGEXP_SEPAR_LINE:
2905	    ret = xmlUCSIsCatZl(codepoint);
2906	    break;
2907        case XML_REGEXP_SEPAR_PARA:
2908	    ret = xmlUCSIsCatZp(codepoint);
2909	    break;
2910        case XML_REGEXP_SYMBOL:
2911	    ret = xmlUCSIsCatS(codepoint);
2912	    break;
2913        case XML_REGEXP_SYMBOL_MATH:
2914	    ret = xmlUCSIsCatSm(codepoint);
2915	    break;
2916        case XML_REGEXP_SYMBOL_CURRENCY:
2917	    ret = xmlUCSIsCatSc(codepoint);
2918	    break;
2919        case XML_REGEXP_SYMBOL_MODIFIER:
2920	    ret = xmlUCSIsCatSk(codepoint);
2921	    break;
2922        case XML_REGEXP_SYMBOL_OTHERS:
2923	    ret = xmlUCSIsCatSo(codepoint);
2924	    break;
2925        case XML_REGEXP_OTHER:
2926	    ret = xmlUCSIsCatC(codepoint);
2927	    break;
2928        case XML_REGEXP_OTHER_CONTROL:
2929	    ret = xmlUCSIsCatCc(codepoint);
2930	    break;
2931        case XML_REGEXP_OTHER_FORMAT:
2932	    ret = xmlUCSIsCatCf(codepoint);
2933	    break;
2934        case XML_REGEXP_OTHER_PRIVATE:
2935	    ret = xmlUCSIsCatCo(codepoint);
2936	    break;
2937        case XML_REGEXP_OTHER_NA:
2938	    /* ret = xmlUCSIsCatCn(codepoint); */
2939	    /* Seems it doesn't exist anymore in recent Unicode releases */
2940	    ret = 0;
2941	    break;
2942        case XML_REGEXP_BLOCK_NAME:
2943	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2944	    break;
2945    }
2946    if (neg)
2947	return(!ret);
2948    return(ret);
2949}
2950
2951static int
2952xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2953    int i, ret = 0;
2954    xmlRegRangePtr range;
2955
2956    if ((atom == NULL) || (!IS_CHAR(codepoint)))
2957	return(-1);
2958
2959    switch (atom->type) {
2960        case XML_REGEXP_SUBREG:
2961        case XML_REGEXP_EPSILON:
2962	    return(-1);
2963        case XML_REGEXP_CHARVAL:
2964            return(codepoint == atom->codepoint);
2965        case XML_REGEXP_RANGES: {
2966	    int accept = 0;
2967
2968	    for (i = 0;i < atom->nbRanges;i++) {
2969		range = atom->ranges[i];
2970		if (range->neg == 2) {
2971		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2972						0, range->start, range->end,
2973						range->blockName);
2974		    if (ret != 0)
2975			return(0); /* excluded char */
2976		} else if (range->neg) {
2977		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2978						0, range->start, range->end,
2979						range->blockName);
2980		    if (ret == 0)
2981		        accept = 1;
2982		    else
2983		        return(0);
2984		} else {
2985		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2986						0, range->start, range->end,
2987						range->blockName);
2988		    if (ret != 0)
2989			accept = 1; /* might still be excluded */
2990		}
2991	    }
2992	    return(accept);
2993	}
2994        case XML_REGEXP_STRING:
2995	    printf("TODO: XML_REGEXP_STRING\n");
2996	    return(-1);
2997        case XML_REGEXP_ANYCHAR:
2998        case XML_REGEXP_ANYSPACE:
2999        case XML_REGEXP_NOTSPACE:
3000        case XML_REGEXP_INITNAME:
3001        case XML_REGEXP_NOTINITNAME:
3002        case XML_REGEXP_NAMECHAR:
3003        case XML_REGEXP_NOTNAMECHAR:
3004        case XML_REGEXP_DECIMAL:
3005        case XML_REGEXP_NOTDECIMAL:
3006        case XML_REGEXP_REALCHAR:
3007        case XML_REGEXP_NOTREALCHAR:
3008        case XML_REGEXP_LETTER:
3009        case XML_REGEXP_LETTER_UPPERCASE:
3010        case XML_REGEXP_LETTER_LOWERCASE:
3011        case XML_REGEXP_LETTER_TITLECASE:
3012        case XML_REGEXP_LETTER_MODIFIER:
3013        case XML_REGEXP_LETTER_OTHERS:
3014        case XML_REGEXP_MARK:
3015        case XML_REGEXP_MARK_NONSPACING:
3016        case XML_REGEXP_MARK_SPACECOMBINING:
3017        case XML_REGEXP_MARK_ENCLOSING:
3018        case XML_REGEXP_NUMBER:
3019        case XML_REGEXP_NUMBER_DECIMAL:
3020        case XML_REGEXP_NUMBER_LETTER:
3021        case XML_REGEXP_NUMBER_OTHERS:
3022        case XML_REGEXP_PUNCT:
3023        case XML_REGEXP_PUNCT_CONNECTOR:
3024        case XML_REGEXP_PUNCT_DASH:
3025        case XML_REGEXP_PUNCT_OPEN:
3026        case XML_REGEXP_PUNCT_CLOSE:
3027        case XML_REGEXP_PUNCT_INITQUOTE:
3028        case XML_REGEXP_PUNCT_FINQUOTE:
3029        case XML_REGEXP_PUNCT_OTHERS:
3030        case XML_REGEXP_SEPAR:
3031        case XML_REGEXP_SEPAR_SPACE:
3032        case XML_REGEXP_SEPAR_LINE:
3033        case XML_REGEXP_SEPAR_PARA:
3034        case XML_REGEXP_SYMBOL:
3035        case XML_REGEXP_SYMBOL_MATH:
3036        case XML_REGEXP_SYMBOL_CURRENCY:
3037        case XML_REGEXP_SYMBOL_MODIFIER:
3038        case XML_REGEXP_SYMBOL_OTHERS:
3039        case XML_REGEXP_OTHER:
3040        case XML_REGEXP_OTHER_CONTROL:
3041        case XML_REGEXP_OTHER_FORMAT:
3042        case XML_REGEXP_OTHER_PRIVATE:
3043        case XML_REGEXP_OTHER_NA:
3044	case XML_REGEXP_BLOCK_NAME:
3045	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
3046		                            (const xmlChar *)atom->valuep);
3047	    if (atom->neg)
3048		ret = !ret;
3049	    break;
3050    }
3051    return(ret);
3052}
3053
3054/************************************************************************
3055 * 									*
3056 *	Saving and restoring state of an execution context		*
3057 * 									*
3058 ************************************************************************/
3059
3060#ifdef DEBUG_REGEXP_EXEC
3061static void
3062xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
3063    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
3064    if (exec->inputStack != NULL) {
3065	int i;
3066	printf(": ");
3067	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
3068	    printf("%s ", (const char *)
3069	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
3070    } else {
3071	printf(": %s", &(exec->inputString[exec->index]));
3072    }
3073    printf("\n");
3074}
3075#endif
3076
3077static void
3078xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
3079#ifdef DEBUG_REGEXP_EXEC
3080    printf("saving ");
3081    exec->transno++;
3082    xmlFARegDebugExec(exec);
3083    exec->transno--;
3084#endif
3085#ifdef MAX_PUSH
3086    if (exec->nbPush > MAX_PUSH) {
3087        return;
3088    }
3089    exec->nbPush++;
3090#endif
3091
3092    if (exec->maxRollbacks == 0) {
3093	exec->maxRollbacks = 4;
3094	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
3095		                             sizeof(xmlRegExecRollback));
3096	if (exec->rollbacks == NULL) {
3097	    xmlRegexpErrMemory(NULL, "saving regexp");
3098	    exec->maxRollbacks = 0;
3099	    return;
3100	}
3101	memset(exec->rollbacks, 0,
3102	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
3103    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
3104	xmlRegExecRollback *tmp;
3105	int len = exec->maxRollbacks;
3106
3107	exec->maxRollbacks *= 2;
3108	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
3109			exec->maxRollbacks * sizeof(xmlRegExecRollback));
3110	if (tmp == NULL) {
3111	    xmlRegexpErrMemory(NULL, "saving regexp");
3112	    exec->maxRollbacks /= 2;
3113	    return;
3114	}
3115	exec->rollbacks = tmp;
3116	tmp = &exec->rollbacks[len];
3117	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
3118    }
3119    exec->rollbacks[exec->nbRollbacks].state = exec->state;
3120    exec->rollbacks[exec->nbRollbacks].index = exec->index;
3121    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
3122    if (exec->comp->nbCounters > 0) {
3123	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3124	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
3125		xmlMalloc(exec->comp->nbCounters * sizeof(int));
3126	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3127		xmlRegexpErrMemory(NULL, "saving regexp");
3128		exec->status = -5;
3129		return;
3130	    }
3131	}
3132	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
3133	       exec->comp->nbCounters * sizeof(int));
3134    }
3135    exec->nbRollbacks++;
3136}
3137
3138static void
3139xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
3140    if (exec->nbRollbacks <= 0) {
3141	exec->status = -1;
3142#ifdef DEBUG_REGEXP_EXEC
3143	printf("rollback failed on empty stack\n");
3144#endif
3145	return;
3146    }
3147    exec->nbRollbacks--;
3148    exec->state = exec->rollbacks[exec->nbRollbacks].state;
3149    exec->index = exec->rollbacks[exec->nbRollbacks].index;
3150    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
3151    if (exec->comp->nbCounters > 0) {
3152	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
3153	    fprintf(stderr, "exec save: allocation failed");
3154	    exec->status = -6;
3155	    return;
3156	}
3157	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
3158	       exec->comp->nbCounters * sizeof(int));
3159    }
3160
3161#ifdef DEBUG_REGEXP_EXEC
3162    printf("restored ");
3163    xmlFARegDebugExec(exec);
3164#endif
3165}
3166
3167/************************************************************************
3168 * 									*
3169 *	Verifier, running an input against a compiled regexp		*
3170 * 									*
3171 ************************************************************************/
3172
3173static int
3174xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
3175    xmlRegExecCtxt execval;
3176    xmlRegExecCtxtPtr exec = &execval;
3177    int ret, codepoint = 0, len, deter;
3178
3179    exec->inputString = content;
3180    exec->index = 0;
3181    exec->nbPush = 0;
3182    exec->determinist = 1;
3183    exec->maxRollbacks = 0;
3184    exec->nbRollbacks = 0;
3185    exec->rollbacks = NULL;
3186    exec->status = 0;
3187    exec->comp = comp;
3188    exec->state = comp->states[0];
3189    exec->transno = 0;
3190    exec->transcount = 0;
3191    exec->inputStack = NULL;
3192    exec->inputStackMax = 0;
3193    if (comp->nbCounters > 0) {
3194	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
3195	if (exec->counts == NULL) {
3196	    xmlRegexpErrMemory(NULL, "running regexp");
3197	    return(-1);
3198	}
3199        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
3200    } else
3201	exec->counts = NULL;
3202    while ((exec->status == 0) &&
3203	   ((exec->inputString[exec->index] != 0) ||
3204	    ((exec->state != NULL) &&
3205	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3206	xmlRegTransPtr trans;
3207	xmlRegAtomPtr atom;
3208
3209	/*
3210	 * If end of input on non-terminal state, rollback, however we may
3211	 * still have epsilon like transition for counted transitions
3212	 * on counters, in that case don't break too early.  Additionally,
3213	 * if we are working on a range like "AB{0,2}", where B is not present,
3214	 * we don't want to break.
3215	 */
3216	len = 1;
3217	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
3218	    /*
3219	     * if there is a transition, we must check if
3220	     *  atom allows minOccurs of 0
3221	     */
3222	    if (exec->transno < exec->state->nbTrans) {
3223	        trans = &exec->state->trans[exec->transno];
3224		if (trans->to >=0) {
3225		    atom = trans->atom;
3226		    if (!((atom->min == 0) && (atom->max > 0)))
3227		        goto rollback;
3228		}
3229	    } else
3230	        goto rollback;
3231	}
3232
3233	exec->transcount = 0;
3234	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3235	    trans = &exec->state->trans[exec->transno];
3236	    if (trans->to < 0)
3237		continue;
3238	    atom = trans->atom;
3239	    ret = 0;
3240	    deter = 1;
3241	    if (trans->count >= 0) {
3242		int count;
3243		xmlRegCounterPtr counter;
3244
3245		if (exec->counts == NULL) {
3246		    exec->status = -1;
3247		    goto error;
3248		}
3249		/*
3250		 * A counted transition.
3251		 */
3252
3253		count = exec->counts[trans->count];
3254		counter = &exec->comp->counters[trans->count];
3255#ifdef DEBUG_REGEXP_EXEC
3256		printf("testing count %d: val %d, min %d, max %d\n",
3257		       trans->count, count, counter->min,  counter->max);
3258#endif
3259		ret = ((count >= counter->min) && (count <= counter->max));
3260		if ((ret) && (counter->min != counter->max))
3261		    deter = 0;
3262	    } else if (atom == NULL) {
3263		fprintf(stderr, "epsilon transition left at runtime\n");
3264		exec->status = -2;
3265		break;
3266	    } else if (exec->inputString[exec->index] != 0) {
3267                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3268		ret = xmlRegCheckCharacter(atom, codepoint);
3269		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
3270		    xmlRegStatePtr to = comp->states[trans->to];
3271
3272		    /*
3273		     * this is a multiple input sequence
3274		     * If there is a counter associated increment it now.
3275		     * before potentially saving and rollback
3276		     * do not increment if the counter is already over the
3277		     * maximum limit in which case get to next transition
3278		     */
3279		    if (trans->counter >= 0) {
3280			xmlRegCounterPtr counter;
3281
3282			if ((exec->counts == NULL) ||
3283			    (exec->comp == NULL) ||
3284			    (exec->comp->counters == NULL)) {
3285			    exec->status = -1;
3286			    goto error;
3287			}
3288			counter = &exec->comp->counters[trans->counter];
3289			if (exec->counts[trans->counter] >= counter->max)
3290			    continue; /* for loop on transitions */
3291
3292#ifdef DEBUG_REGEXP_EXEC
3293			printf("Increasing count %d\n", trans->counter);
3294#endif
3295			exec->counts[trans->counter]++;
3296		    }
3297		    if (exec->state->nbTrans > exec->transno + 1) {
3298			xmlFARegExecSave(exec);
3299		    }
3300		    exec->transcount = 1;
3301		    do {
3302			/*
3303			 * Try to progress as much as possible on the input
3304			 */
3305			if (exec->transcount == atom->max) {
3306			    break;
3307			}
3308			exec->index += len;
3309			/*
3310			 * End of input: stop here
3311			 */
3312			if (exec->inputString[exec->index] == 0) {
3313			    exec->index -= len;
3314			    break;
3315			}
3316			if (exec->transcount >= atom->min) {
3317			    int transno = exec->transno;
3318			    xmlRegStatePtr state = exec->state;
3319
3320			    /*
3321			     * The transition is acceptable save it
3322			     */
3323			    exec->transno = -1; /* trick */
3324			    exec->state = to;
3325			    xmlFARegExecSave(exec);
3326			    exec->transno = transno;
3327			    exec->state = state;
3328			}
3329			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3330				              len);
3331			ret = xmlRegCheckCharacter(atom, codepoint);
3332			exec->transcount++;
3333		    } while (ret == 1);
3334		    if (exec->transcount < atom->min)
3335			ret = 0;
3336
3337		    /*
3338		     * If the last check failed but one transition was found
3339		     * possible, rollback
3340		     */
3341		    if (ret < 0)
3342			ret = 0;
3343		    if (ret == 0) {
3344			goto rollback;
3345		    }
3346		    if (trans->counter >= 0) {
3347			if (exec->counts == NULL) {
3348			    exec->status = -1;
3349			    goto error;
3350			}
3351#ifdef DEBUG_REGEXP_EXEC
3352			printf("Decreasing count %d\n", trans->counter);
3353#endif
3354			exec->counts[trans->counter]--;
3355		    }
3356		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
3357		    /*
3358		     * we don't match on the codepoint, but minOccurs of 0
3359		     * says that's ok.  Setting len to 0 inhibits stepping
3360		     * over the codepoint.
3361		     */
3362		    exec->transcount = 1;
3363		    len = 0;
3364		    ret = 1;
3365		}
3366	    } else if ((atom->min == 0) && (atom->max > 0)) {
3367	        /* another spot to match when minOccurs is 0 */
3368		exec->transcount = 1;
3369		len = 0;
3370		ret = 1;
3371	    }
3372	    if (ret == 1) {
3373		if ((trans->nd == 1) ||
3374		    ((trans->count >= 0) && (deter == 0) &&
3375		     (exec->state->nbTrans > exec->transno + 1))) {
3376#ifdef DEBUG_REGEXP_EXEC
3377		    if (trans->nd == 1)
3378		        printf("Saving on nd transition atom %d for %c at %d\n",
3379			       trans->atom->no, codepoint, exec->index);
3380		    else
3381		        printf("Saving on counted transition count %d for %c at %d\n",
3382			       trans->count, codepoint, exec->index);
3383#endif
3384		    xmlFARegExecSave(exec);
3385		}
3386		if (trans->counter >= 0) {
3387		    xmlRegCounterPtr counter;
3388
3389                    /* make sure we don't go over the counter maximum value */
3390		    if ((exec->counts == NULL) ||
3391			(exec->comp == NULL) ||
3392			(exec->comp->counters == NULL)) {
3393			exec->status = -1;
3394			goto error;
3395		    }
3396		    counter = &exec->comp->counters[trans->counter];
3397		    if (exec->counts[trans->counter] >= counter->max)
3398			continue; /* for loop on transitions */
3399#ifdef DEBUG_REGEXP_EXEC
3400		    printf("Increasing count %d\n", trans->counter);
3401#endif
3402		    exec->counts[trans->counter]++;
3403		}
3404		if ((trans->count >= 0) &&
3405		    (trans->count < REGEXP_ALL_COUNTER)) {
3406		    if (exec->counts == NULL) {
3407		        exec->status = -1;
3408			goto error;
3409		    }
3410#ifdef DEBUG_REGEXP_EXEC
3411		    printf("resetting count %d on transition\n",
3412		           trans->count);
3413#endif
3414		    exec->counts[trans->count] = 0;
3415		}
3416#ifdef DEBUG_REGEXP_EXEC
3417		printf("entering state %d\n", trans->to);
3418#endif
3419		exec->state = comp->states[trans->to];
3420		exec->transno = 0;
3421		if (trans->atom != NULL) {
3422		    exec->index += len;
3423		}
3424		goto progress;
3425	    } else if (ret < 0) {
3426		exec->status = -4;
3427		break;
3428	    }
3429	}
3430	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3431rollback:
3432	    /*
3433	     * Failed to find a way out
3434	     */
3435	    exec->determinist = 0;
3436#ifdef DEBUG_REGEXP_EXEC
3437	    printf("rollback from state %d on %d:%c\n", exec->state->no,
3438	           codepoint,codepoint);
3439#endif
3440	    xmlFARegExecRollBack(exec);
3441	}
3442progress:
3443	continue;
3444    }
3445error:
3446    if (exec->rollbacks != NULL) {
3447	if (exec->counts != NULL) {
3448	    int i;
3449
3450	    for (i = 0;i < exec->maxRollbacks;i++)
3451		if (exec->rollbacks[i].counts != NULL)
3452		    xmlFree(exec->rollbacks[i].counts);
3453	}
3454	xmlFree(exec->rollbacks);
3455    }
3456    if (exec->counts != NULL)
3457	xmlFree(exec->counts);
3458    if (exec->status == 0)
3459	return(1);
3460    if (exec->status == -1) {
3461	if (exec->nbPush > MAX_PUSH)
3462	    return(-1);
3463	return(0);
3464    }
3465    return(exec->status);
3466}
3467
3468/************************************************************************
3469 * 									*
3470 *	Progressive interface to the verifier one atom at a time	*
3471 * 									*
3472 ************************************************************************/
3473#ifdef DEBUG_ERR
3474static void testerr(xmlRegExecCtxtPtr exec);
3475#endif
3476
3477/**
3478 * xmlRegNewExecCtxt:
3479 * @comp: a precompiled regular expression
3480 * @callback: a callback function used for handling progresses in the
3481 *            automata matching phase
3482 * @data: the context data associated to the callback in this context
3483 *
3484 * Build a context used for progressive evaluation of a regexp.
3485 *
3486 * Returns the new context
3487 */
3488xmlRegExecCtxtPtr
3489xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
3490    xmlRegExecCtxtPtr exec;
3491
3492    if (comp == NULL)
3493	return(NULL);
3494    if ((comp->compact == NULL) && (comp->states == NULL))
3495        return(NULL);
3496    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
3497    if (exec == NULL) {
3498	xmlRegexpErrMemory(NULL, "creating execution context");
3499	return(NULL);
3500    }
3501    memset(exec, 0, sizeof(xmlRegExecCtxt));
3502    exec->inputString = NULL;
3503    exec->index = 0;
3504    exec->determinist = 1;
3505    exec->maxRollbacks = 0;
3506    exec->nbRollbacks = 0;
3507    exec->rollbacks = NULL;
3508    exec->status = 0;
3509    exec->comp = comp;
3510    if (comp->compact == NULL)
3511	exec->state = comp->states[0];
3512    exec->transno = 0;
3513    exec->transcount = 0;
3514    exec->callback = callback;
3515    exec->data = data;
3516    if (comp->nbCounters > 0) {
3517        /*
3518	 * For error handling, exec->counts is allocated twice the size
3519	 * the second half is used to store the data in case of rollback
3520	 */
3521	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3522	                                 * 2);
3523	if (exec->counts == NULL) {
3524	    xmlRegexpErrMemory(NULL, "creating execution context");
3525	    xmlFree(exec);
3526	    return(NULL);
3527	}
3528        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3529	exec->errCounts = &exec->counts[comp->nbCounters];
3530    } else {
3531	exec->counts = NULL;
3532	exec->errCounts = NULL;
3533    }
3534    exec->inputStackMax = 0;
3535    exec->inputStackNr = 0;
3536    exec->inputStack = NULL;
3537    exec->errStateNo = -1;
3538    exec->errString = NULL;
3539    exec->nbPush = 0;
3540    return(exec);
3541}
3542
3543/**
3544 * xmlRegFreeExecCtxt:
3545 * @exec: a regular expression evaulation context
3546 *
3547 * Free the structures associated to a regular expression evaulation context.
3548 */
3549void
3550xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3551    if (exec == NULL)
3552	return;
3553
3554    if (exec->rollbacks != NULL) {
3555	if (exec->counts != NULL) {
3556	    int i;
3557
3558	    for (i = 0;i < exec->maxRollbacks;i++)
3559		if (exec->rollbacks[i].counts != NULL)
3560		    xmlFree(exec->rollbacks[i].counts);
3561	}
3562	xmlFree(exec->rollbacks);
3563    }
3564    if (exec->counts != NULL)
3565	xmlFree(exec->counts);
3566    if (exec->inputStack != NULL) {
3567	int i;
3568
3569	for (i = 0;i < exec->inputStackNr;i++) {
3570	    if (exec->inputStack[i].value != NULL)
3571		xmlFree(exec->inputStack[i].value);
3572	}
3573	xmlFree(exec->inputStack);
3574    }
3575    if (exec->errString != NULL)
3576        xmlFree(exec->errString);
3577    xmlFree(exec);
3578}
3579
3580static void
3581xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3582	                    void *data) {
3583#ifdef DEBUG_PUSH
3584    printf("saving value: %d:%s\n", exec->inputStackNr, value);
3585#endif
3586    if (exec->inputStackMax == 0) {
3587	exec->inputStackMax = 4;
3588	exec->inputStack = (xmlRegInputTokenPtr)
3589	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3590	if (exec->inputStack == NULL) {
3591	    xmlRegexpErrMemory(NULL, "pushing input string");
3592	    exec->inputStackMax = 0;
3593	    return;
3594	}
3595    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3596	xmlRegInputTokenPtr tmp;
3597
3598	exec->inputStackMax *= 2;
3599	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3600			exec->inputStackMax * sizeof(xmlRegInputToken));
3601	if (tmp == NULL) {
3602	    xmlRegexpErrMemory(NULL, "pushing input string");
3603	    exec->inputStackMax /= 2;
3604	    return;
3605	}
3606	exec->inputStack = tmp;
3607    }
3608    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3609    exec->inputStack[exec->inputStackNr].data = data;
3610    exec->inputStackNr++;
3611    exec->inputStack[exec->inputStackNr].value = NULL;
3612    exec->inputStack[exec->inputStackNr].data = NULL;
3613}
3614
3615/**
3616 * xmlRegStrEqualWildcard:
3617 * @expStr:  the string to be evaluated
3618 * @valStr:  the validation string
3619 *
3620 * Checks if both strings are equal or have the same content. "*"
3621 * can be used as a wildcard in @valStr; "|" is used as a seperator of
3622 * substrings in both @expStr and @valStr.
3623 *
3624 * Returns 1 if the comparison is satisfied and the number of substrings
3625 * is equal, 0 otherwise.
3626 */
3627
3628static int
3629xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3630    if (expStr == valStr) return(1);
3631    if (expStr == NULL) return(0);
3632    if (valStr == NULL) return(0);
3633    do {
3634	/*
3635	* Eval if we have a wildcard for the current item.
3636	*/
3637        if (*expStr != *valStr) {
3638	    /* if one of them starts with a wildcard make valStr be it */
3639	    if (*valStr == '*') {
3640	        const xmlChar *tmp;
3641
3642		tmp = valStr;
3643		valStr = expStr;
3644		expStr = tmp;
3645	    }
3646	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3647		do {
3648		    if (*valStr == XML_REG_STRING_SEPARATOR)
3649			break;
3650		    valStr++;
3651		} while (*valStr != 0);
3652		continue;
3653	    } else
3654		return(0);
3655	}
3656	expStr++;
3657	valStr++;
3658    } while (*valStr != 0);
3659    if (*expStr != 0)
3660	return (0);
3661    else
3662	return (1);
3663}
3664
3665/**
3666 * xmlRegCompactPushString:
3667 * @exec: a regexp execution context
3668 * @comp:  the precompiled exec with a compact table
3669 * @value: a string token input
3670 * @data: data associated to the token to reuse in callbacks
3671 *
3672 * Push one input token in the execution context
3673 *
3674 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3675 *     a negative value in case of error.
3676 */
3677static int
3678xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3679	                xmlRegexpPtr comp,
3680	                const xmlChar *value,
3681	                void *data) {
3682    int state = exec->index;
3683    int i, target;
3684
3685    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3686	return(-1);
3687
3688    if (value == NULL) {
3689	/*
3690	 * are we at a final state ?
3691	 */
3692	if (comp->compact[state * (comp->nbstrings + 1)] ==
3693            XML_REGEXP_FINAL_STATE)
3694	    return(1);
3695	return(0);
3696    }
3697
3698#ifdef DEBUG_PUSH
3699    printf("value pushed: %s\n", value);
3700#endif
3701
3702    /*
3703     * Examine all outside transitions from current state
3704     */
3705    for (i = 0;i < comp->nbstrings;i++) {
3706	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3707	if ((target > 0) && (target <= comp->nbstates)) {
3708	    target--; /* to avoid 0 */
3709	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3710		exec->index = target;
3711		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3712		    exec->callback(exec->data, value,
3713			  comp->transdata[state * comp->nbstrings + i], data);
3714		}
3715#ifdef DEBUG_PUSH
3716		printf("entering state %d\n", target);
3717#endif
3718		if (comp->compact[target * (comp->nbstrings + 1)] ==
3719		    XML_REGEXP_SINK_STATE)
3720		    goto error;
3721
3722		if (comp->compact[target * (comp->nbstrings + 1)] ==
3723		    XML_REGEXP_FINAL_STATE)
3724		    return(1);
3725		return(0);
3726	    }
3727	}
3728    }
3729    /*
3730     * Failed to find an exit transition out from current state for the
3731     * current token
3732     */
3733#ifdef DEBUG_PUSH
3734    printf("failed to find a transition for %s on state %d\n", value, state);
3735#endif
3736error:
3737    if (exec->errString != NULL)
3738        xmlFree(exec->errString);
3739    exec->errString = xmlStrdup(value);
3740    exec->errStateNo = state;
3741    exec->status = -1;
3742#ifdef DEBUG_ERR
3743    testerr(exec);
3744#endif
3745    return(-1);
3746}
3747
3748/**
3749 * xmlRegExecPushStringInternal:
3750 * @exec: a regexp execution context or NULL to indicate the end
3751 * @value: a string token input
3752 * @data: data associated to the token to reuse in callbacks
3753 * @compound: value was assembled from 2 strings
3754 *
3755 * Push one input token in the execution context
3756 *
3757 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3758 *     a negative value in case of error.
3759 */
3760static int
3761xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3762	                     void *data, int compound) {
3763    xmlRegTransPtr trans;
3764    xmlRegAtomPtr atom;
3765    int ret;
3766    int final = 0;
3767    int progress = 1;
3768
3769    if (exec == NULL)
3770	return(-1);
3771    if (exec->comp == NULL)
3772	return(-1);
3773    if (exec->status != 0)
3774	return(exec->status);
3775
3776    if (exec->comp->compact != NULL)
3777	return(xmlRegCompactPushString(exec, exec->comp, value, data));
3778
3779    if (value == NULL) {
3780        if (exec->state->type == XML_REGEXP_FINAL_STATE)
3781	    return(1);
3782	final = 1;
3783    }
3784
3785#ifdef DEBUG_PUSH
3786    printf("value pushed: %s\n", value);
3787#endif
3788    /*
3789     * If we have an active rollback stack push the new value there
3790     * and get back to where we were left
3791     */
3792    if ((value != NULL) && (exec->inputStackNr > 0)) {
3793	xmlFARegExecSaveInputString(exec, value, data);
3794	value = exec->inputStack[exec->index].value;
3795	data = exec->inputStack[exec->index].data;
3796#ifdef DEBUG_PUSH
3797	printf("value loaded: %s\n", value);
3798#endif
3799    }
3800
3801    while ((exec->status == 0) &&
3802	   ((value != NULL) ||
3803	    ((final == 1) &&
3804	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3805
3806	/*
3807	 * End of input on non-terminal state, rollback, however we may
3808	 * still have epsilon like transition for counted transitions
3809	 * on counters, in that case don't break too early.
3810	 */
3811	if ((value == NULL) && (exec->counts == NULL))
3812	    goto rollback;
3813
3814	exec->transcount = 0;
3815	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3816	    trans = &exec->state->trans[exec->transno];
3817	    if (trans->to < 0)
3818		continue;
3819	    atom = trans->atom;
3820	    ret = 0;
3821	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3822		int i;
3823		int count;
3824		xmlRegTransPtr t;
3825		xmlRegCounterPtr counter;
3826
3827		ret = 0;
3828
3829#ifdef DEBUG_PUSH
3830		printf("testing all lax %d\n", trans->count);
3831#endif
3832		/*
3833		 * Check all counted transitions from the current state
3834		 */
3835		if ((value == NULL) && (final)) {
3836		    ret = 1;
3837		} else if (value != NULL) {
3838		    for (i = 0;i < exec->state->nbTrans;i++) {
3839			t = &exec->state->trans[i];
3840			if ((t->counter < 0) || (t == trans))
3841			    continue;
3842			counter = &exec->comp->counters[t->counter];
3843			count = exec->counts[t->counter];
3844			if ((count < counter->max) &&
3845		            (t->atom != NULL) &&
3846			    (xmlStrEqual(value, t->atom->valuep))) {
3847			    ret = 0;
3848			    break;
3849			}
3850			if ((count >= counter->min) &&
3851			    (count < counter->max) &&
3852			    (t->atom != NULL) &&
3853			    (xmlStrEqual(value, t->atom->valuep))) {
3854			    ret = 1;
3855			    break;
3856			}
3857		    }
3858		}
3859	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3860		int i;
3861		int count;
3862		xmlRegTransPtr t;
3863		xmlRegCounterPtr counter;
3864
3865		ret = 1;
3866
3867#ifdef DEBUG_PUSH
3868		printf("testing all %d\n", trans->count);
3869#endif
3870		/*
3871		 * Check all counted transitions from the current state
3872		 */
3873		for (i = 0;i < exec->state->nbTrans;i++) {
3874                    t = &exec->state->trans[i];
3875		    if ((t->counter < 0) || (t == trans))
3876			continue;
3877                    counter = &exec->comp->counters[t->counter];
3878		    count = exec->counts[t->counter];
3879		    if ((count < counter->min) || (count > counter->max)) {
3880			ret = 0;
3881			break;
3882		    }
3883		}
3884	    } else if (trans->count >= 0) {
3885		int count;
3886		xmlRegCounterPtr counter;
3887
3888		/*
3889		 * A counted transition.
3890		 */
3891
3892		count = exec->counts[trans->count];
3893		counter = &exec->comp->counters[trans->count];
3894#ifdef DEBUG_PUSH
3895		printf("testing count %d: val %d, min %d, max %d\n",
3896		       trans->count, count, counter->min,  counter->max);
3897#endif
3898		ret = ((count >= counter->min) && (count <= counter->max));
3899	    } else if (atom == NULL) {
3900		fprintf(stderr, "epsilon transition left at runtime\n");
3901		exec->status = -2;
3902		break;
3903	    } else if (value != NULL) {
3904		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3905		if (atom->neg) {
3906		    ret = !ret;
3907		    if (!compound)
3908		        ret = 0;
3909		}
3910		if ((ret == 1) && (trans->counter >= 0)) {
3911		    xmlRegCounterPtr counter;
3912		    int count;
3913
3914		    count = exec->counts[trans->counter];
3915		    counter = &exec->comp->counters[trans->counter];
3916		    if (count >= counter->max)
3917			ret = 0;
3918		}
3919
3920		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3921		    xmlRegStatePtr to = exec->comp->states[trans->to];
3922
3923		    /*
3924		     * this is a multiple input sequence
3925		     */
3926		    if (exec->state->nbTrans > exec->transno + 1) {
3927			if (exec->inputStackNr <= 0) {
3928			    xmlFARegExecSaveInputString(exec, value, data);
3929			}
3930			xmlFARegExecSave(exec);
3931		    }
3932		    exec->transcount = 1;
3933		    do {
3934			/*
3935			 * Try to progress as much as possible on the input
3936			 */
3937			if (exec->transcount == atom->max) {
3938			    break;
3939			}
3940			exec->index++;
3941			value = exec->inputStack[exec->index].value;
3942			data = exec->inputStack[exec->index].data;
3943#ifdef DEBUG_PUSH
3944			printf("value loaded: %s\n", value);
3945#endif
3946
3947			/*
3948			 * End of input: stop here
3949			 */
3950			if (value == NULL) {
3951			    exec->index --;
3952			    break;
3953			}
3954			if (exec->transcount >= atom->min) {
3955			    int transno = exec->transno;
3956			    xmlRegStatePtr state = exec->state;
3957
3958			    /*
3959			     * The transition is acceptable save it
3960			     */
3961			    exec->transno = -1; /* trick */
3962			    exec->state = to;
3963			    if (exec->inputStackNr <= 0) {
3964				xmlFARegExecSaveInputString(exec, value, data);
3965			    }
3966			    xmlFARegExecSave(exec);
3967			    exec->transno = transno;
3968			    exec->state = state;
3969			}
3970			ret = xmlStrEqual(value, atom->valuep);
3971			exec->transcount++;
3972		    } while (ret == 1);
3973		    if (exec->transcount < atom->min)
3974			ret = 0;
3975
3976		    /*
3977		     * If the last check failed but one transition was found
3978		     * possible, rollback
3979		     */
3980		    if (ret < 0)
3981			ret = 0;
3982		    if (ret == 0) {
3983			goto rollback;
3984		    }
3985		}
3986	    }
3987	    if (ret == 1) {
3988		if ((exec->callback != NULL) && (atom != NULL) &&
3989			(data != NULL)) {
3990		    exec->callback(exec->data, atom->valuep,
3991			           atom->data, data);
3992		}
3993		if (exec->state->nbTrans > exec->transno + 1) {
3994		    if (exec->inputStackNr <= 0) {
3995			xmlFARegExecSaveInputString(exec, value, data);
3996		    }
3997		    xmlFARegExecSave(exec);
3998		}
3999		if (trans->counter >= 0) {
4000#ifdef DEBUG_PUSH
4001		    printf("Increasing count %d\n", trans->counter);
4002#endif
4003		    exec->counts[trans->counter]++;
4004		}
4005		if ((trans->count >= 0) &&
4006		    (trans->count < REGEXP_ALL_COUNTER)) {
4007#ifdef DEBUG_REGEXP_EXEC
4008		    printf("resetting count %d on transition\n",
4009		           trans->count);
4010#endif
4011		    exec->counts[trans->count] = 0;
4012		}
4013#ifdef DEBUG_PUSH
4014		printf("entering state %d\n", trans->to);
4015#endif
4016                if ((exec->comp->states[trans->to] != NULL) &&
4017		    (exec->comp->states[trans->to]->type ==
4018		     XML_REGEXP_SINK_STATE)) {
4019		    /*
4020		     * entering a sink state, save the current state as error
4021		     * state.
4022		     */
4023		    if (exec->errString != NULL)
4024			xmlFree(exec->errString);
4025		    exec->errString = xmlStrdup(value);
4026		    exec->errState = exec->state;
4027		    memcpy(exec->errCounts, exec->counts,
4028			   exec->comp->nbCounters * sizeof(int));
4029		}
4030		exec->state = exec->comp->states[trans->to];
4031		exec->transno = 0;
4032		if (trans->atom != NULL) {
4033		    if (exec->inputStack != NULL) {
4034			exec->index++;
4035			if (exec->index < exec->inputStackNr) {
4036			    value = exec->inputStack[exec->index].value;
4037			    data = exec->inputStack[exec->index].data;
4038#ifdef DEBUG_PUSH
4039			    printf("value loaded: %s\n", value);
4040#endif
4041			} else {
4042			    value = NULL;
4043			    data = NULL;
4044#ifdef DEBUG_PUSH
4045			    printf("end of input\n");
4046#endif
4047			}
4048		    } else {
4049			value = NULL;
4050			data = NULL;
4051#ifdef DEBUG_PUSH
4052			printf("end of input\n");
4053#endif
4054		    }
4055		}
4056		goto progress;
4057	    } else if (ret < 0) {
4058		exec->status = -4;
4059		break;
4060	    }
4061	}
4062	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4063rollback:
4064            /*
4065	     * if we didn't yet rollback on the current input
4066	     * store the current state as the error state.
4067	     */
4068	    if ((progress) && (exec->state != NULL) &&
4069	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
4070	        progress = 0;
4071		if (exec->errString != NULL)
4072		    xmlFree(exec->errString);
4073		exec->errString = xmlStrdup(value);
4074		exec->errState = exec->state;
4075		memcpy(exec->errCounts, exec->counts,
4076		       exec->comp->nbCounters * sizeof(int));
4077	    }
4078
4079	    /*
4080	     * Failed to find a way out
4081	     */
4082	    exec->determinist = 0;
4083	    xmlFARegExecRollBack(exec);
4084	    if (exec->status == 0) {
4085		value = exec->inputStack[exec->index].value;
4086		data = exec->inputStack[exec->index].data;
4087#ifdef DEBUG_PUSH
4088		printf("value loaded: %s\n", value);
4089#endif
4090	    }
4091	}
4092	continue;
4093progress:
4094        progress = 1;
4095	continue;
4096    }
4097    if (exec->status == 0) {
4098        return(exec->state->type == XML_REGEXP_FINAL_STATE);
4099    }
4100#ifdef DEBUG_ERR
4101    if (exec->status < 0) {
4102	testerr(exec);
4103    }
4104#endif
4105    return(exec->status);
4106}
4107
4108/**
4109 * xmlRegExecPushString:
4110 * @exec: a regexp execution context or NULL to indicate the end
4111 * @value: a string token input
4112 * @data: data associated to the token to reuse in callbacks
4113 *
4114 * Push one input token in the execution context
4115 *
4116 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4117 *     a negative value in case of error.
4118 */
4119int
4120xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
4121	             void *data) {
4122    return(xmlRegExecPushStringInternal(exec, value, data, 0));
4123}
4124
4125/**
4126 * xmlRegExecPushString2:
4127 * @exec: a regexp execution context or NULL to indicate the end
4128 * @value: the first string token input
4129 * @value2: the second string token input
4130 * @data: data associated to the token to reuse in callbacks
4131 *
4132 * Push one input token in the execution context
4133 *
4134 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
4135 *     a negative value in case of error.
4136 */
4137int
4138xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
4139                      const xmlChar *value2, void *data) {
4140    xmlChar buf[150];
4141    int lenn, lenp, ret;
4142    xmlChar *str;
4143
4144    if (exec == NULL)
4145	return(-1);
4146    if (exec->comp == NULL)
4147	return(-1);
4148    if (exec->status != 0)
4149	return(exec->status);
4150
4151    if (value2 == NULL)
4152        return(xmlRegExecPushString(exec, value, data));
4153
4154    lenn = strlen((char *) value2);
4155    lenp = strlen((char *) value);
4156
4157    if (150 < lenn + lenp + 2) {
4158	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4159	if (str == NULL) {
4160	    exec->status = -1;
4161	    return(-1);
4162	}
4163    } else {
4164	str = buf;
4165    }
4166    memcpy(&str[0], value, lenp);
4167    str[lenp] = XML_REG_STRING_SEPARATOR;
4168    memcpy(&str[lenp + 1], value2, lenn);
4169    str[lenn + lenp + 1] = 0;
4170
4171    if (exec->comp->compact != NULL)
4172	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
4173    else
4174        ret = xmlRegExecPushStringInternal(exec, str, data, 1);
4175
4176    if (str != buf)
4177        xmlFree(str);
4178    return(ret);
4179}
4180
4181/**
4182 * xmlRegExecGetValues:
4183 * @exec: a regexp execution context
4184 * @err: error extraction or normal one
4185 * @nbval: pointer to the number of accepted values IN/OUT
4186 * @nbneg: return number of negative transitions
4187 * @values: pointer to the array of acceptable values
4188 * @terminal: return value if this was a terminal state
4189 *
4190 * Extract informations from the regexp execution, internal routine to
4191 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
4192 *
4193 * Returns: 0 in case of success or -1 in case of error.
4194 */
4195static int
4196xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
4197                    int *nbval, int *nbneg,
4198		    xmlChar **values, int *terminal) {
4199    int maxval;
4200    int nb = 0;
4201
4202    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
4203        (values == NULL) || (*nbval <= 0))
4204        return(-1);
4205
4206    maxval = *nbval;
4207    *nbval = 0;
4208    *nbneg = 0;
4209    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
4210        xmlRegexpPtr comp;
4211	int target, i, state;
4212
4213        comp = exec->comp;
4214
4215	if (err) {
4216	    if (exec->errStateNo == -1) return(-1);
4217	    state = exec->errStateNo;
4218	} else {
4219	    state = exec->index;
4220	}
4221	if (terminal != NULL) {
4222	    if (comp->compact[state * (comp->nbstrings + 1)] ==
4223	        XML_REGEXP_FINAL_STATE)
4224		*terminal = 1;
4225	    else
4226		*terminal = 0;
4227	}
4228	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4229	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4230	    if ((target > 0) && (target <= comp->nbstates) &&
4231	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
4232		 XML_REGEXP_SINK_STATE)) {
4233	        values[nb++] = comp->stringMap[i];
4234		(*nbval)++;
4235	    }
4236	}
4237	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
4238	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
4239	    if ((target > 0) && (target <= comp->nbstates) &&
4240	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
4241		 XML_REGEXP_SINK_STATE)) {
4242	        values[nb++] = comp->stringMap[i];
4243		(*nbneg)++;
4244	    }
4245	}
4246    } else {
4247        int transno;
4248	xmlRegTransPtr trans;
4249	xmlRegAtomPtr atom;
4250	xmlRegStatePtr state;
4251
4252	if (terminal != NULL) {
4253	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
4254		*terminal = 1;
4255	    else
4256		*terminal = 0;
4257	}
4258
4259	if (err) {
4260	    if (exec->errState == NULL) return(-1);
4261	    state = exec->errState;
4262	} else {
4263	    if (exec->state == NULL) return(-1);
4264	    state = exec->state;
4265	}
4266	for (transno = 0;
4267	     (transno < state->nbTrans) && (nb < maxval);
4268	     transno++) {
4269	    trans = &state->trans[transno];
4270	    if (trans->to < 0)
4271		continue;
4272	    atom = trans->atom;
4273	    if ((atom == NULL) || (atom->valuep == NULL))
4274		continue;
4275	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4276	        /* this should not be reached but ... */
4277	        TODO;
4278	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4279	        /* this should not be reached but ... */
4280	        TODO;
4281	    } else if (trans->counter >= 0) {
4282		xmlRegCounterPtr counter = NULL;
4283		int count;
4284
4285		if (err)
4286		    count = exec->errCounts[trans->counter];
4287		else
4288		    count = exec->counts[trans->counter];
4289		if (exec->comp != NULL)
4290		    counter = &exec->comp->counters[trans->counter];
4291		if ((counter == NULL) || (count < counter->max)) {
4292		    if (atom->neg)
4293			values[nb++] = (xmlChar *) atom->valuep2;
4294		    else
4295			values[nb++] = (xmlChar *) atom->valuep;
4296		    (*nbval)++;
4297		}
4298	    } else {
4299                if ((exec->comp->states[trans->to] != NULL) &&
4300		    (exec->comp->states[trans->to]->type !=
4301		     XML_REGEXP_SINK_STATE)) {
4302		    if (atom->neg)
4303			values[nb++] = (xmlChar *) atom->valuep2;
4304		    else
4305			values[nb++] = (xmlChar *) atom->valuep;
4306		    (*nbval)++;
4307		}
4308	    }
4309	}
4310	for (transno = 0;
4311	     (transno < state->nbTrans) && (nb < maxval);
4312	     transno++) {
4313	    trans = &state->trans[transno];
4314	    if (trans->to < 0)
4315		continue;
4316	    atom = trans->atom;
4317	    if ((atom == NULL) || (atom->valuep == NULL))
4318		continue;
4319	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
4320	        continue;
4321	    } else if (trans->count == REGEXP_ALL_COUNTER) {
4322	        continue;
4323	    } else if (trans->counter >= 0) {
4324	        continue;
4325	    } else {
4326                if ((exec->comp->states[trans->to] != NULL) &&
4327		    (exec->comp->states[trans->to]->type ==
4328		     XML_REGEXP_SINK_STATE)) {
4329		    if (atom->neg)
4330			values[nb++] = (xmlChar *) atom->valuep2;
4331		    else
4332			values[nb++] = (xmlChar *) atom->valuep;
4333		    (*nbneg)++;
4334		}
4335	    }
4336	}
4337    }
4338    return(0);
4339}
4340
4341/**
4342 * xmlRegExecNextValues:
4343 * @exec: a regexp execution context
4344 * @nbval: pointer to the number of accepted values IN/OUT
4345 * @nbneg: return number of negative transitions
4346 * @values: pointer to the array of acceptable values
4347 * @terminal: return value if this was a terminal state
4348 *
4349 * Extract informations from the regexp execution,
4350 * the parameter @values must point to an array of @nbval string pointers
4351 * on return nbval will contain the number of possible strings in that
4352 * state and the @values array will be updated with them. The string values
4353 * returned will be freed with the @exec context and don't need to be
4354 * deallocated.
4355 *
4356 * Returns: 0 in case of success or -1 in case of error.
4357 */
4358int
4359xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
4360                     xmlChar **values, int *terminal) {
4361    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
4362}
4363
4364/**
4365 * xmlRegExecErrInfo:
4366 * @exec: a regexp execution context generating an error
4367 * @string: return value for the error string
4368 * @nbval: pointer to the number of accepted values IN/OUT
4369 * @nbneg: return number of negative transitions
4370 * @values: pointer to the array of acceptable values
4371 * @terminal: return value if this was a terminal state
4372 *
4373 * Extract error informations from the regexp execution, the parameter
4374 * @string will be updated with the value pushed and not accepted,
4375 * the parameter @values must point to an array of @nbval string pointers
4376 * on return nbval will contain the number of possible strings in that
4377 * state and the @values array will be updated with them. The string values
4378 * returned will be freed with the @exec context and don't need to be
4379 * deallocated.
4380 *
4381 * Returns: 0 in case of success or -1 in case of error.
4382 */
4383int
4384xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
4385                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
4386    if (exec == NULL)
4387        return(-1);
4388    if (string != NULL) {
4389        if (exec->status != 0)
4390	    *string = exec->errString;
4391	else
4392	    *string = NULL;
4393    }
4394    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
4395}
4396
4397#ifdef DEBUG_ERR
4398static void testerr(xmlRegExecCtxtPtr exec) {
4399    const xmlChar *string;
4400    xmlChar *values[5];
4401    int nb = 5;
4402    int nbneg;
4403    int terminal;
4404    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
4405}
4406#endif
4407
4408#if 0
4409static int
4410xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
4411    xmlRegTransPtr trans;
4412    xmlRegAtomPtr atom;
4413    int ret;
4414    int codepoint, len;
4415
4416    if (exec == NULL)
4417	return(-1);
4418    if (exec->status != 0)
4419	return(exec->status);
4420
4421    while ((exec->status == 0) &&
4422	   ((exec->inputString[exec->index] != 0) ||
4423	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
4424
4425	/*
4426	 * End of input on non-terminal state, rollback, however we may
4427	 * still have epsilon like transition for counted transitions
4428	 * on counters, in that case don't break too early.
4429	 */
4430	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
4431	    goto rollback;
4432
4433	exec->transcount = 0;
4434	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
4435	    trans = &exec->state->trans[exec->transno];
4436	    if (trans->to < 0)
4437		continue;
4438	    atom = trans->atom;
4439	    ret = 0;
4440	    if (trans->count >= 0) {
4441		int count;
4442		xmlRegCounterPtr counter;
4443
4444		/*
4445		 * A counted transition.
4446		 */
4447
4448		count = exec->counts[trans->count];
4449		counter = &exec->comp->counters[trans->count];
4450#ifdef DEBUG_REGEXP_EXEC
4451		printf("testing count %d: val %d, min %d, max %d\n",
4452		       trans->count, count, counter->min,  counter->max);
4453#endif
4454		ret = ((count >= counter->min) && (count <= counter->max));
4455	    } else if (atom == NULL) {
4456		fprintf(stderr, "epsilon transition left at runtime\n");
4457		exec->status = -2;
4458		break;
4459	    } else if (exec->inputString[exec->index] != 0) {
4460                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
4461		ret = xmlRegCheckCharacter(atom, codepoint);
4462		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
4463		    xmlRegStatePtr to = exec->comp->states[trans->to];
4464
4465		    /*
4466		     * this is a multiple input sequence
4467		     */
4468		    if (exec->state->nbTrans > exec->transno + 1) {
4469			xmlFARegExecSave(exec);
4470		    }
4471		    exec->transcount = 1;
4472		    do {
4473			/*
4474			 * Try to progress as much as possible on the input
4475			 */
4476			if (exec->transcount == atom->max) {
4477			    break;
4478			}
4479			exec->index += len;
4480			/*
4481			 * End of input: stop here
4482			 */
4483			if (exec->inputString[exec->index] == 0) {
4484			    exec->index -= len;
4485			    break;
4486			}
4487			if (exec->transcount >= atom->min) {
4488			    int transno = exec->transno;
4489			    xmlRegStatePtr state = exec->state;
4490
4491			    /*
4492			     * The transition is acceptable save it
4493			     */
4494			    exec->transno = -1; /* trick */
4495			    exec->state = to;
4496			    xmlFARegExecSave(exec);
4497			    exec->transno = transno;
4498			    exec->state = state;
4499			}
4500			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
4501				              len);
4502			ret = xmlRegCheckCharacter(atom, codepoint);
4503			exec->transcount++;
4504		    } while (ret == 1);
4505		    if (exec->transcount < atom->min)
4506			ret = 0;
4507
4508		    /*
4509		     * If the last check failed but one transition was found
4510		     * possible, rollback
4511		     */
4512		    if (ret < 0)
4513			ret = 0;
4514		    if (ret == 0) {
4515			goto rollback;
4516		    }
4517		}
4518	    }
4519	    if (ret == 1) {
4520		if (exec->state->nbTrans > exec->transno + 1) {
4521		    xmlFARegExecSave(exec);
4522		}
4523		/*
4524		 * restart count for expressions like this ((abc){2})*
4525		 */
4526		if (trans->count >= 0) {
4527#ifdef DEBUG_REGEXP_EXEC
4528		    printf("Reset count %d\n", trans->count);
4529#endif
4530		    exec->counts[trans->count] = 0;
4531		}
4532		if (trans->counter >= 0) {
4533#ifdef DEBUG_REGEXP_EXEC
4534		    printf("Increasing count %d\n", trans->counter);
4535#endif
4536		    exec->counts[trans->counter]++;
4537		}
4538#ifdef DEBUG_REGEXP_EXEC
4539		printf("entering state %d\n", trans->to);
4540#endif
4541		exec->state = exec->comp->states[trans->to];
4542		exec->transno = 0;
4543		if (trans->atom != NULL) {
4544		    exec->index += len;
4545		}
4546		goto progress;
4547	    } else if (ret < 0) {
4548		exec->status = -4;
4549		break;
4550	    }
4551	}
4552	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4553rollback:
4554	    /*
4555	     * Failed to find a way out
4556	     */
4557	    exec->determinist = 0;
4558	    xmlFARegExecRollBack(exec);
4559	}
4560progress:
4561	continue;
4562    }
4563}
4564#endif
4565/************************************************************************
4566 * 									*
4567 *	Parser for the Schemas Datatype Regular Expressions		*
4568 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
4569 * 									*
4570 ************************************************************************/
4571
4572/**
4573 * xmlFAIsChar:
4574 * @ctxt:  a regexp parser context
4575 *
4576 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
4577 */
4578static int
4579xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4580    int cur;
4581    int len;
4582
4583    cur = CUR_SCHAR(ctxt->cur, len);
4584    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4585	(cur == '*') || (cur == '+') || (cur == '(') ||
4586	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
4587	(cur == 0x5D) || (cur == 0))
4588	return(-1);
4589    return(cur);
4590}
4591
4592/**
4593 * xmlFAParseCharProp:
4594 * @ctxt:  a regexp parser context
4595 *
4596 * [27]   charProp   ::=   IsCategory | IsBlock
4597 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
4598 *                       Separators | Symbols | Others
4599 * [29]   Letters   ::=   'L' [ultmo]?
4600 * [30]   Marks   ::=   'M' [nce]?
4601 * [31]   Numbers   ::=   'N' [dlo]?
4602 * [32]   Punctuation   ::=   'P' [cdseifo]?
4603 * [33]   Separators   ::=   'Z' [slp]?
4604 * [34]   Symbols   ::=   'S' [mcko]?
4605 * [35]   Others   ::=   'C' [cfon]?
4606 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
4607 */
4608static void
4609xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4610    int cur;
4611    xmlRegAtomType type = (xmlRegAtomType) 0;
4612    xmlChar *blockName = NULL;
4613
4614    cur = CUR;
4615    if (cur == 'L') {
4616	NEXT;
4617	cur = CUR;
4618	if (cur == 'u') {
4619	    NEXT;
4620	    type = XML_REGEXP_LETTER_UPPERCASE;
4621	} else if (cur == 'l') {
4622	    NEXT;
4623	    type = XML_REGEXP_LETTER_LOWERCASE;
4624	} else if (cur == 't') {
4625	    NEXT;
4626	    type = XML_REGEXP_LETTER_TITLECASE;
4627	} else if (cur == 'm') {
4628	    NEXT;
4629	    type = XML_REGEXP_LETTER_MODIFIER;
4630	} else if (cur == 'o') {
4631	    NEXT;
4632	    type = XML_REGEXP_LETTER_OTHERS;
4633	} else {
4634	    type = XML_REGEXP_LETTER;
4635	}
4636    } else if (cur == 'M') {
4637	NEXT;
4638	cur = CUR;
4639	if (cur == 'n') {
4640	    NEXT;
4641	    /* nonspacing */
4642	    type = XML_REGEXP_MARK_NONSPACING;
4643	} else if (cur == 'c') {
4644	    NEXT;
4645	    /* spacing combining */
4646	    type = XML_REGEXP_MARK_SPACECOMBINING;
4647	} else if (cur == 'e') {
4648	    NEXT;
4649	    /* enclosing */
4650	    type = XML_REGEXP_MARK_ENCLOSING;
4651	} else {
4652	    /* all marks */
4653	    type = XML_REGEXP_MARK;
4654	}
4655    } else if (cur == 'N') {
4656	NEXT;
4657	cur = CUR;
4658	if (cur == 'd') {
4659	    NEXT;
4660	    /* digital */
4661	    type = XML_REGEXP_NUMBER_DECIMAL;
4662	} else if (cur == 'l') {
4663	    NEXT;
4664	    /* letter */
4665	    type = XML_REGEXP_NUMBER_LETTER;
4666	} else if (cur == 'o') {
4667	    NEXT;
4668	    /* other */
4669	    type = XML_REGEXP_NUMBER_OTHERS;
4670	} else {
4671	    /* all numbers */
4672	    type = XML_REGEXP_NUMBER;
4673	}
4674    } else if (cur == 'P') {
4675	NEXT;
4676	cur = CUR;
4677	if (cur == 'c') {
4678	    NEXT;
4679	    /* connector */
4680	    type = XML_REGEXP_PUNCT_CONNECTOR;
4681	} else if (cur == 'd') {
4682	    NEXT;
4683	    /* dash */
4684	    type = XML_REGEXP_PUNCT_DASH;
4685	} else if (cur == 's') {
4686	    NEXT;
4687	    /* open */
4688	    type = XML_REGEXP_PUNCT_OPEN;
4689	} else if (cur == 'e') {
4690	    NEXT;
4691	    /* close */
4692	    type = XML_REGEXP_PUNCT_CLOSE;
4693	} else if (cur == 'i') {
4694	    NEXT;
4695	    /* initial quote */
4696	    type = XML_REGEXP_PUNCT_INITQUOTE;
4697	} else if (cur == 'f') {
4698	    NEXT;
4699	    /* final quote */
4700	    type = XML_REGEXP_PUNCT_FINQUOTE;
4701	} else if (cur == 'o') {
4702	    NEXT;
4703	    /* other */
4704	    type = XML_REGEXP_PUNCT_OTHERS;
4705	} else {
4706	    /* all punctuation */
4707	    type = XML_REGEXP_PUNCT;
4708	}
4709    } else if (cur == 'Z') {
4710	NEXT;
4711	cur = CUR;
4712	if (cur == 's') {
4713	    NEXT;
4714	    /* space */
4715	    type = XML_REGEXP_SEPAR_SPACE;
4716	} else if (cur == 'l') {
4717	    NEXT;
4718	    /* line */
4719	    type = XML_REGEXP_SEPAR_LINE;
4720	} else if (cur == 'p') {
4721	    NEXT;
4722	    /* paragraph */
4723	    type = XML_REGEXP_SEPAR_PARA;
4724	} else {
4725	    /* all separators */
4726	    type = XML_REGEXP_SEPAR;
4727	}
4728    } else if (cur == 'S') {
4729	NEXT;
4730	cur = CUR;
4731	if (cur == 'm') {
4732	    NEXT;
4733	    type = XML_REGEXP_SYMBOL_MATH;
4734	    /* math */
4735	} else if (cur == 'c') {
4736	    NEXT;
4737	    type = XML_REGEXP_SYMBOL_CURRENCY;
4738	    /* currency */
4739	} else if (cur == 'k') {
4740	    NEXT;
4741	    type = XML_REGEXP_SYMBOL_MODIFIER;
4742	    /* modifiers */
4743	} else if (cur == 'o') {
4744	    NEXT;
4745	    type = XML_REGEXP_SYMBOL_OTHERS;
4746	    /* other */
4747	} else {
4748	    /* all symbols */
4749	    type = XML_REGEXP_SYMBOL;
4750	}
4751    } else if (cur == 'C') {
4752	NEXT;
4753	cur = CUR;
4754	if (cur == 'c') {
4755	    NEXT;
4756	    /* control */
4757	    type = XML_REGEXP_OTHER_CONTROL;
4758	} else if (cur == 'f') {
4759	    NEXT;
4760	    /* format */
4761	    type = XML_REGEXP_OTHER_FORMAT;
4762	} else if (cur == 'o') {
4763	    NEXT;
4764	    /* private use */
4765	    type = XML_REGEXP_OTHER_PRIVATE;
4766	} else if (cur == 'n') {
4767	    NEXT;
4768	    /* not assigned */
4769	    type = XML_REGEXP_OTHER_NA;
4770	} else {
4771	    /* all others */
4772	    type = XML_REGEXP_OTHER;
4773	}
4774    } else if (cur == 'I') {
4775	const xmlChar *start;
4776	NEXT;
4777	cur = CUR;
4778	if (cur != 's') {
4779	    ERROR("IsXXXX expected");
4780	    return;
4781	}
4782	NEXT;
4783	start = ctxt->cur;
4784	cur = CUR;
4785	if (((cur >= 'a') && (cur <= 'z')) ||
4786	    ((cur >= 'A') && (cur <= 'Z')) ||
4787	    ((cur >= '0') && (cur <= '9')) ||
4788	    (cur == 0x2D)) {
4789	    NEXT;
4790	    cur = CUR;
4791	    while (((cur >= 'a') && (cur <= 'z')) ||
4792		((cur >= 'A') && (cur <= 'Z')) ||
4793		((cur >= '0') && (cur <= '9')) ||
4794		(cur == 0x2D)) {
4795		NEXT;
4796		cur = CUR;
4797	    }
4798	}
4799	type = XML_REGEXP_BLOCK_NAME;
4800	blockName = xmlStrndup(start, ctxt->cur - start);
4801    } else {
4802	ERROR("Unknown char property");
4803	return;
4804    }
4805    if (ctxt->atom == NULL) {
4806	ctxt->atom = xmlRegNewAtom(ctxt, type);
4807	if (ctxt->atom != NULL)
4808	    ctxt->atom->valuep = blockName;
4809    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4810        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4811		           type, 0, 0, blockName);
4812    }
4813}
4814
4815/**
4816 * xmlFAParseCharClassEsc:
4817 * @ctxt:  a regexp parser context
4818 *
4819 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
4820 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4821 * [25] catEsc   ::=   '\p{' charProp '}'
4822 * [26] complEsc ::=   '\P{' charProp '}'
4823 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4824 */
4825static void
4826xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4827    int cur;
4828
4829    if (CUR == '.') {
4830	if (ctxt->atom == NULL) {
4831	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4832	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4833	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4834			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
4835	}
4836	NEXT;
4837	return;
4838    }
4839    if (CUR != '\\') {
4840	ERROR("Escaped sequence: expecting \\");
4841	return;
4842    }
4843    NEXT;
4844    cur = CUR;
4845    if (cur == 'p') {
4846	NEXT;
4847	if (CUR != '{') {
4848	    ERROR("Expecting '{'");
4849	    return;
4850	}
4851	NEXT;
4852	xmlFAParseCharProp(ctxt);
4853	if (CUR != '}') {
4854	    ERROR("Expecting '}'");
4855	    return;
4856	}
4857	NEXT;
4858    } else if (cur == 'P') {
4859	NEXT;
4860	if (CUR != '{') {
4861	    ERROR("Expecting '{'");
4862	    return;
4863	}
4864	NEXT;
4865	xmlFAParseCharProp(ctxt);
4866	ctxt->atom->neg = 1;
4867	if (CUR != '}') {
4868	    ERROR("Expecting '}'");
4869	    return;
4870	}
4871	NEXT;
4872    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4873	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4874	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4875	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4876	(cur == 0x5E)) {
4877	if (ctxt->atom == NULL) {
4878	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4879	    if (ctxt->atom != NULL) {
4880	        switch (cur) {
4881		    case 'n':
4882		        ctxt->atom->codepoint = '\n';
4883			break;
4884		    case 'r':
4885		        ctxt->atom->codepoint = '\r';
4886			break;
4887		    case 't':
4888		        ctxt->atom->codepoint = '\t';
4889			break;
4890		    default:
4891			ctxt->atom->codepoint = cur;
4892		}
4893	    }
4894	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4895            switch (cur) {
4896                case 'n':
4897                    cur = '\n';
4898                    break;
4899                case 'r':
4900                    cur = '\r';
4901                    break;
4902                case 't':
4903                    cur = '\t';
4904                    break;
4905            }
4906	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4907			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4908	}
4909	NEXT;
4910    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4911	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4912	(cur == 'w') || (cur == 'W')) {
4913	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4914
4915	switch (cur) {
4916	    case 's':
4917		type = XML_REGEXP_ANYSPACE;
4918		break;
4919	    case 'S':
4920		type = XML_REGEXP_NOTSPACE;
4921		break;
4922	    case 'i':
4923		type = XML_REGEXP_INITNAME;
4924		break;
4925	    case 'I':
4926		type = XML_REGEXP_NOTINITNAME;
4927		break;
4928	    case 'c':
4929		type = XML_REGEXP_NAMECHAR;
4930		break;
4931	    case 'C':
4932		type = XML_REGEXP_NOTNAMECHAR;
4933		break;
4934	    case 'd':
4935		type = XML_REGEXP_DECIMAL;
4936		break;
4937	    case 'D':
4938		type = XML_REGEXP_NOTDECIMAL;
4939		break;
4940	    case 'w':
4941		type = XML_REGEXP_REALCHAR;
4942		break;
4943	    case 'W':
4944		type = XML_REGEXP_NOTREALCHAR;
4945		break;
4946	}
4947	NEXT;
4948	if (ctxt->atom == NULL) {
4949	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4950	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4951	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4952			       type, 0, 0, NULL);
4953	}
4954    } else {
4955	ERROR("Wrong escape sequence, misuse of character '\\'");
4956    }
4957}
4958
4959/**
4960 * xmlFAParseCharRange:
4961 * @ctxt:  a regexp parser context
4962 *
4963 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
4964 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
4965 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
4966 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
4967 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
4968 */
4969static void
4970xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4971    int cur, len;
4972    int start = -1;
4973    int end = -1;
4974
4975    if (CUR == '\0') {
4976        ERROR("Expecting ']'");
4977	return;
4978    }
4979
4980    cur = CUR;
4981    if (cur == '\\') {
4982	NEXT;
4983	cur = CUR;
4984	switch (cur) {
4985	    case 'n': start = 0xA; break;
4986	    case 'r': start = 0xD; break;
4987	    case 't': start = 0x9; break;
4988	    case '\\': case '|': case '.': case '-': case '^': case '?':
4989	    case '*': case '+': case '{': case '}': case '(': case ')':
4990	    case '[': case ']':
4991		start = cur; break;
4992	    default:
4993		ERROR("Invalid escape value");
4994		return;
4995	}
4996	end = start;
4997        len = 1;
4998    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4999        end = start = CUR_SCHAR(ctxt->cur, len);
5000    } else {
5001	ERROR("Expecting a char range");
5002	return;
5003    }
5004    /*
5005     * Since we are "inside" a range, we can assume ctxt->cur is past
5006     * the start of ctxt->string, and PREV should be safe
5007     */
5008    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
5009	NEXTL(len);
5010	return;
5011    }
5012    NEXTL(len);
5013    cur = CUR;
5014    if ((cur != '-') || (NXT(1) == ']')) {
5015        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5016		              XML_REGEXP_CHARVAL, start, end, NULL);
5017	return;
5018    }
5019    NEXT;
5020    cur = CUR;
5021    if (cur == '\\') {
5022	NEXT;
5023	cur = CUR;
5024	switch (cur) {
5025	    case 'n': end = 0xA; break;
5026	    case 'r': end = 0xD; break;
5027	    case 't': end = 0x9; break;
5028	    case '\\': case '|': case '.': case '-': case '^': case '?':
5029	    case '*': case '+': case '{': case '}': case '(': case ')':
5030	    case '[': case ']':
5031		end = cur; break;
5032	    default:
5033		ERROR("Invalid escape value");
5034		return;
5035	}
5036        len = 1;
5037    } else if ((cur != 0x5B) && (cur != 0x5D)) {
5038        end = CUR_SCHAR(ctxt->cur, len);
5039    } else {
5040	ERROR("Expecting the end of a char range");
5041	return;
5042    }
5043    NEXTL(len);
5044    /* TODO check that the values are acceptable character ranges for XML */
5045    if (end < start) {
5046	ERROR("End of range is before start of range");
5047    } else {
5048        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
5049		           XML_REGEXP_CHARVAL, start, end, NULL);
5050    }
5051    return;
5052}
5053
5054/**
5055 * xmlFAParsePosCharGroup:
5056 * @ctxt:  a regexp parser context
5057 *
5058 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
5059 */
5060static void
5061xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
5062    do {
5063	if (CUR == '\\') {
5064	    xmlFAParseCharClassEsc(ctxt);
5065	} else {
5066	    xmlFAParseCharRange(ctxt);
5067	}
5068    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
5069             (CUR != 0) && (ctxt->error == 0));
5070}
5071
5072/**
5073 * xmlFAParseCharGroup:
5074 * @ctxt:  a regexp parser context
5075 *
5076 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
5077 * [15]   negCharGroup ::= '^' posCharGroup
5078 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
5079 * [12]   charClassExpr ::= '[' charGroup ']'
5080 */
5081static void
5082xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
5083    int n = ctxt->neg;
5084    while ((CUR != ']') && (ctxt->error == 0)) {
5085	if (CUR == '^') {
5086	    int neg = ctxt->neg;
5087
5088	    NEXT;
5089	    ctxt->neg = !ctxt->neg;
5090	    xmlFAParsePosCharGroup(ctxt);
5091	    ctxt->neg = neg;
5092	} else if ((CUR == '-') && (NXT(1) == '[')) {
5093	    int neg = ctxt->neg;
5094	    ctxt->neg = 2;
5095	    NEXT;	/* eat the '-' */
5096	    NEXT;	/* eat the '[' */
5097	    xmlFAParseCharGroup(ctxt);
5098	    if (CUR == ']') {
5099		NEXT;
5100	    } else {
5101		ERROR("charClassExpr: ']' expected");
5102		break;
5103	    }
5104	    ctxt->neg = neg;
5105	    break;
5106	} else if (CUR != ']') {
5107	    xmlFAParsePosCharGroup(ctxt);
5108	}
5109    }
5110    ctxt->neg = n;
5111}
5112
5113/**
5114 * xmlFAParseCharClass:
5115 * @ctxt:  a regexp parser context
5116 *
5117 * [11]   charClass   ::=     charClassEsc | charClassExpr
5118 * [12]   charClassExpr   ::=   '[' charGroup ']'
5119 */
5120static void
5121xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
5122    if (CUR == '[') {
5123	NEXT;
5124	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
5125	if (ctxt->atom == NULL)
5126	    return;
5127	xmlFAParseCharGroup(ctxt);
5128	if (CUR == ']') {
5129	    NEXT;
5130	} else {
5131	    ERROR("xmlFAParseCharClass: ']' expected");
5132	}
5133    } else {
5134	xmlFAParseCharClassEsc(ctxt);
5135    }
5136}
5137
5138/**
5139 * xmlFAParseQuantExact:
5140 * @ctxt:  a regexp parser context
5141 *
5142 * [8]   QuantExact   ::=   [0-9]+
5143 *
5144 * Returns 0 if success or -1 in case of error
5145 */
5146static int
5147xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
5148    int ret = 0;
5149    int ok = 0;
5150
5151    while ((CUR >= '0') && (CUR <= '9')) {
5152	ret = ret * 10 + (CUR - '0');
5153	ok = 1;
5154	NEXT;
5155    }
5156    if (ok != 1) {
5157	return(-1);
5158    }
5159    return(ret);
5160}
5161
5162/**
5163 * xmlFAParseQuantifier:
5164 * @ctxt:  a regexp parser context
5165 *
5166 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
5167 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
5168 * [6]   quantRange   ::=   QuantExact ',' QuantExact
5169 * [7]   quantMin   ::=   QuantExact ','
5170 * [8]   QuantExact   ::=   [0-9]+
5171 */
5172static int
5173xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
5174    int cur;
5175
5176    cur = CUR;
5177    if ((cur == '?') || (cur == '*') || (cur == '+')) {
5178	if (ctxt->atom != NULL) {
5179	    if (cur == '?')
5180		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
5181	    else if (cur == '*')
5182		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
5183	    else if (cur == '+')
5184		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
5185	}
5186	NEXT;
5187	return(1);
5188    }
5189    if (cur == '{') {
5190	int min = 0, max = 0;
5191
5192	NEXT;
5193	cur = xmlFAParseQuantExact(ctxt);
5194	if (cur >= 0)
5195	    min = cur;
5196	if (CUR == ',') {
5197	    NEXT;
5198	    if (CUR == '}')
5199	        max = INT_MAX;
5200	    else {
5201	        cur = xmlFAParseQuantExact(ctxt);
5202	        if (cur >= 0)
5203		    max = cur;
5204		else {
5205		    ERROR("Improper quantifier");
5206		}
5207	    }
5208	}
5209	if (CUR == '}') {
5210	    NEXT;
5211	} else {
5212	    ERROR("Unterminated quantifier");
5213	}
5214	if (max == 0)
5215	    max = min;
5216	if (ctxt->atom != NULL) {
5217	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
5218	    ctxt->atom->min = min;
5219	    ctxt->atom->max = max;
5220	}
5221	return(1);
5222    }
5223    return(0);
5224}
5225
5226/**
5227 * xmlFAParseAtom:
5228 * @ctxt:  a regexp parser context
5229 *
5230 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
5231 */
5232static int
5233xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
5234    int codepoint, len;
5235
5236    codepoint = xmlFAIsChar(ctxt);
5237    if (codepoint > 0) {
5238	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
5239	if (ctxt->atom == NULL)
5240	    return(-1);
5241	codepoint = CUR_SCHAR(ctxt->cur, len);
5242	ctxt->atom->codepoint = codepoint;
5243	NEXTL(len);
5244	return(1);
5245    } else if (CUR == '|') {
5246	return(0);
5247    } else if (CUR == 0) {
5248	return(0);
5249    } else if (CUR == ')') {
5250	return(0);
5251    } else if (CUR == '(') {
5252	xmlRegStatePtr start, oldend, start0;
5253
5254	NEXT;
5255	/*
5256	 * this extra Epsilon transition is needed if we count with 0 allowed
5257	 * unfortunately this can't be known at that point
5258	 */
5259	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5260	start0 = ctxt->state;
5261	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
5262	start = ctxt->state;
5263	oldend = ctxt->end;
5264	ctxt->end = NULL;
5265	ctxt->atom = NULL;
5266	xmlFAParseRegExp(ctxt, 0);
5267	if (CUR == ')') {
5268	    NEXT;
5269	} else {
5270	    ERROR("xmlFAParseAtom: expecting ')'");
5271	}
5272	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
5273	if (ctxt->atom == NULL)
5274	    return(-1);
5275	ctxt->atom->start = start;
5276	ctxt->atom->start0 = start0;
5277	ctxt->atom->stop = ctxt->state;
5278	ctxt->end = oldend;
5279	return(1);
5280    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
5281	xmlFAParseCharClass(ctxt);
5282	return(1);
5283    }
5284    return(0);
5285}
5286
5287/**
5288 * xmlFAParsePiece:
5289 * @ctxt:  a regexp parser context
5290 *
5291 * [3]   piece   ::=   atom quantifier?
5292 */
5293static int
5294xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
5295    int ret;
5296
5297    ctxt->atom = NULL;
5298    ret = xmlFAParseAtom(ctxt);
5299    if (ret == 0)
5300	return(0);
5301    if (ctxt->atom == NULL) {
5302	ERROR("internal: no atom generated");
5303    }
5304    xmlFAParseQuantifier(ctxt);
5305    return(1);
5306}
5307
5308/**
5309 * xmlFAParseBranch:
5310 * @ctxt:  a regexp parser context
5311 * @to: optional target to the end of the branch
5312 *
5313 * @to is used to optimize by removing duplicate path in automata
5314 * in expressions like (a|b)(c|d)
5315 *
5316 * [2]   branch   ::=   piece*
5317 */
5318static int
5319xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
5320    xmlRegStatePtr previous;
5321    int ret;
5322
5323    previous = ctxt->state;
5324    ret = xmlFAParsePiece(ctxt);
5325    if (ret != 0) {
5326	if (xmlFAGenerateTransitions(ctxt, previous,
5327	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5328	    return(-1);
5329	previous = ctxt->state;
5330	ctxt->atom = NULL;
5331    }
5332    while ((ret != 0) && (ctxt->error == 0)) {
5333	ret = xmlFAParsePiece(ctxt);
5334	if (ret != 0) {
5335	    if (xmlFAGenerateTransitions(ctxt, previous,
5336	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
5337		    return(-1);
5338	    previous = ctxt->state;
5339	    ctxt->atom = NULL;
5340	}
5341    }
5342    return(0);
5343}
5344
5345/**
5346 * xmlFAParseRegExp:
5347 * @ctxt:  a regexp parser context
5348 * @top:  is this the top-level expression ?
5349 *
5350 * [1]   regExp   ::=     branch  ( '|' branch )*
5351 */
5352static void
5353xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
5354    xmlRegStatePtr start, end;
5355
5356    /* if not top start should have been generated by an epsilon trans */
5357    start = ctxt->state;
5358    ctxt->end = NULL;
5359    xmlFAParseBranch(ctxt, NULL);
5360    if (top) {
5361#ifdef DEBUG_REGEXP_GRAPH
5362	printf("State %d is final\n", ctxt->state->no);
5363#endif
5364	ctxt->state->type = XML_REGEXP_FINAL_STATE;
5365    }
5366    if (CUR != '|') {
5367	ctxt->end = ctxt->state;
5368	return;
5369    }
5370    end = ctxt->state;
5371    while ((CUR == '|') && (ctxt->error == 0)) {
5372	NEXT;
5373	ctxt->state = start;
5374	ctxt->end = NULL;
5375	xmlFAParseBranch(ctxt, end);
5376    }
5377    if (!top) {
5378	ctxt->state = end;
5379	ctxt->end = end;
5380    }
5381}
5382
5383/************************************************************************
5384 * 									*
5385 * 			The basic API					*
5386 * 									*
5387 ************************************************************************/
5388
5389/**
5390 * xmlRegexpPrint:
5391 * @output: the file for the output debug
5392 * @regexp: the compiled regexp
5393 *
5394 * Print the content of the compiled regular expression
5395 */
5396void
5397xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
5398    int i;
5399
5400    if (output == NULL)
5401        return;
5402    fprintf(output, " regexp: ");
5403    if (regexp == NULL) {
5404	fprintf(output, "NULL\n");
5405	return;
5406    }
5407    fprintf(output, "'%s' ", regexp->string);
5408    fprintf(output, "\n");
5409    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
5410    for (i = 0;i < regexp->nbAtoms; i++) {
5411	fprintf(output, " %02d ", i);
5412	xmlRegPrintAtom(output, regexp->atoms[i]);
5413    }
5414    fprintf(output, "%d states:", regexp->nbStates);
5415    fprintf(output, "\n");
5416    for (i = 0;i < regexp->nbStates; i++) {
5417	xmlRegPrintState(output, regexp->states[i]);
5418    }
5419    fprintf(output, "%d counters:\n", regexp->nbCounters);
5420    for (i = 0;i < regexp->nbCounters; i++) {
5421	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
5422		                                regexp->counters[i].max);
5423    }
5424}
5425
5426/**
5427 * xmlRegexpCompile:
5428 * @regexp:  a regular expression string
5429 *
5430 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
5431 * Appendix F and builds an automata suitable for testing strings against
5432 * that regular expression
5433 *
5434 * Returns the compiled expression or NULL in case of error
5435 */
5436xmlRegexpPtr
5437xmlRegexpCompile(const xmlChar *regexp) {
5438    xmlRegexpPtr ret;
5439    xmlRegParserCtxtPtr ctxt;
5440
5441    ctxt = xmlRegNewParserCtxt(regexp);
5442    if (ctxt == NULL)
5443	return(NULL);
5444
5445    /* initialize the parser */
5446    ctxt->end = NULL;
5447    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5448    xmlRegStatePush(ctxt, ctxt->start);
5449
5450    /* parse the expression building an automata */
5451    xmlFAParseRegExp(ctxt, 1);
5452    if (CUR != 0) {
5453	ERROR("xmlFAParseRegExp: extra characters");
5454    }
5455    if (ctxt->error != 0) {
5456	xmlRegFreeParserCtxt(ctxt);
5457	return(NULL);
5458    }
5459    ctxt->end = ctxt->state;
5460    ctxt->start->type = XML_REGEXP_START_STATE;
5461    ctxt->end->type = XML_REGEXP_FINAL_STATE;
5462
5463    /* remove the Epsilon except for counted transitions */
5464    xmlFAEliminateEpsilonTransitions(ctxt);
5465
5466
5467    if (ctxt->error != 0) {
5468	xmlRegFreeParserCtxt(ctxt);
5469	return(NULL);
5470    }
5471    ret = xmlRegEpxFromParse(ctxt);
5472    xmlRegFreeParserCtxt(ctxt);
5473    return(ret);
5474}
5475
5476/**
5477 * xmlRegexpExec:
5478 * @comp:  the compiled regular expression
5479 * @content:  the value to check against the regular expression
5480 *
5481 * Check if the regular expression generates the value
5482 *
5483 * Returns 1 if it matches, 0 if not and a negative value in case of error
5484 */
5485int
5486xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5487    if ((comp == NULL) || (content == NULL))
5488	return(-1);
5489    return(xmlFARegExec(comp, content));
5490}
5491
5492/**
5493 * xmlRegexpIsDeterminist:
5494 * @comp:  the compiled regular expression
5495 *
5496 * Check if the regular expression is determinist
5497 *
5498 * Returns 1 if it yes, 0 if not and a negative value in case of error
5499 */
5500int
5501xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5502    xmlAutomataPtr am;
5503    int ret;
5504
5505    if (comp == NULL)
5506	return(-1);
5507    if (comp->determinist != -1)
5508	return(comp->determinist);
5509
5510    am = xmlNewAutomata();
5511    if (am->states != NULL) {
5512	int i;
5513
5514	for (i = 0;i < am->nbStates;i++)
5515	    xmlRegFreeState(am->states[i]);
5516	xmlFree(am->states);
5517    }
5518    am->nbAtoms = comp->nbAtoms;
5519    am->atoms = comp->atoms;
5520    am->nbStates = comp->nbStates;
5521    am->states = comp->states;
5522    am->determinist = -1;
5523    am->flags = comp->flags;
5524    ret = xmlFAComputesDeterminism(am);
5525    am->atoms = NULL;
5526    am->states = NULL;
5527    xmlFreeAutomata(am);
5528    comp->determinist = ret;
5529    return(ret);
5530}
5531
5532/**
5533 * xmlRegFreeRegexp:
5534 * @regexp:  the regexp
5535 *
5536 * Free a regexp
5537 */
5538void
5539xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5540    int i;
5541    if (regexp == NULL)
5542	return;
5543
5544    if (regexp->string != NULL)
5545	xmlFree(regexp->string);
5546    if (regexp->states != NULL) {
5547	for (i = 0;i < regexp->nbStates;i++)
5548	    xmlRegFreeState(regexp->states[i]);
5549	xmlFree(regexp->states);
5550    }
5551    if (regexp->atoms != NULL) {
5552	for (i = 0;i < regexp->nbAtoms;i++)
5553	    xmlRegFreeAtom(regexp->atoms[i]);
5554	xmlFree(regexp->atoms);
5555    }
5556    if (regexp->counters != NULL)
5557	xmlFree(regexp->counters);
5558    if (regexp->compact != NULL)
5559	xmlFree(regexp->compact);
5560    if (regexp->transdata != NULL)
5561	xmlFree(regexp->transdata);
5562    if (regexp->stringMap != NULL) {
5563	for (i = 0; i < regexp->nbstrings;i++)
5564	    xmlFree(regexp->stringMap[i]);
5565	xmlFree(regexp->stringMap);
5566    }
5567
5568    xmlFree(regexp);
5569}
5570
5571#ifdef LIBXML_AUTOMATA_ENABLED
5572/************************************************************************
5573 * 									*
5574 * 			The Automata interface				*
5575 * 									*
5576 ************************************************************************/
5577
5578/**
5579 * xmlNewAutomata:
5580 *
5581 * Create a new automata
5582 *
5583 * Returns the new object or NULL in case of failure
5584 */
5585xmlAutomataPtr
5586xmlNewAutomata(void) {
5587    xmlAutomataPtr ctxt;
5588
5589    ctxt = xmlRegNewParserCtxt(NULL);
5590    if (ctxt == NULL)
5591	return(NULL);
5592
5593    /* initialize the parser */
5594    ctxt->end = NULL;
5595    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5596    if (ctxt->start == NULL) {
5597	xmlFreeAutomata(ctxt);
5598	return(NULL);
5599    }
5600    ctxt->start->type = XML_REGEXP_START_STATE;
5601    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5602        xmlRegFreeState(ctxt->start);
5603	xmlFreeAutomata(ctxt);
5604	return(NULL);
5605    }
5606    ctxt->flags = 0;
5607
5608    return(ctxt);
5609}
5610
5611/**
5612 * xmlFreeAutomata:
5613 * @am: an automata
5614 *
5615 * Free an automata
5616 */
5617void
5618xmlFreeAutomata(xmlAutomataPtr am) {
5619    if (am == NULL)
5620	return;
5621    xmlRegFreeParserCtxt(am);
5622}
5623
5624/**
5625 * xmlAutomataSetFlags:
5626 * @am: an automata
5627 * @flags:  a set of internal flags
5628 *
5629 * Set some flags on the automata
5630 */
5631void
5632xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
5633    if (am == NULL)
5634	return;
5635    am->flags |= flags;
5636}
5637
5638/**
5639 * xmlAutomataGetInitState:
5640 * @am: an automata
5641 *
5642 * Initial state lookup
5643 *
5644 * Returns the initial state of the automata
5645 */
5646xmlAutomataStatePtr
5647xmlAutomataGetInitState(xmlAutomataPtr am) {
5648    if (am == NULL)
5649	return(NULL);
5650    return(am->start);
5651}
5652
5653/**
5654 * xmlAutomataSetFinalState:
5655 * @am: an automata
5656 * @state: a state in this automata
5657 *
5658 * Makes that state a final state
5659 *
5660 * Returns 0 or -1 in case of error
5661 */
5662int
5663xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5664    if ((am == NULL) || (state == NULL))
5665	return(-1);
5666    state->type = XML_REGEXP_FINAL_STATE;
5667    return(0);
5668}
5669
5670/**
5671 * xmlAutomataNewTransition:
5672 * @am: an automata
5673 * @from: the starting point of the transition
5674 * @to: the target point of the transition or NULL
5675 * @token: the input string associated to that transition
5676 * @data: data passed to the callback function if the transition is activated
5677 *
5678 * If @to is NULL, this creates first a new target state in the automata
5679 * and then adds a transition from the @from state to the target state
5680 * activated by the value of @token
5681 *
5682 * Returns the target state or NULL in case of error
5683 */
5684xmlAutomataStatePtr
5685xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5686			 xmlAutomataStatePtr to, const xmlChar *token,
5687			 void *data) {
5688    xmlRegAtomPtr atom;
5689
5690    if ((am == NULL) || (from == NULL) || (token == NULL))
5691	return(NULL);
5692    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5693    if (atom == NULL)
5694        return(NULL);
5695    atom->data = data;
5696    if (atom == NULL)
5697	return(NULL);
5698    atom->valuep = xmlStrdup(token);
5699
5700    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5701        xmlRegFreeAtom(atom);
5702	return(NULL);
5703    }
5704    if (to == NULL)
5705	return(am->state);
5706    return(to);
5707}
5708
5709/**
5710 * xmlAutomataNewTransition2:
5711 * @am: an automata
5712 * @from: the starting point of the transition
5713 * @to: the target point of the transition or NULL
5714 * @token: the first input string associated to that transition
5715 * @token2: the second input string associated to that transition
5716 * @data: data passed to the callback function if the transition is activated
5717 *
5718 * If @to is NULL, this creates first a new target state in the automata
5719 * and then adds a transition from the @from state to the target state
5720 * activated by the value of @token
5721 *
5722 * Returns the target state or NULL in case of error
5723 */
5724xmlAutomataStatePtr
5725xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5726			  xmlAutomataStatePtr to, const xmlChar *token,
5727			  const xmlChar *token2, void *data) {
5728    xmlRegAtomPtr atom;
5729
5730    if ((am == NULL) || (from == NULL) || (token == NULL))
5731	return(NULL);
5732    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5733    if (atom == NULL)
5734	return(NULL);
5735    atom->data = data;
5736    if ((token2 == NULL) || (*token2 == 0)) {
5737	atom->valuep = xmlStrdup(token);
5738    } else {
5739	int lenn, lenp;
5740	xmlChar *str;
5741
5742	lenn = strlen((char *) token2);
5743	lenp = strlen((char *) token);
5744
5745	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5746	if (str == NULL) {
5747	    xmlRegFreeAtom(atom);
5748	    return(NULL);
5749	}
5750	memcpy(&str[0], token, lenp);
5751	str[lenp] = '|';
5752	memcpy(&str[lenp + 1], token2, lenn);
5753	str[lenn + lenp + 1] = 0;
5754
5755	atom->valuep = str;
5756    }
5757
5758    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5759        xmlRegFreeAtom(atom);
5760	return(NULL);
5761    }
5762    if (to == NULL)
5763	return(am->state);
5764    return(to);
5765}
5766
5767/**
5768 * xmlAutomataNewNegTrans:
5769 * @am: an automata
5770 * @from: the starting point of the transition
5771 * @to: the target point of the transition or NULL
5772 * @token: the first input string associated to that transition
5773 * @token2: the second input string associated to that transition
5774 * @data: data passed to the callback function if the transition is activated
5775 *
5776 * If @to is NULL, this creates first a new target state in the automata
5777 * and then adds a transition from the @from state to the target state
5778 * activated by any value except (@token,@token2)
5779 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
5780 # the semantic of XSD ##other
5781 *
5782 * Returns the target state or NULL in case of error
5783 */
5784xmlAutomataStatePtr
5785xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5786		       xmlAutomataStatePtr to, const xmlChar *token,
5787		       const xmlChar *token2, void *data) {
5788    xmlRegAtomPtr atom;
5789    xmlChar err_msg[200];
5790
5791    if ((am == NULL) || (from == NULL) || (token == NULL))
5792	return(NULL);
5793    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5794    if (atom == NULL)
5795	return(NULL);
5796    atom->data = data;
5797    atom->neg = 1;
5798    if ((token2 == NULL) || (*token2 == 0)) {
5799	atom->valuep = xmlStrdup(token);
5800    } else {
5801	int lenn, lenp;
5802	xmlChar *str;
5803
5804	lenn = strlen((char *) token2);
5805	lenp = strlen((char *) token);
5806
5807	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5808	if (str == NULL) {
5809	    xmlRegFreeAtom(atom);
5810	    return(NULL);
5811	}
5812	memcpy(&str[0], token, lenp);
5813	str[lenp] = '|';
5814	memcpy(&str[lenp + 1], token2, lenn);
5815	str[lenn + lenp + 1] = 0;
5816
5817	atom->valuep = str;
5818    }
5819    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5820    err_msg[199] = 0;
5821    atom->valuep2 = xmlStrdup(err_msg);
5822
5823    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5824        xmlRegFreeAtom(atom);
5825	return(NULL);
5826    }
5827    am->negs++;
5828    if (to == NULL)
5829	return(am->state);
5830    return(to);
5831}
5832
5833/**
5834 * xmlAutomataNewCountTrans2:
5835 * @am: an automata
5836 * @from: the starting point of the transition
5837 * @to: the target point of the transition or NULL
5838 * @token: the input string associated to that transition
5839 * @token2: the second input string associated to that transition
5840 * @min:  the minimum successive occurences of token
5841 * @max:  the maximum successive occurences of token
5842 * @data:  data associated to the transition
5843 *
5844 * If @to is NULL, this creates first a new target state in the automata
5845 * and then adds a transition from the @from state to the target state
5846 * activated by a succession of input of value @token and @token2 and
5847 * whose number is between @min and @max
5848 *
5849 * Returns the target state or NULL in case of error
5850 */
5851xmlAutomataStatePtr
5852xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5853			 xmlAutomataStatePtr to, const xmlChar *token,
5854			 const xmlChar *token2,
5855			 int min, int max, void *data) {
5856    xmlRegAtomPtr atom;
5857    int counter;
5858
5859    if ((am == NULL) || (from == NULL) || (token == NULL))
5860	return(NULL);
5861    if (min < 0)
5862	return(NULL);
5863    if ((max < min) || (max < 1))
5864	return(NULL);
5865    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5866    if (atom == NULL)
5867	return(NULL);
5868    if ((token2 == NULL) || (*token2 == 0)) {
5869	atom->valuep = xmlStrdup(token);
5870    } else {
5871	int lenn, lenp;
5872	xmlChar *str;
5873
5874	lenn = strlen((char *) token2);
5875	lenp = strlen((char *) token);
5876
5877	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5878	if (str == NULL) {
5879	    xmlRegFreeAtom(atom);
5880	    return(NULL);
5881	}
5882	memcpy(&str[0], token, lenp);
5883	str[lenp] = '|';
5884	memcpy(&str[lenp + 1], token2, lenn);
5885	str[lenn + lenp + 1] = 0;
5886
5887	atom->valuep = str;
5888    }
5889    atom->data = data;
5890    if (min == 0)
5891	atom->min = 1;
5892    else
5893	atom->min = min;
5894    atom->max = max;
5895
5896    /*
5897     * associate a counter to the transition.
5898     */
5899    counter = xmlRegGetCounter(am);
5900    am->counters[counter].min = min;
5901    am->counters[counter].max = max;
5902
5903    /* xmlFAGenerateTransitions(am, from, to, atom); */
5904    if (to == NULL) {
5905        to = xmlRegNewState(am);
5906	xmlRegStatePush(am, to);
5907    }
5908    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5909    xmlRegAtomPush(am, atom);
5910    am->state = to;
5911
5912    if (to == NULL)
5913	to = am->state;
5914    if (to == NULL)
5915	return(NULL);
5916    if (min == 0)
5917	xmlFAGenerateEpsilonTransition(am, from, to);
5918    return(to);
5919}
5920
5921/**
5922 * xmlAutomataNewCountTrans:
5923 * @am: an automata
5924 * @from: the starting point of the transition
5925 * @to: the target point of the transition or NULL
5926 * @token: the input string associated to that transition
5927 * @min:  the minimum successive occurences of token
5928 * @max:  the maximum successive occurences of token
5929 * @data:  data associated to the transition
5930 *
5931 * If @to is NULL, this creates first a new target state in the automata
5932 * and then adds a transition from the @from state to the target state
5933 * activated by a succession of input of value @token and whose number
5934 * is between @min and @max
5935 *
5936 * Returns the target state or NULL in case of error
5937 */
5938xmlAutomataStatePtr
5939xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5940			 xmlAutomataStatePtr to, const xmlChar *token,
5941			 int min, int max, void *data) {
5942    xmlRegAtomPtr atom;
5943    int counter;
5944
5945    if ((am == NULL) || (from == NULL) || (token == NULL))
5946	return(NULL);
5947    if (min < 0)
5948	return(NULL);
5949    if ((max < min) || (max < 1))
5950	return(NULL);
5951    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5952    if (atom == NULL)
5953	return(NULL);
5954    atom->valuep = xmlStrdup(token);
5955    atom->data = data;
5956    if (min == 0)
5957	atom->min = 1;
5958    else
5959	atom->min = min;
5960    atom->max = max;
5961
5962    /*
5963     * associate a counter to the transition.
5964     */
5965    counter = xmlRegGetCounter(am);
5966    am->counters[counter].min = min;
5967    am->counters[counter].max = max;
5968
5969    /* xmlFAGenerateTransitions(am, from, to, atom); */
5970    if (to == NULL) {
5971        to = xmlRegNewState(am);
5972	xmlRegStatePush(am, to);
5973    }
5974    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5975    xmlRegAtomPush(am, atom);
5976    am->state = to;
5977
5978    if (to == NULL)
5979	to = am->state;
5980    if (to == NULL)
5981	return(NULL);
5982    if (min == 0)
5983	xmlFAGenerateEpsilonTransition(am, from, to);
5984    return(to);
5985}
5986
5987/**
5988 * xmlAutomataNewOnceTrans2:
5989 * @am: an automata
5990 * @from: the starting point of the transition
5991 * @to: the target point of the transition or NULL
5992 * @token: the input string associated to that transition
5993 * @token2: the second input string associated to that transition
5994 * @min:  the minimum successive occurences of token
5995 * @max:  the maximum successive occurences of token
5996 * @data:  data associated to the transition
5997 *
5998 * If @to is NULL, this creates first a new target state in the automata
5999 * and then adds a transition from the @from state to the target state
6000 * activated by a succession of input of value @token and @token2 and whose
6001 * number is between @min and @max, moreover that transition can only be
6002 * crossed once.
6003 *
6004 * Returns the target state or NULL in case of error
6005 */
6006xmlAutomataStatePtr
6007xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
6008			 xmlAutomataStatePtr to, const xmlChar *token,
6009			 const xmlChar *token2,
6010			 int min, int max, void *data) {
6011    xmlRegAtomPtr atom;
6012    int counter;
6013
6014    if ((am == NULL) || (from == NULL) || (token == NULL))
6015	return(NULL);
6016    if (min < 1)
6017	return(NULL);
6018    if ((max < min) || (max < 1))
6019	return(NULL);
6020    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6021    if (atom == NULL)
6022	return(NULL);
6023    if ((token2 == NULL) || (*token2 == 0)) {
6024	atom->valuep = xmlStrdup(token);
6025    } else {
6026	int lenn, lenp;
6027	xmlChar *str;
6028
6029	lenn = strlen((char *) token2);
6030	lenp = strlen((char *) token);
6031
6032	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
6033	if (str == NULL) {
6034	    xmlRegFreeAtom(atom);
6035	    return(NULL);
6036	}
6037	memcpy(&str[0], token, lenp);
6038	str[lenp] = '|';
6039	memcpy(&str[lenp + 1], token2, lenn);
6040	str[lenn + lenp + 1] = 0;
6041
6042	atom->valuep = str;
6043    }
6044    atom->data = data;
6045    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6046    atom->min = min;
6047    atom->max = max;
6048    /*
6049     * associate a counter to the transition.
6050     */
6051    counter = xmlRegGetCounter(am);
6052    am->counters[counter].min = 1;
6053    am->counters[counter].max = 1;
6054
6055    /* xmlFAGenerateTransitions(am, from, to, atom); */
6056    if (to == NULL) {
6057	to = xmlRegNewState(am);
6058	xmlRegStatePush(am, to);
6059    }
6060    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6061    xmlRegAtomPush(am, atom);
6062    am->state = to;
6063    return(to);
6064}
6065
6066
6067
6068/**
6069 * xmlAutomataNewOnceTrans:
6070 * @am: an automata
6071 * @from: the starting point of the transition
6072 * @to: the target point of the transition or NULL
6073 * @token: the input string associated to that transition
6074 * @min:  the minimum successive occurences of token
6075 * @max:  the maximum successive occurences of token
6076 * @data:  data associated to the transition
6077 *
6078 * If @to is NULL, this creates first a new target state in the automata
6079 * and then adds a transition from the @from state to the target state
6080 * activated by a succession of input of value @token and whose number
6081 * is between @min and @max, moreover that transition can only be crossed
6082 * once.
6083 *
6084 * Returns the target state or NULL in case of error
6085 */
6086xmlAutomataStatePtr
6087xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6088			 xmlAutomataStatePtr to, const xmlChar *token,
6089			 int min, int max, void *data) {
6090    xmlRegAtomPtr atom;
6091    int counter;
6092
6093    if ((am == NULL) || (from == NULL) || (token == NULL))
6094	return(NULL);
6095    if (min < 1)
6096	return(NULL);
6097    if ((max < min) || (max < 1))
6098	return(NULL);
6099    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
6100    if (atom == NULL)
6101	return(NULL);
6102    atom->valuep = xmlStrdup(token);
6103    atom->data = data;
6104    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
6105    atom->min = min;
6106    atom->max = max;
6107    /*
6108     * associate a counter to the transition.
6109     */
6110    counter = xmlRegGetCounter(am);
6111    am->counters[counter].min = 1;
6112    am->counters[counter].max = 1;
6113
6114    /* xmlFAGenerateTransitions(am, from, to, atom); */
6115    if (to == NULL) {
6116	to = xmlRegNewState(am);
6117	xmlRegStatePush(am, to);
6118    }
6119    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
6120    xmlRegAtomPush(am, atom);
6121    am->state = to;
6122    return(to);
6123}
6124
6125/**
6126 * xmlAutomataNewState:
6127 * @am: an automata
6128 *
6129 * Create a new disconnected state in the automata
6130 *
6131 * Returns the new state or NULL in case of error
6132 */
6133xmlAutomataStatePtr
6134xmlAutomataNewState(xmlAutomataPtr am) {
6135    xmlAutomataStatePtr to;
6136
6137    if (am == NULL)
6138	return(NULL);
6139    to = xmlRegNewState(am);
6140    xmlRegStatePush(am, to);
6141    return(to);
6142}
6143
6144/**
6145 * xmlAutomataNewEpsilon:
6146 * @am: an automata
6147 * @from: the starting point of the transition
6148 * @to: the target point of the transition or NULL
6149 *
6150 * If @to is NULL, this creates first a new target state in the automata
6151 * and then adds an epsilon transition from the @from state to the
6152 * target state
6153 *
6154 * Returns the target state or NULL in case of error
6155 */
6156xmlAutomataStatePtr
6157xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
6158		      xmlAutomataStatePtr to) {
6159    if ((am == NULL) || (from == NULL))
6160	return(NULL);
6161    xmlFAGenerateEpsilonTransition(am, from, to);
6162    if (to == NULL)
6163	return(am->state);
6164    return(to);
6165}
6166
6167/**
6168 * xmlAutomataNewAllTrans:
6169 * @am: an automata
6170 * @from: the starting point of the transition
6171 * @to: the target point of the transition or NULL
6172 * @lax: allow to transition if not all all transitions have been activated
6173 *
6174 * If @to is NULL, this creates first a new target state in the automata
6175 * and then adds a an ALL transition from the @from state to the
6176 * target state. That transition is an epsilon transition allowed only when
6177 * all transitions from the @from node have been activated.
6178 *
6179 * Returns the target state or NULL in case of error
6180 */
6181xmlAutomataStatePtr
6182xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6183		       xmlAutomataStatePtr to, int lax) {
6184    if ((am == NULL) || (from == NULL))
6185	return(NULL);
6186    xmlFAGenerateAllTransition(am, from, to, lax);
6187    if (to == NULL)
6188	return(am->state);
6189    return(to);
6190}
6191
6192/**
6193 * xmlAutomataNewCounter:
6194 * @am: an automata
6195 * @min:  the minimal value on the counter
6196 * @max:  the maximal value on the counter
6197 *
6198 * Create a new counter
6199 *
6200 * Returns the counter number or -1 in case of error
6201 */
6202int
6203xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
6204    int ret;
6205
6206    if (am == NULL)
6207	return(-1);
6208
6209    ret = xmlRegGetCounter(am);
6210    if (ret < 0)
6211	return(-1);
6212    am->counters[ret].min = min;
6213    am->counters[ret].max = max;
6214    return(ret);
6215}
6216
6217/**
6218 * xmlAutomataNewCountedTrans:
6219 * @am: an automata
6220 * @from: the starting point of the transition
6221 * @to: the target point of the transition or NULL
6222 * @counter: the counter associated to that transition
6223 *
6224 * If @to is NULL, this creates first a new target state in the automata
6225 * and then adds an epsilon transition from the @from state to the target state
6226 * which will increment the counter provided
6227 *
6228 * Returns the target state or NULL in case of error
6229 */
6230xmlAutomataStatePtr
6231xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6232		xmlAutomataStatePtr to, int counter) {
6233    if ((am == NULL) || (from == NULL) || (counter < 0))
6234	return(NULL);
6235    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
6236    if (to == NULL)
6237	return(am->state);
6238    return(to);
6239}
6240
6241/**
6242 * xmlAutomataNewCounterTrans:
6243 * @am: an automata
6244 * @from: the starting point of the transition
6245 * @to: the target point of the transition or NULL
6246 * @counter: the counter associated to that transition
6247 *
6248 * If @to is NULL, this creates first a new target state in the automata
6249 * and then adds an epsilon transition from the @from state to the target state
6250 * which will be allowed only if the counter is within the right range.
6251 *
6252 * Returns the target state or NULL in case of error
6253 */
6254xmlAutomataStatePtr
6255xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
6256		xmlAutomataStatePtr to, int counter) {
6257    if ((am == NULL) || (from == NULL) || (counter < 0))
6258	return(NULL);
6259    xmlFAGenerateCountedTransition(am, from, to, counter);
6260    if (to == NULL)
6261	return(am->state);
6262    return(to);
6263}
6264
6265/**
6266 * xmlAutomataCompile:
6267 * @am: an automata
6268 *
6269 * Compile the automata into a Reg Exp ready for being executed.
6270 * The automata should be free after this point.
6271 *
6272 * Returns the compiled regexp or NULL in case of error
6273 */
6274xmlRegexpPtr
6275xmlAutomataCompile(xmlAutomataPtr am) {
6276    xmlRegexpPtr ret;
6277
6278    if ((am == NULL) || (am->error != 0)) return(NULL);
6279    xmlFAEliminateEpsilonTransitions(am);
6280    /* xmlFAComputesDeterminism(am); */
6281    ret = xmlRegEpxFromParse(am);
6282
6283    return(ret);
6284}
6285
6286/**
6287 * xmlAutomataIsDeterminist:
6288 * @am: an automata
6289 *
6290 * Checks if an automata is determinist.
6291 *
6292 * Returns 1 if true, 0 if not, and -1 in case of error
6293 */
6294int
6295xmlAutomataIsDeterminist(xmlAutomataPtr am) {
6296    int ret;
6297
6298    if (am == NULL)
6299	return(-1);
6300
6301    ret = xmlFAComputesDeterminism(am);
6302    return(ret);
6303}
6304#endif /* LIBXML_AUTOMATA_ENABLED */
6305
6306#ifdef LIBXML_EXPR_ENABLED
6307/************************************************************************
6308 *									*
6309 *		Formal Expression handling code				*
6310 *									*
6311 ************************************************************************/
6312/************************************************************************
6313 *									*
6314 *		Expression handling context				*
6315 *									*
6316 ************************************************************************/
6317
6318struct _xmlExpCtxt {
6319    xmlDictPtr dict;
6320    xmlExpNodePtr *table;
6321    int size;
6322    int nbElems;
6323    int nb_nodes;
6324    int maxNodes;
6325    const char *expr;
6326    const char *cur;
6327    int nb_cons;
6328    int tabSize;
6329};
6330
6331/**
6332 * xmlExpNewCtxt:
6333 * @maxNodes:  the maximum number of nodes
6334 * @dict:  optional dictionnary to use internally
6335 *
6336 * Creates a new context for manipulating expressions
6337 *
6338 * Returns the context or NULL in case of error
6339 */
6340xmlExpCtxtPtr
6341xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
6342    xmlExpCtxtPtr ret;
6343    int size = 256;
6344
6345    if (maxNodes <= 4096)
6346        maxNodes = 4096;
6347
6348    ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
6349    if (ret == NULL)
6350        return(NULL);
6351    memset(ret, 0, sizeof(xmlExpCtxt));
6352    ret->size = size;
6353    ret->nbElems = 0;
6354    ret->maxNodes = maxNodes;
6355    ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
6356    if (ret->table == NULL) {
6357        xmlFree(ret);
6358	return(NULL);
6359    }
6360    memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
6361    if (dict == NULL) {
6362        ret->dict = xmlDictCreate();
6363	if (ret->dict == NULL) {
6364	    xmlFree(ret->table);
6365	    xmlFree(ret);
6366	    return(NULL);
6367	}
6368    } else {
6369        ret->dict = dict;
6370	xmlDictReference(ret->dict);
6371    }
6372    return(ret);
6373}
6374
6375/**
6376 * xmlExpFreeCtxt:
6377 * @ctxt:  an expression context
6378 *
6379 * Free an expression context
6380 */
6381void
6382xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
6383    if (ctxt == NULL)
6384        return;
6385    xmlDictFree(ctxt->dict);
6386    if (ctxt->table != NULL)
6387	xmlFree(ctxt->table);
6388    xmlFree(ctxt);
6389}
6390
6391/************************************************************************
6392 *									*
6393 *		Structure associated to an expression node		*
6394 *									*
6395 ************************************************************************/
6396#define MAX_NODES 10000
6397
6398/* #define DEBUG_DERIV */
6399
6400/*
6401 * TODO:
6402 * - Wildcards
6403 * - public API for creation
6404 *
6405 * Started
6406 * - regression testing
6407 *
6408 * Done
6409 * - split into module and test tool
6410 * - memleaks
6411 */
6412
6413typedef enum {
6414    XML_EXP_NILABLE = (1 << 0)
6415} xmlExpNodeInfo;
6416
6417#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
6418
6419struct _xmlExpNode {
6420    unsigned char type;/* xmlExpNodeType */
6421    unsigned char info;/* OR of xmlExpNodeInfo */
6422    unsigned short key;	/* the hash key */
6423    unsigned int ref;	/* The number of references */
6424    int c_max;		/* the maximum length it can consume */
6425    xmlExpNodePtr exp_left;
6426    xmlExpNodePtr next;/* the next node in the hash table or free list */
6427    union {
6428	struct {
6429	    int f_min;
6430	    int f_max;
6431	} count;
6432	struct {
6433	    xmlExpNodePtr f_right;
6434	} children;
6435        const xmlChar *f_str;
6436    } field;
6437};
6438
6439#define exp_min field.count.f_min
6440#define exp_max field.count.f_max
6441/* #define exp_left field.children.f_left */
6442#define exp_right field.children.f_right
6443#define exp_str field.f_str
6444
6445static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
6446static xmlExpNode forbiddenExpNode = {
6447    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6448};
6449xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
6450static xmlExpNode emptyExpNode = {
6451    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
6452};
6453xmlExpNodePtr emptyExp = &emptyExpNode;
6454
6455/************************************************************************
6456 *									*
6457 *  The custom hash table for unicity and canonicalization		*
6458 *  of sub-expressions pointers						*
6459 *									*
6460 ************************************************************************/
6461/*
6462 * xmlExpHashNameComputeKey:
6463 * Calculate the hash key for a token
6464 */
6465static unsigned short
6466xmlExpHashNameComputeKey(const xmlChar *name) {
6467    unsigned short value = 0L;
6468    char ch;
6469
6470    if (name != NULL) {
6471	value += 30 * (*name);
6472	while ((ch = *name++) != 0) {
6473	    value = value ^ ((value << 5) + (value >> 3) + (unsigned short)ch);
6474	}
6475    }
6476    return (value);
6477}
6478
6479/*
6480 * xmlExpHashComputeKey:
6481 * Calculate the hash key for a compound expression
6482 */
6483static unsigned short
6484xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
6485                     xmlExpNodePtr right) {
6486    unsigned long value;
6487    unsigned short ret;
6488
6489    switch (type) {
6490        case XML_EXP_SEQ:
6491	    value = left->key;
6492	    value += right->key;
6493	    value *= 3;
6494	    ret = (unsigned short) value;
6495	    break;
6496        case XML_EXP_OR:
6497	    value = left->key;
6498	    value += right->key;
6499	    value *= 7;
6500	    ret = (unsigned short) value;
6501	    break;
6502        case XML_EXP_COUNT:
6503	    value = left->key;
6504	    value += right->key;
6505	    ret = (unsigned short) value;
6506	    break;
6507	default:
6508	    ret = 0;
6509    }
6510    return(ret);
6511}
6512
6513
6514static xmlExpNodePtr
6515xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6516    xmlExpNodePtr ret;
6517
6518    if (ctxt->nb_nodes >= MAX_NODES)
6519        return(NULL);
6520    ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6521    if (ret == NULL)
6522        return(NULL);
6523    memset(ret, 0, sizeof(xmlExpNode));
6524    ret->type = type;
6525    ret->next = NULL;
6526    ctxt->nb_nodes++;
6527    ctxt->nb_cons++;
6528    return(ret);
6529}
6530
6531/**
6532 * xmlExpHashGetEntry:
6533 * @table: the hash table
6534 *
6535 * Get the unique entry from the hash table. The entry is created if
6536 * needed. @left and @right are consumed, i.e. their ref count will
6537 * be decremented by the operation.
6538 *
6539 * Returns the pointer or NULL in case of error
6540 */
6541static xmlExpNodePtr
6542xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6543                   xmlExpNodePtr left, xmlExpNodePtr right,
6544		   const xmlChar *name, int min, int max) {
6545    unsigned short kbase, key;
6546    xmlExpNodePtr entry;
6547    xmlExpNodePtr insert;
6548
6549    if (ctxt == NULL)
6550	return(NULL);
6551
6552    /*
6553     * Check for duplicate and insertion location.
6554     */
6555    if (type == XML_EXP_ATOM) {
6556	kbase = xmlExpHashNameComputeKey(name);
6557    } else if (type == XML_EXP_COUNT) {
6558        /* COUNT reduction rule 1 */
6559	/* a{1} -> a */
6560	if (min == max) {
6561	    if (min == 1) {
6562		return(left);
6563	    }
6564	    if (min == 0) {
6565		xmlExpFree(ctxt, left);
6566	        return(emptyExp);
6567	    }
6568	}
6569	if (min < 0) {
6570	    xmlExpFree(ctxt, left);
6571	    return(forbiddenExp);
6572	}
6573        if (max == -1)
6574	    kbase = min + 79;
6575	else
6576	    kbase = max - min;
6577	kbase += left->key;
6578    } else if (type == XML_EXP_OR) {
6579        /* Forbid reduction rules */
6580        if (left->type == XML_EXP_FORBID) {
6581	    xmlExpFree(ctxt, left);
6582	    return(right);
6583	}
6584        if (right->type == XML_EXP_FORBID) {
6585	    xmlExpFree(ctxt, right);
6586	    return(left);
6587	}
6588
6589        /* OR reduction rule 1 */
6590	/* a | a reduced to a */
6591        if (left == right) {
6592	    left->ref--;
6593	    return(left);
6594	}
6595        /* OR canonicalization rule 1 */
6596	/* linearize (a | b) | c into a | (b | c) */
6597        if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6598	    xmlExpNodePtr tmp = left;
6599            left = right;
6600	    right = tmp;
6601	}
6602        /* OR reduction rule 2 */
6603	/* a | (a | b) and b | (a | b) are reduced to a | b */
6604        if (right->type == XML_EXP_OR) {
6605	    if ((left == right->exp_left) ||
6606	        (left == right->exp_right)) {
6607		xmlExpFree(ctxt, left);
6608		return(right);
6609	    }
6610	}
6611        /* OR canonicalization rule 2 */
6612	/* linearize (a | b) | c into a | (b | c) */
6613        if (left->type == XML_EXP_OR) {
6614	    xmlExpNodePtr tmp;
6615
6616	    /* OR canonicalization rule 2 */
6617	    if ((left->exp_right->type != XML_EXP_OR) &&
6618	        (left->exp_right->key < left->exp_left->key)) {
6619	        tmp = left->exp_right;
6620		left->exp_right = left->exp_left;
6621		left->exp_left = tmp;
6622	    }
6623	    left->exp_right->ref++;
6624	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6625	                             NULL, 0, 0);
6626	    left->exp_left->ref++;
6627	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6628	                             NULL, 0, 0);
6629
6630	    xmlExpFree(ctxt, left);
6631	    return(tmp);
6632	}
6633	if (right->type == XML_EXP_OR) {
6634	    /* Ordering in the tree */
6635	    /* C | (A | B) -> A | (B | C) */
6636	    if (left->key > right->exp_right->key) {
6637		xmlExpNodePtr tmp;
6638		right->exp_right->ref++;
6639		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6640		                         left, NULL, 0, 0);
6641		right->exp_left->ref++;
6642		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6643		                         tmp, NULL, 0, 0);
6644		xmlExpFree(ctxt, right);
6645		return(tmp);
6646	    }
6647	    /* Ordering in the tree */
6648	    /* B | (A | C) -> A | (B | C) */
6649	    if (left->key > right->exp_left->key) {
6650		xmlExpNodePtr tmp;
6651		right->exp_right->ref++;
6652		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6653		                         right->exp_right, NULL, 0, 0);
6654		right->exp_left->ref++;
6655		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6656		                         tmp, NULL, 0, 0);
6657		xmlExpFree(ctxt, right);
6658		return(tmp);
6659	    }
6660	}
6661	/* we know both types are != XML_EXP_OR here */
6662        else if (left->key > right->key) {
6663	    xmlExpNodePtr tmp = left;
6664            left = right;
6665	    right = tmp;
6666	}
6667	kbase = xmlExpHashComputeKey(type, left, right);
6668    } else if (type == XML_EXP_SEQ) {
6669        /* Forbid reduction rules */
6670        if (left->type == XML_EXP_FORBID) {
6671	    xmlExpFree(ctxt, right);
6672	    return(left);
6673	}
6674        if (right->type == XML_EXP_FORBID) {
6675	    xmlExpFree(ctxt, left);
6676	    return(right);
6677	}
6678        /* Empty reduction rules */
6679        if (right->type == XML_EXP_EMPTY) {
6680	    return(left);
6681	}
6682        if (left->type == XML_EXP_EMPTY) {
6683	    return(right);
6684	}
6685	kbase = xmlExpHashComputeKey(type, left, right);
6686    } else
6687        return(NULL);
6688
6689    key = kbase % ctxt->size;
6690    if (ctxt->table[key] != NULL) {
6691	for (insert = ctxt->table[key]; insert != NULL;
6692	     insert = insert->next) {
6693	    if ((insert->key == kbase) &&
6694	        (insert->type == type)) {
6695		if (type == XML_EXP_ATOM) {
6696		    if (name == insert->exp_str) {
6697			insert->ref++;
6698			return(insert);
6699		    }
6700		} else if (type == XML_EXP_COUNT) {
6701		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
6702		        (insert->exp_left == left)) {
6703			insert->ref++;
6704			left->ref--;
6705			return(insert);
6706		    }
6707		} else if ((insert->exp_left == left) &&
6708			   (insert->exp_right == right)) {
6709		    insert->ref++;
6710		    left->ref--;
6711		    right->ref--;
6712		    return(insert);
6713		}
6714	    }
6715	}
6716    }
6717
6718    entry = xmlExpNewNode(ctxt, type);
6719    if (entry == NULL)
6720        return(NULL);
6721    entry->key = kbase;
6722    if (type == XML_EXP_ATOM) {
6723	entry->exp_str = name;
6724	entry->c_max = 1;
6725    } else if (type == XML_EXP_COUNT) {
6726        entry->exp_min = min;
6727        entry->exp_max = max;
6728	entry->exp_left = left;
6729	if ((min == 0) || (IS_NILLABLE(left)))
6730	    entry->info |= XML_EXP_NILABLE;
6731	if (max < 0)
6732	    entry->c_max = -1;
6733	else
6734	    entry->c_max = max * entry->exp_left->c_max;
6735    } else {
6736	entry->exp_left = left;
6737	entry->exp_right = right;
6738	if (type == XML_EXP_OR) {
6739	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6740		entry->info |= XML_EXP_NILABLE;
6741	    if ((entry->exp_left->c_max == -1) ||
6742	        (entry->exp_right->c_max == -1))
6743		entry->c_max = -1;
6744	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
6745	        entry->c_max = entry->exp_left->c_max;
6746	    else
6747	        entry->c_max = entry->exp_right->c_max;
6748	} else {
6749	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6750		entry->info |= XML_EXP_NILABLE;
6751	    if ((entry->exp_left->c_max == -1) ||
6752	        (entry->exp_right->c_max == -1))
6753		entry->c_max = -1;
6754	    else
6755	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6756	}
6757    }
6758    entry->ref = 1;
6759    if (ctxt->table[key] != NULL)
6760        entry->next = ctxt->table[key];
6761
6762    ctxt->table[key] = entry;
6763    ctxt->nbElems++;
6764
6765    return(entry);
6766}
6767
6768/**
6769 * xmlExpFree:
6770 * @ctxt: the expression context
6771 * @exp: the expression
6772 *
6773 * Dereference the expression
6774 */
6775void
6776xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6777    if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6778        return;
6779    exp->ref--;
6780    if (exp->ref == 0) {
6781        unsigned short key;
6782
6783        /* Unlink it first from the hash table */
6784	key = exp->key % ctxt->size;
6785	if (ctxt->table[key] == exp) {
6786	    ctxt->table[key] = exp->next;
6787	} else {
6788	    xmlExpNodePtr tmp;
6789
6790	    tmp = ctxt->table[key];
6791	    while (tmp != NULL) {
6792	        if (tmp->next == exp) {
6793		    tmp->next = exp->next;
6794		    break;
6795		}
6796	        tmp = tmp->next;
6797	    }
6798	}
6799
6800        if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6801	    xmlExpFree(ctxt, exp->exp_left);
6802	    xmlExpFree(ctxt, exp->exp_right);
6803	} else if (exp->type == XML_EXP_COUNT) {
6804	    xmlExpFree(ctxt, exp->exp_left);
6805	}
6806        xmlFree(exp);
6807	ctxt->nb_nodes--;
6808    }
6809}
6810
6811/**
6812 * xmlExpRef:
6813 * @exp: the expression
6814 *
6815 * Increase the reference count of the expression
6816 */
6817void
6818xmlExpRef(xmlExpNodePtr exp) {
6819    if (exp != NULL)
6820        exp->ref++;
6821}
6822
6823/**
6824 * xmlExpNewAtom:
6825 * @ctxt: the expression context
6826 * @name: the atom name
6827 * @len: the atom name lenght in byte (or -1);
6828 *
6829 * Get the atom associated to this name from that context
6830 *
6831 * Returns the node or NULL in case of error
6832 */
6833xmlExpNodePtr
6834xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6835    if ((ctxt == NULL) || (name == NULL))
6836        return(NULL);
6837    name = xmlDictLookup(ctxt->dict, name, len);
6838    if (name == NULL)
6839        return(NULL);
6840    return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
6841}
6842
6843/**
6844 * xmlExpNewOr:
6845 * @ctxt: the expression context
6846 * @left: left expression
6847 * @right: right expression
6848 *
6849 * Get the atom associated to the choice @left | @right
6850 * Note that @left and @right are consumed in the operation, to keep
6851 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6852 * this is true even in case of failure (unless ctxt == NULL).
6853 *
6854 * Returns the node or NULL in case of error
6855 */
6856xmlExpNodePtr
6857xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6858    if (ctxt == NULL)
6859        return(NULL);
6860    if ((left == NULL) || (right == NULL)) {
6861        xmlExpFree(ctxt, left);
6862        xmlExpFree(ctxt, right);
6863        return(NULL);
6864    }
6865    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
6866}
6867
6868/**
6869 * xmlExpNewSeq:
6870 * @ctxt: the expression context
6871 * @left: left expression
6872 * @right: right expression
6873 *
6874 * Get the atom associated to the sequence @left , @right
6875 * Note that @left and @right are consumed in the operation, to keep
6876 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6877 * this is true even in case of failure (unless ctxt == NULL).
6878 *
6879 * Returns the node or NULL in case of error
6880 */
6881xmlExpNodePtr
6882xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6883    if (ctxt == NULL)
6884        return(NULL);
6885    if ((left == NULL) || (right == NULL)) {
6886        xmlExpFree(ctxt, left);
6887        xmlExpFree(ctxt, right);
6888        return(NULL);
6889    }
6890    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
6891}
6892
6893/**
6894 * xmlExpNewRange:
6895 * @ctxt: the expression context
6896 * @subset: the expression to be repeated
6897 * @min: the lower bound for the repetition
6898 * @max: the upper bound for the repetition, -1 means infinite
6899 *
6900 * Get the atom associated to the range (@subset){@min, @max}
6901 * Note that @subset is consumed in the operation, to keep
6902 * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
6903 * this is true even in case of failure (unless ctxt == NULL).
6904 *
6905 * Returns the node or NULL in case of error
6906 */
6907xmlExpNodePtr
6908xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
6909    if (ctxt == NULL)
6910        return(NULL);
6911    if ((subset == NULL) || (min < 0) || (max < -1) ||
6912        ((max >= 0) && (min > max))) {
6913	xmlExpFree(ctxt, subset);
6914        return(NULL);
6915    }
6916    return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6917                              NULL, NULL, min, max));
6918}
6919
6920/************************************************************************
6921 *									*
6922 *		Public API for operations on expressions		*
6923 *									*
6924 ************************************************************************/
6925
6926static int
6927xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6928                     const xmlChar**list, int len, int nb) {
6929    int tmp, tmp2;
6930tail:
6931    switch (exp->type) {
6932        case XML_EXP_EMPTY:
6933	    return(0);
6934        case XML_EXP_ATOM:
6935	    for (tmp = 0;tmp < nb;tmp++)
6936	        if (list[tmp] == exp->exp_str)
6937		    return(0);
6938            if (nb >= len)
6939	        return(-2);
6940	    list[nb] = exp->exp_str;
6941	    return(1);
6942        case XML_EXP_COUNT:
6943	    exp = exp->exp_left;
6944	    goto tail;
6945        case XML_EXP_SEQ:
6946        case XML_EXP_OR:
6947	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
6948	    if (tmp < 0)
6949	        return(tmp);
6950	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
6951	                                nb + tmp);
6952	    if (tmp2 < 0)
6953	        return(tmp2);
6954            return(tmp + tmp2);
6955    }
6956    return(-1);
6957}
6958
6959/**
6960 * xmlExpGetLanguage:
6961 * @ctxt: the expression context
6962 * @exp: the expression
6963 * @langList: where to store the tokens
6964 * @len: the allocated lenght of @list
6965 *
6966 * Find all the strings used in @exp and store them in @list
6967 *
6968 * Returns the number of unique strings found, -1 in case of errors and
6969 *         -2 if there is more than @len strings
6970 */
6971int
6972xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6973                  const xmlChar**langList, int len) {
6974    if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
6975        return(-1);
6976    return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
6977}
6978
6979static int
6980xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6981                  const xmlChar**list, int len, int nb) {
6982    int tmp, tmp2;
6983tail:
6984    switch (exp->type) {
6985        case XML_EXP_FORBID:
6986	    return(0);
6987        case XML_EXP_EMPTY:
6988	    return(0);
6989        case XML_EXP_ATOM:
6990	    for (tmp = 0;tmp < nb;tmp++)
6991	        if (list[tmp] == exp->exp_str)
6992		    return(0);
6993            if (nb >= len)
6994	        return(-2);
6995	    list[nb] = exp->exp_str;
6996	    return(1);
6997        case XML_EXP_COUNT:
6998	    exp = exp->exp_left;
6999	    goto tail;
7000        case XML_EXP_SEQ:
7001	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7002	    if (tmp < 0)
7003	        return(tmp);
7004	    if (IS_NILLABLE(exp->exp_left)) {
7005		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7006					    nb + tmp);
7007		if (tmp2 < 0)
7008		    return(tmp2);
7009		tmp += tmp2;
7010	    }
7011            return(tmp);
7012        case XML_EXP_OR:
7013	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
7014	    if (tmp < 0)
7015	        return(tmp);
7016	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
7017	                                nb + tmp);
7018	    if (tmp2 < 0)
7019	        return(tmp2);
7020            return(tmp + tmp2);
7021    }
7022    return(-1);
7023}
7024
7025/**
7026 * xmlExpGetStart:
7027 * @ctxt: the expression context
7028 * @exp: the expression
7029 * @tokList: where to store the tokens
7030 * @len: the allocated lenght of @list
7031 *
7032 * Find all the strings that appears at the start of the languages
7033 * accepted by @exp and store them in @list. E.g. for (a, b) | c
7034 * it will return the list [a, c]
7035 *
7036 * Returns the number of unique strings found, -1 in case of errors and
7037 *         -2 if there is more than @len strings
7038 */
7039int
7040xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7041               const xmlChar**tokList, int len) {
7042    if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
7043        return(-1);
7044    return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
7045}
7046
7047/**
7048 * xmlExpIsNillable:
7049 * @exp: the expression
7050 *
7051 * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
7052 *
7053 * Returns 1 if nillable, 0 if not and -1 in case of error
7054 */
7055int
7056xmlExpIsNillable(xmlExpNodePtr exp) {
7057    if (exp == NULL)
7058        return(-1);
7059    return(IS_NILLABLE(exp) != 0);
7060}
7061
7062static xmlExpNodePtr
7063xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
7064{
7065    xmlExpNodePtr ret;
7066
7067    switch (exp->type) {
7068	case XML_EXP_EMPTY:
7069	    return(forbiddenExp);
7070	case XML_EXP_FORBID:
7071	    return(forbiddenExp);
7072	case XML_EXP_ATOM:
7073	    if (exp->exp_str == str) {
7074#ifdef DEBUG_DERIV
7075		printf("deriv atom: equal => Empty\n");
7076#endif
7077	        ret = emptyExp;
7078	    } else {
7079#ifdef DEBUG_DERIV
7080		printf("deriv atom: mismatch => forbid\n");
7081#endif
7082	        /* TODO wildcards here */
7083		ret = forbiddenExp;
7084	    }
7085	    return(ret);
7086	case XML_EXP_OR: {
7087	    xmlExpNodePtr tmp;
7088
7089#ifdef DEBUG_DERIV
7090	    printf("deriv or: => or(derivs)\n");
7091#endif
7092	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7093	    if (tmp == NULL) {
7094		return(NULL);
7095	    }
7096	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7097	    if (ret == NULL) {
7098	        xmlExpFree(ctxt, tmp);
7099		return(NULL);
7100	    }
7101            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
7102			     NULL, 0, 0);
7103	    return(ret);
7104	}
7105	case XML_EXP_SEQ:
7106#ifdef DEBUG_DERIV
7107	    printf("deriv seq: starting with left\n");
7108#endif
7109	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7110	    if (ret == NULL) {
7111	        return(NULL);
7112	    } else if (ret == forbiddenExp) {
7113	        if (IS_NILLABLE(exp->exp_left)) {
7114#ifdef DEBUG_DERIV
7115		    printf("deriv seq: left failed but nillable\n");
7116#endif
7117		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
7118		}
7119	    } else {
7120#ifdef DEBUG_DERIV
7121		printf("deriv seq: left match => sequence\n");
7122#endif
7123	        exp->exp_right->ref++;
7124	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
7125		                         NULL, 0, 0);
7126	    }
7127	    return(ret);
7128	case XML_EXP_COUNT: {
7129	    int min, max;
7130	    xmlExpNodePtr tmp;
7131
7132	    if (exp->exp_max == 0)
7133		return(forbiddenExp);
7134	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
7135	    if (ret == NULL)
7136	        return(NULL);
7137	    if (ret == forbiddenExp) {
7138#ifdef DEBUG_DERIV
7139		printf("deriv count: pattern mismatch => forbid\n");
7140#endif
7141	        return(ret);
7142	    }
7143	    if (exp->exp_max == 1)
7144		return(ret);
7145	    if (exp->exp_max < 0) /* unbounded */
7146		max = -1;
7147	    else
7148		max = exp->exp_max - 1;
7149	    if (exp->exp_min > 0)
7150		min = exp->exp_min - 1;
7151	    else
7152		min = 0;
7153	    exp->exp_left->ref++;
7154	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
7155				     NULL, min, max);
7156	    if (ret == emptyExp) {
7157#ifdef DEBUG_DERIV
7158		printf("deriv count: match to empty => new count\n");
7159#endif
7160	        return(tmp);
7161	    }
7162#ifdef DEBUG_DERIV
7163	    printf("deriv count: match => sequence with new count\n");
7164#endif
7165	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
7166	                              NULL, 0, 0));
7167	}
7168    }
7169    return(NULL);
7170}
7171
7172/**
7173 * xmlExpStringDerive:
7174 * @ctxt: the expression context
7175 * @exp: the expression
7176 * @str: the string
7177 * @len: the string len in bytes if available
7178 *
7179 * Do one step of Brzozowski derivation of the expression @exp with
7180 * respect to the input string
7181 *
7182 * Returns the resulting expression or NULL in case of internal error
7183 */
7184xmlExpNodePtr
7185xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7186                   const xmlChar *str, int len) {
7187    const xmlChar *input;
7188
7189    if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
7190        return(NULL);
7191    }
7192    /*
7193     * check the string is in the dictionnary, if yes use an interned
7194     * copy, otherwise we know it's not an acceptable input
7195     */
7196    input = xmlDictExists(ctxt->dict, str, len);
7197    if (input == NULL) {
7198        return(forbiddenExp);
7199    }
7200    return(xmlExpStringDeriveInt(ctxt, exp, input));
7201}
7202
7203static int
7204xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
7205    int ret = 1;
7206
7207    if (sub->c_max == -1) {
7208        if (exp->c_max != -1)
7209	    ret = 0;
7210    } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
7211        ret = 0;
7212    }
7213#if 0
7214    if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
7215        ret = 0;
7216#endif
7217    return(ret);
7218}
7219
7220static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
7221                                        xmlExpNodePtr sub);
7222/**
7223 * xmlExpDivide:
7224 * @ctxt: the expressions context
7225 * @exp: the englobing expression
7226 * @sub: the subexpression
7227 * @mult: the multiple expression
7228 * @remain: the remain from the derivation of the multiple
7229 *
7230 * Check if exp is a multiple of sub, i.e. if there is a finite number n
7231 * so that sub{n} subsume exp
7232 *
7233 * Returns the multiple value if successful, 0 if it is not a multiple
7234 *         and -1 in case of internel error.
7235 */
7236
7237static int
7238xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
7239             xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
7240    int i;
7241    xmlExpNodePtr tmp, tmp2;
7242
7243    if (mult != NULL) *mult = NULL;
7244    if (remain != NULL) *remain = NULL;
7245    if (exp->c_max == -1) return(0);
7246    if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
7247
7248    for (i = 1;i <= exp->c_max;i++) {
7249        sub->ref++;
7250        tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7251				 sub, NULL, NULL, i, i);
7252	if (tmp == NULL) {
7253	    return(-1);
7254	}
7255	if (!xmlExpCheckCard(tmp, exp)) {
7256	    xmlExpFree(ctxt, tmp);
7257	    continue;
7258	}
7259	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
7260	if (tmp2 == NULL) {
7261	    xmlExpFree(ctxt, tmp);
7262	    return(-1);
7263	}
7264	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
7265	    if (remain != NULL)
7266	        *remain = tmp2;
7267	    else
7268	        xmlExpFree(ctxt, tmp2);
7269	    if (mult != NULL)
7270	        *mult = tmp;
7271	    else
7272	        xmlExpFree(ctxt, tmp);
7273#ifdef DEBUG_DERIV
7274	    printf("Divide succeeded %d\n", i);
7275#endif
7276	    return(i);
7277	}
7278	xmlExpFree(ctxt, tmp);
7279	xmlExpFree(ctxt, tmp2);
7280    }
7281#ifdef DEBUG_DERIV
7282    printf("Divide failed\n");
7283#endif
7284    return(0);
7285}
7286
7287/**
7288 * xmlExpExpDeriveInt:
7289 * @ctxt: the expressions context
7290 * @exp: the englobing expression
7291 * @sub: the subexpression
7292 *
7293 * Try to do a step of Brzozowski derivation but at a higher level
7294 * the input being a subexpression.
7295 *
7296 * Returns the resulting expression or NULL in case of internal error
7297 */
7298static xmlExpNodePtr
7299xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7300    xmlExpNodePtr ret, tmp, tmp2, tmp3;
7301    const xmlChar **tab;
7302    int len, i;
7303
7304    /*
7305     * In case of equality and if the expression can only consume a finite
7306     * amount, then the derivation is empty
7307     */
7308    if ((exp == sub) && (exp->c_max >= 0)) {
7309#ifdef DEBUG_DERIV
7310        printf("Equal(exp, sub) and finite -> Empty\n");
7311#endif
7312        return(emptyExp);
7313    }
7314    /*
7315     * decompose sub sequence first
7316     */
7317    if (sub->type == XML_EXP_EMPTY) {
7318#ifdef DEBUG_DERIV
7319        printf("Empty(sub) -> Empty\n");
7320#endif
7321	exp->ref++;
7322        return(exp);
7323    }
7324    if (sub->type == XML_EXP_SEQ) {
7325#ifdef DEBUG_DERIV
7326        printf("Seq(sub) -> decompose\n");
7327#endif
7328        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7329	if (tmp == NULL)
7330	    return(NULL);
7331	if (tmp == forbiddenExp)
7332	    return(tmp);
7333	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
7334	xmlExpFree(ctxt, tmp);
7335	return(ret);
7336    }
7337    if (sub->type == XML_EXP_OR) {
7338#ifdef DEBUG_DERIV
7339        printf("Or(sub) -> decompose\n");
7340#endif
7341        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
7342	if (tmp == forbiddenExp)
7343	    return(tmp);
7344	if (tmp == NULL)
7345	    return(NULL);
7346	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
7347	if ((ret == NULL) || (ret == forbiddenExp)) {
7348	    xmlExpFree(ctxt, tmp);
7349	    return(ret);
7350	}
7351	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
7352    }
7353    if (!xmlExpCheckCard(exp, sub)) {
7354#ifdef DEBUG_DERIV
7355        printf("CheckCard(exp, sub) failed -> Forbid\n");
7356#endif
7357        return(forbiddenExp);
7358    }
7359    switch (exp->type) {
7360        case XML_EXP_EMPTY:
7361	    if (sub == emptyExp)
7362	        return(emptyExp);
7363#ifdef DEBUG_DERIV
7364	    printf("Empty(exp) -> Forbid\n");
7365#endif
7366	    return(forbiddenExp);
7367        case XML_EXP_FORBID:
7368#ifdef DEBUG_DERIV
7369	    printf("Forbid(exp) -> Forbid\n");
7370#endif
7371	    return(forbiddenExp);
7372        case XML_EXP_ATOM:
7373	    if (sub->type == XML_EXP_ATOM) {
7374	        /* TODO: handle wildcards */
7375	        if (exp->exp_str == sub->exp_str) {
7376#ifdef DEBUG_DERIV
7377		    printf("Atom match -> Empty\n");
7378#endif
7379		    return(emptyExp);
7380                }
7381#ifdef DEBUG_DERIV
7382		printf("Atom mismatch -> Forbid\n");
7383#endif
7384	        return(forbiddenExp);
7385	    }
7386	    if ((sub->type == XML_EXP_COUNT) &&
7387	        (sub->exp_max == 1) &&
7388	        (sub->exp_left->type == XML_EXP_ATOM)) {
7389	        /* TODO: handle wildcards */
7390	        if (exp->exp_str == sub->exp_left->exp_str) {
7391#ifdef DEBUG_DERIV
7392		    printf("Atom match -> Empty\n");
7393#endif
7394		    return(emptyExp);
7395		}
7396#ifdef DEBUG_DERIV
7397		printf("Atom mismatch -> Forbid\n");
7398#endif
7399	        return(forbiddenExp);
7400	    }
7401#ifdef DEBUG_DERIV
7402	    printf("Compex exp vs Atom -> Forbid\n");
7403#endif
7404	    return(forbiddenExp);
7405        case XML_EXP_SEQ:
7406	    /* try to get the sequence consumed only if possible */
7407	    if (xmlExpCheckCard(exp->exp_left, sub)) {
7408		/* See if the sequence can be consumed directly */
7409#ifdef DEBUG_DERIV
7410		printf("Seq trying left only\n");
7411#endif
7412		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7413		if ((ret != forbiddenExp) && (ret != NULL)) {
7414#ifdef DEBUG_DERIV
7415		    printf("Seq trying left only worked\n");
7416#endif
7417		    /*
7418		     * TODO: assumption here that we are determinist
7419		     *       i.e. we won't get to a nillable exp left
7420		     *       subset which could be matched by the right
7421		     *       part too.
7422		     * e.g.: (a | b)+,(a | c) and 'a+,a'
7423		     */
7424		    exp->exp_right->ref++;
7425		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7426					      exp->exp_right, NULL, 0, 0));
7427		}
7428#ifdef DEBUG_DERIV
7429	    } else {
7430		printf("Seq: left too short\n");
7431#endif
7432	    }
7433	    /* Try instead to decompose */
7434	    if (sub->type == XML_EXP_COUNT) {
7435		int min, max;
7436
7437#ifdef DEBUG_DERIV
7438		printf("Seq: sub is a count\n");
7439#endif
7440	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7441		if (ret == NULL)
7442		    return(NULL);
7443		if (ret != forbiddenExp) {
7444#ifdef DEBUG_DERIV
7445		    printf("Seq , Count match on left\n");
7446#endif
7447		    if (sub->exp_max < 0)
7448		        max = -1;
7449	            else
7450		        max = sub->exp_max -1;
7451		    if (sub->exp_min > 0)
7452		        min = sub->exp_min -1;
7453		    else
7454		        min = 0;
7455		    exp->exp_right->ref++;
7456		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
7457		                             exp->exp_right, NULL, 0, 0);
7458		    if (tmp == NULL)
7459		        return(NULL);
7460
7461		    sub->exp_left->ref++;
7462		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
7463				      sub->exp_left, NULL, NULL, min, max);
7464		    if (tmp2 == NULL) {
7465		        xmlExpFree(ctxt, tmp);
7466			return(NULL);
7467		    }
7468		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7469		    xmlExpFree(ctxt, tmp);
7470		    xmlExpFree(ctxt, tmp2);
7471		    return(ret);
7472		}
7473	    }
7474	    /* we made no progress on structured operations */
7475	    break;
7476        case XML_EXP_OR:
7477#ifdef DEBUG_DERIV
7478	    printf("Or , trying both side\n");
7479#endif
7480	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7481	    if (ret == NULL)
7482	        return(NULL);
7483	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
7484	    if (tmp == NULL) {
7485		xmlExpFree(ctxt, ret);
7486	        return(NULL);
7487	    }
7488	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
7489        case XML_EXP_COUNT: {
7490	    int min, max;
7491
7492	    if (sub->type == XML_EXP_COUNT) {
7493	        /*
7494		 * Try to see if the loop is completely subsumed
7495		 */
7496	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
7497		if (tmp == NULL)
7498		    return(NULL);
7499		if (tmp == forbiddenExp) {
7500		    int mult;
7501
7502#ifdef DEBUG_DERIV
7503		    printf("Count, Count inner don't subsume\n");
7504#endif
7505		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7506		                        NULL, &tmp);
7507		    if (mult <= 0) {
7508#ifdef DEBUG_DERIV
7509			printf("Count, Count not multiple => forbidden\n");
7510#endif
7511                        return(forbiddenExp);
7512		    }
7513		    if (sub->exp_max == -1) {
7514		        max = -1;
7515			if (exp->exp_max == -1) {
7516			    if (exp->exp_min <= sub->exp_min * mult)
7517			        min = 0;
7518			    else
7519			        min = exp->exp_min - sub->exp_min * mult;
7520			} else {
7521#ifdef DEBUG_DERIV
7522			    printf("Count, Count finite can't subsume infinite\n");
7523#endif
7524                            xmlExpFree(ctxt, tmp);
7525			    return(forbiddenExp);
7526			}
7527		    } else {
7528			if (exp->exp_max == -1) {
7529#ifdef DEBUG_DERIV
7530			    printf("Infinite loop consume mult finite loop\n");
7531#endif
7532			    if (exp->exp_min > sub->exp_min * mult) {
7533				max = -1;
7534				min = exp->exp_min - sub->exp_min * mult;
7535			    } else {
7536				max = -1;
7537				min = 0;
7538			    }
7539			} else {
7540			    if (exp->exp_max < sub->exp_max * mult) {
7541#ifdef DEBUG_DERIV
7542				printf("loops max mult mismatch => forbidden\n");
7543#endif
7544				xmlExpFree(ctxt, tmp);
7545				return(forbiddenExp);
7546			    }
7547			    if (sub->exp_max * mult > exp->exp_min)
7548				min = 0;
7549			    else
7550				min = exp->exp_min - sub->exp_max * mult;
7551			    max = exp->exp_max - sub->exp_max * mult;
7552			}
7553		    }
7554		} else if (!IS_NILLABLE(tmp)) {
7555		    /*
7556		     * TODO: loop here to try to grow if working on finite
7557		     *       blocks.
7558		     */
7559#ifdef DEBUG_DERIV
7560		    printf("Count, Count remain not nillable => forbidden\n");
7561#endif
7562		    xmlExpFree(ctxt, tmp);
7563		    return(forbiddenExp);
7564		} else if (sub->exp_max == -1) {
7565		    if (exp->exp_max == -1) {
7566		        if (exp->exp_min <= sub->exp_min) {
7567#ifdef DEBUG_DERIV
7568			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
7569#endif
7570                            max = -1;
7571			    min = 0;
7572			} else {
7573#ifdef DEBUG_DERIV
7574			    printf("Infinite loops min => Count(X,Inf)\n");
7575#endif
7576                            max = -1;
7577			    min = exp->exp_min - sub->exp_min;
7578			}
7579		    } else if (exp->exp_min > sub->exp_min) {
7580#ifdef DEBUG_DERIV
7581			printf("loops min mismatch 1 => forbidden ???\n");
7582#endif
7583		        xmlExpFree(ctxt, tmp);
7584		        return(forbiddenExp);
7585		    } else {
7586			max = -1;
7587			min = 0;
7588		    }
7589		} else {
7590		    if (exp->exp_max == -1) {
7591#ifdef DEBUG_DERIV
7592			printf("Infinite loop consume finite loop\n");
7593#endif
7594		        if (exp->exp_min > sub->exp_min) {
7595			    max = -1;
7596			    min = exp->exp_min - sub->exp_min;
7597			} else {
7598			    max = -1;
7599			    min = 0;
7600			}
7601		    } else {
7602		        if (exp->exp_max < sub->exp_max) {
7603#ifdef DEBUG_DERIV
7604			    printf("loops max mismatch => forbidden\n");
7605#endif
7606			    xmlExpFree(ctxt, tmp);
7607			    return(forbiddenExp);
7608			}
7609			if (sub->exp_max > exp->exp_min)
7610			    min = 0;
7611			else
7612			    min = exp->exp_min - sub->exp_max;
7613			max = exp->exp_max - sub->exp_max;
7614		    }
7615		}
7616#ifdef DEBUG_DERIV
7617		printf("loops match => SEQ(COUNT())\n");
7618#endif
7619		exp->exp_left->ref++;
7620		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7621		                          NULL, NULL, min, max);
7622		if (tmp2 == NULL) {
7623		    return(NULL);
7624		}
7625                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7626		                         NULL, 0, 0);
7627		return(ret);
7628	    }
7629	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7630	    if (tmp == NULL)
7631		return(NULL);
7632	    if (tmp == forbiddenExp) {
7633#ifdef DEBUG_DERIV
7634		printf("loop mismatch => forbidden\n");
7635#endif
7636		return(forbiddenExp);
7637	    }
7638	    if (exp->exp_min > 0)
7639		min = exp->exp_min - 1;
7640	    else
7641		min = 0;
7642	    if (exp->exp_max < 0)
7643		max = -1;
7644	    else
7645		max = exp->exp_max - 1;
7646
7647#ifdef DEBUG_DERIV
7648	    printf("loop match => SEQ(COUNT())\n");
7649#endif
7650	    exp->exp_left->ref++;
7651	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7652				      NULL, NULL, min, max);
7653	    if (tmp2 == NULL)
7654		return(NULL);
7655	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7656				     NULL, 0, 0);
7657	    return(ret);
7658	}
7659    }
7660
7661#ifdef DEBUG_DERIV
7662    printf("Fallback to derivative\n");
7663#endif
7664    if (IS_NILLABLE(sub)) {
7665        if (!(IS_NILLABLE(exp)))
7666	    return(forbiddenExp);
7667	else
7668	    ret = emptyExp;
7669    } else
7670	ret = NULL;
7671    /*
7672     * here the structured derivation made no progress so
7673     * we use the default token based derivation to force one more step
7674     */
7675    if (ctxt->tabSize == 0)
7676        ctxt->tabSize = 40;
7677
7678    tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7679	                               sizeof(const xmlChar *));
7680    if (tab == NULL) {
7681	return(NULL);
7682    }
7683
7684    /*
7685     * collect all the strings accepted by the subexpression on input
7686     */
7687    len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7688    while (len < 0) {
7689        const xmlChar **temp;
7690	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7691	                                     sizeof(const xmlChar *));
7692	if (temp == NULL) {
7693	    xmlFree((xmlChar **) tab);
7694	    return(NULL);
7695	}
7696	tab = temp;
7697	ctxt->tabSize *= 2;
7698	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7699    }
7700    for (i = 0;i < len;i++) {
7701        tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7702	if ((tmp == NULL) || (tmp == forbiddenExp)) {
7703	    xmlExpFree(ctxt, ret);
7704	    xmlFree((xmlChar **) tab);
7705	    return(tmp);
7706	}
7707	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7708	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7709	    xmlExpFree(ctxt, tmp);
7710	    xmlExpFree(ctxt, ret);
7711	    xmlFree((xmlChar **) tab);
7712	    return(tmp);
7713	}
7714	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7715	xmlExpFree(ctxt, tmp);
7716	xmlExpFree(ctxt, tmp2);
7717
7718	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7719	    xmlExpFree(ctxt, ret);
7720	    xmlFree((xmlChar **) tab);
7721	    return(tmp3);
7722	}
7723
7724	if (ret == NULL)
7725	    ret = tmp3;
7726	else {
7727	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7728	    if (ret == NULL) {
7729		xmlFree((xmlChar **) tab);
7730	        return(NULL);
7731	    }
7732	}
7733    }
7734    xmlFree((xmlChar **) tab);
7735    return(ret);
7736}
7737
7738/**
7739 * xmlExpExpDerive:
7740 * @ctxt: the expressions context
7741 * @exp: the englobing expression
7742 * @sub: the subexpression
7743 *
7744 * Evaluates the expression resulting from @exp consuming a sub expression @sub
7745 * Based on algebraic derivation and sometimes direct Brzozowski derivation
7746 * it usually tatkes less than linear time and can handle expressions generating
7747 * infinite languages.
7748 *
7749 * Returns the resulting expression or NULL in case of internal error, the
7750 *         result must be freed
7751 */
7752xmlExpNodePtr
7753xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7754    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7755        return(NULL);
7756
7757    /*
7758     * O(1) speedups
7759     */
7760    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7761#ifdef DEBUG_DERIV
7762	printf("Sub nillable and not exp : can't subsume\n");
7763#endif
7764        return(forbiddenExp);
7765    }
7766    if (xmlExpCheckCard(exp, sub) == 0) {
7767#ifdef DEBUG_DERIV
7768	printf("sub generate longuer sequances than exp : can't subsume\n");
7769#endif
7770        return(forbiddenExp);
7771    }
7772    return(xmlExpExpDeriveInt(ctxt, exp, sub));
7773}
7774
7775/**
7776 * xmlExpSubsume:
7777 * @ctxt: the expressions context
7778 * @exp: the englobing expression
7779 * @sub: the subexpression
7780 *
7781 * Check whether @exp accepts all the languages accexpted by @sub
7782 * the input being a subexpression.
7783 *
7784 * Returns 1 if true 0 if false and -1 in case of failure.
7785 */
7786int
7787xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7788    xmlExpNodePtr tmp;
7789
7790    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7791        return(-1);
7792
7793    /*
7794     * TODO: speedup by checking the language of sub is a subset of the
7795     *       language of exp
7796     */
7797    /*
7798     * O(1) speedups
7799     */
7800    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7801#ifdef DEBUG_DERIV
7802	printf("Sub nillable and not exp : can't subsume\n");
7803#endif
7804        return(0);
7805    }
7806    if (xmlExpCheckCard(exp, sub) == 0) {
7807#ifdef DEBUG_DERIV
7808	printf("sub generate longuer sequances than exp : can't subsume\n");
7809#endif
7810        return(0);
7811    }
7812    tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7813#ifdef DEBUG_DERIV
7814    printf("Result derivation :\n");
7815    PRINT_EXP(tmp);
7816#endif
7817    if (tmp == NULL)
7818        return(-1);
7819    if (tmp == forbiddenExp)
7820	return(0);
7821    if (tmp == emptyExp)
7822	return(1);
7823    if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7824        xmlExpFree(ctxt, tmp);
7825        return(1);
7826    }
7827    xmlExpFree(ctxt, tmp);
7828    return(0);
7829}
7830
7831/************************************************************************
7832 *									*
7833 *			Parsing expression 				*
7834 *									*
7835 ************************************************************************/
7836
7837static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7838
7839#undef CUR
7840#define CUR (*ctxt->cur)
7841#undef NEXT
7842#define NEXT ctxt->cur++;
7843#undef IS_BLANK
7844#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7845#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7846
7847static int
7848xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7849    int ret = 0;
7850
7851    SKIP_BLANKS
7852    if (CUR == '*') {
7853	NEXT
7854	return(-1);
7855    }
7856    if ((CUR < '0') || (CUR > '9'))
7857        return(-1);
7858    while ((CUR >= '0') && (CUR <= '9')) {
7859        ret = ret * 10 + (CUR - '0');
7860	NEXT
7861    }
7862    return(ret);
7863}
7864
7865static xmlExpNodePtr
7866xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7867    const char *base;
7868    xmlExpNodePtr ret;
7869    const xmlChar *val;
7870
7871    SKIP_BLANKS
7872    base = ctxt->cur;
7873    if (*ctxt->cur == '(') {
7874        NEXT
7875	ret = xmlExpParseExpr(ctxt);
7876	SKIP_BLANKS
7877	if (*ctxt->cur != ')') {
7878	    fprintf(stderr, "unbalanced '(' : %s\n", base);
7879	    xmlExpFree(ctxt, ret);
7880	    return(NULL);
7881	}
7882	NEXT;
7883	SKIP_BLANKS
7884	goto parse_quantifier;
7885    }
7886    while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
7887           (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
7888	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
7889	NEXT;
7890    val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
7891    if (val == NULL)
7892        return(NULL);
7893    ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
7894    if (ret == NULL)
7895        return(NULL);
7896    SKIP_BLANKS
7897parse_quantifier:
7898    if (CUR == '{') {
7899        int min, max;
7900
7901        NEXT
7902	min = xmlExpParseNumber(ctxt);
7903	if (min < 0) {
7904	    xmlExpFree(ctxt, ret);
7905	    return(NULL);
7906	}
7907	SKIP_BLANKS
7908	if (CUR == ',') {
7909	    NEXT
7910	    max = xmlExpParseNumber(ctxt);
7911	    SKIP_BLANKS
7912	} else
7913	    max = min;
7914	if (CUR != '}') {
7915	    xmlExpFree(ctxt, ret);
7916	    return(NULL);
7917	}
7918        NEXT
7919	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7920	                         min, max);
7921	SKIP_BLANKS
7922    } else if (CUR == '?') {
7923        NEXT
7924	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7925	                         0, 1);
7926	SKIP_BLANKS
7927    } else if (CUR == '+') {
7928        NEXT
7929	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7930	                         1, -1);
7931	SKIP_BLANKS
7932    } else if (CUR == '*') {
7933        NEXT
7934	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7935	                         0, -1);
7936	SKIP_BLANKS
7937    }
7938    return(ret);
7939}
7940
7941
7942static xmlExpNodePtr
7943xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7944    xmlExpNodePtr ret, right;
7945
7946    ret = xmlExpParseOr(ctxt);
7947    SKIP_BLANKS
7948    while (CUR == '|') {
7949        NEXT
7950	right = xmlExpParseOr(ctxt);
7951	if (right == NULL) {
7952	    xmlExpFree(ctxt, ret);
7953	    return(NULL);
7954	}
7955	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
7956	if (ret == NULL)
7957	    return(NULL);
7958    }
7959    return(ret);
7960}
7961
7962static xmlExpNodePtr
7963xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
7964    xmlExpNodePtr ret, right;
7965
7966    ret = xmlExpParseSeq(ctxt);
7967    SKIP_BLANKS
7968    while (CUR == ',') {
7969        NEXT
7970	right = xmlExpParseSeq(ctxt);
7971	if (right == NULL) {
7972	    xmlExpFree(ctxt, ret);
7973	    return(NULL);
7974	}
7975	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
7976	if (ret == NULL)
7977	    return(NULL);
7978    }
7979    return(ret);
7980}
7981
7982/**
7983 * xmlExpParse:
7984 * @ctxt: the expressions context
7985 * @expr: the 0 terminated string
7986 *
7987 * Minimal parser for regexps, it understand the following constructs
7988 *  - string terminals
7989 *  - choice operator |
7990 *  - sequence operator ,
7991 *  - subexpressions (...)
7992 *  - usual cardinality operators + * and ?
7993 *  - finite sequences  { min, max }
7994 *  - infinite sequences { min, * }
7995 * There is minimal checkings made especially no checking on strings values
7996 *
7997 * Returns a new expression or NULL in case of failure
7998 */
7999xmlExpNodePtr
8000xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
8001    xmlExpNodePtr ret;
8002
8003    ctxt->expr = expr;
8004    ctxt->cur = expr;
8005
8006    ret = xmlExpParseExpr(ctxt);
8007    SKIP_BLANKS
8008    if (*ctxt->cur != 0) {
8009        xmlExpFree(ctxt, ret);
8010        return(NULL);
8011    }
8012    return(ret);
8013}
8014
8015static void
8016xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
8017    xmlExpNodePtr c;
8018
8019    if (expr == NULL) return;
8020    if (glob) xmlBufferWriteChar(buf, "(");
8021    switch (expr->type) {
8022        case XML_EXP_EMPTY:
8023	    xmlBufferWriteChar(buf, "empty");
8024	    break;
8025        case XML_EXP_FORBID:
8026	    xmlBufferWriteChar(buf, "forbidden");
8027	    break;
8028        case XML_EXP_ATOM:
8029	    xmlBufferWriteCHAR(buf, expr->exp_str);
8030	    break;
8031        case XML_EXP_SEQ:
8032	    c = expr->exp_left;
8033	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8034	        xmlExpDumpInt(buf, c, 1);
8035	    else
8036	        xmlExpDumpInt(buf, c, 0);
8037	    xmlBufferWriteChar(buf, " , ");
8038	    c = expr->exp_right;
8039	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8040	        xmlExpDumpInt(buf, c, 1);
8041	    else
8042	        xmlExpDumpInt(buf, c, 0);
8043            break;
8044        case XML_EXP_OR:
8045	    c = expr->exp_left;
8046	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8047	        xmlExpDumpInt(buf, c, 1);
8048	    else
8049	        xmlExpDumpInt(buf, c, 0);
8050	    xmlBufferWriteChar(buf, " | ");
8051	    c = expr->exp_right;
8052	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8053	        xmlExpDumpInt(buf, c, 1);
8054	    else
8055	        xmlExpDumpInt(buf, c, 0);
8056            break;
8057        case XML_EXP_COUNT: {
8058	    char rep[40];
8059
8060	    c = expr->exp_left;
8061	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
8062	        xmlExpDumpInt(buf, c, 1);
8063	    else
8064	        xmlExpDumpInt(buf, c, 0);
8065	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
8066		rep[0] = '?';
8067		rep[1] = 0;
8068	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
8069		rep[0] = '*';
8070		rep[1] = 0;
8071	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
8072		rep[0] = '+';
8073		rep[1] = 0;
8074	    } else if (expr->exp_max == expr->exp_min) {
8075	        snprintf(rep, 39, "{%d}", expr->exp_min);
8076	    } else if (expr->exp_max < 0) {
8077	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
8078	    } else {
8079	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
8080	    }
8081	    rep[39] = 0;
8082	    xmlBufferWriteChar(buf, rep);
8083	    break;
8084	}
8085	default:
8086	    fprintf(stderr, "Error in tree\n");
8087    }
8088    if (glob)
8089        xmlBufferWriteChar(buf, ")");
8090}
8091/**
8092 * xmlExpDump:
8093 * @buf:  a buffer to receive the output
8094 * @expr:  the compiled expression
8095 *
8096 * Serialize the expression as compiled to the buffer
8097 */
8098void
8099xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
8100    if ((buf == NULL) || (expr == NULL))
8101        return;
8102    xmlExpDumpInt(buf, expr, 0);
8103}
8104
8105/**
8106 * xmlExpMaxToken:
8107 * @expr: a compiled expression
8108 *
8109 * Indicate the maximum number of input a expression can accept
8110 *
8111 * Returns the maximum length or -1 in case of error
8112 */
8113int
8114xmlExpMaxToken(xmlExpNodePtr expr) {
8115    if (expr == NULL)
8116        return(-1);
8117    return(expr->c_max);
8118}
8119
8120/**
8121 * xmlExpCtxtNbNodes:
8122 * @ctxt: an expression context
8123 *
8124 * Debugging facility provides the number of allocated nodes at a that point
8125 *
8126 * Returns the number of nodes in use or -1 in case of error
8127 */
8128int
8129xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
8130    if (ctxt == NULL)
8131        return(-1);
8132    return(ctxt->nb_nodes);
8133}
8134
8135/**
8136 * xmlExpCtxtNbCons:
8137 * @ctxt: an expression context
8138 *
8139 * Debugging facility provides the number of allocated nodes over lifetime
8140 *
8141 * Returns the number of nodes ever allocated or -1 in case of error
8142 */
8143int
8144xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
8145    if (ctxt == NULL)
8146        return(-1);
8147    return(ctxt->nb_cons);
8148}
8149
8150#endif /* LIBXML_EXPR_ENABLED */
8151#define bottom_xmlregexp
8152#include "elfgcchack.h"
8153#endif /* LIBXML_REGEXP_ENABLED */
8154