xmlregexp.c revision aa62201290b6f3f3305f0fff504faf89c286a360
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications these include:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22/* #define DEBUG_ERR */
23
24#include <stdio.h>
25#include <string.h>
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <libxml/tree.h>
31#include <libxml/parserInternals.h>
32#include <libxml/xmlregexp.h>
33#include <libxml/xmlautomata.h>
34#include <libxml/xmlunicode.h>
35
36#ifndef INT_MAX
37#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38#endif
39
40/* #define DEBUG_REGEXP_GRAPH */
41/* #define DEBUG_REGEXP_EXEC */
42/* #define DEBUG_PUSH */
43/* #define DEBUG_COMPACTION */
44
45#define MAX_PUSH 10000000
46
47#define ERROR(str)							\
48    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
49    xmlRegexpErrCompile(ctxt, str);
50#define NEXT ctxt->cur++
51#define CUR (*(ctxt->cur))
52#define NXT(index) (ctxt->cur[index])
53
54#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
55#define NEXTL(l) ctxt->cur += l;
56#define XML_REG_STRING_SEPARATOR '|'
57
58/**
59 * TODO:
60 *
61 * macro to flag unimplemented blocks
62 */
63#define TODO 								\
64    xmlGenericError(xmlGenericErrorContext,				\
65	    "Unimplemented block at %s:%d\n",				\
66            __FILE__, __LINE__);
67
68/************************************************************************
69 * 									*
70 * 			Datatypes and structures			*
71 * 									*
72 ************************************************************************/
73
74typedef enum {
75    XML_REGEXP_EPSILON = 1,
76    XML_REGEXP_CHARVAL,
77    XML_REGEXP_RANGES,
78    XML_REGEXP_SUBREG,  /* used for () sub regexps */
79    XML_REGEXP_STRING,
80    XML_REGEXP_ANYCHAR, /* . */
81    XML_REGEXP_ANYSPACE, /* \s */
82    XML_REGEXP_NOTSPACE, /* \S */
83    XML_REGEXP_INITNAME, /* \l */
84    XML_REGEXP_NOTINITNAME, /* \L */
85    XML_REGEXP_NAMECHAR, /* \c */
86    XML_REGEXP_NOTNAMECHAR, /* \C */
87    XML_REGEXP_DECIMAL, /* \d */
88    XML_REGEXP_NOTDECIMAL, /* \D */
89    XML_REGEXP_REALCHAR, /* \w */
90    XML_REGEXP_NOTREALCHAR, /* \W */
91    XML_REGEXP_LETTER = 100,
92    XML_REGEXP_LETTER_UPPERCASE,
93    XML_REGEXP_LETTER_LOWERCASE,
94    XML_REGEXP_LETTER_TITLECASE,
95    XML_REGEXP_LETTER_MODIFIER,
96    XML_REGEXP_LETTER_OTHERS,
97    XML_REGEXP_MARK,
98    XML_REGEXP_MARK_NONSPACING,
99    XML_REGEXP_MARK_SPACECOMBINING,
100    XML_REGEXP_MARK_ENCLOSING,
101    XML_REGEXP_NUMBER,
102    XML_REGEXP_NUMBER_DECIMAL,
103    XML_REGEXP_NUMBER_LETTER,
104    XML_REGEXP_NUMBER_OTHERS,
105    XML_REGEXP_PUNCT,
106    XML_REGEXP_PUNCT_CONNECTOR,
107    XML_REGEXP_PUNCT_DASH,
108    XML_REGEXP_PUNCT_OPEN,
109    XML_REGEXP_PUNCT_CLOSE,
110    XML_REGEXP_PUNCT_INITQUOTE,
111    XML_REGEXP_PUNCT_FINQUOTE,
112    XML_REGEXP_PUNCT_OTHERS,
113    XML_REGEXP_SEPAR,
114    XML_REGEXP_SEPAR_SPACE,
115    XML_REGEXP_SEPAR_LINE,
116    XML_REGEXP_SEPAR_PARA,
117    XML_REGEXP_SYMBOL,
118    XML_REGEXP_SYMBOL_MATH,
119    XML_REGEXP_SYMBOL_CURRENCY,
120    XML_REGEXP_SYMBOL_MODIFIER,
121    XML_REGEXP_SYMBOL_OTHERS,
122    XML_REGEXP_OTHER,
123    XML_REGEXP_OTHER_CONTROL,
124    XML_REGEXP_OTHER_FORMAT,
125    XML_REGEXP_OTHER_PRIVATE,
126    XML_REGEXP_OTHER_NA,
127    XML_REGEXP_BLOCK_NAME
128} xmlRegAtomType;
129
130typedef enum {
131    XML_REGEXP_QUANT_EPSILON = 1,
132    XML_REGEXP_QUANT_ONCE,
133    XML_REGEXP_QUANT_OPT,
134    XML_REGEXP_QUANT_MULT,
135    XML_REGEXP_QUANT_PLUS,
136    XML_REGEXP_QUANT_ONCEONLY,
137    XML_REGEXP_QUANT_ALL,
138    XML_REGEXP_QUANT_RANGE
139} xmlRegQuantType;
140
141typedef enum {
142    XML_REGEXP_START_STATE = 1,
143    XML_REGEXP_FINAL_STATE,
144    XML_REGEXP_TRANS_STATE,
145    XML_REGEXP_SINK_STATE
146} xmlRegStateType;
147
148typedef enum {
149    XML_REGEXP_MARK_NORMAL = 0,
150    XML_REGEXP_MARK_START,
151    XML_REGEXP_MARK_VISITED
152} xmlRegMarkedType;
153
154typedef struct _xmlRegRange xmlRegRange;
155typedef xmlRegRange *xmlRegRangePtr;
156
157struct _xmlRegRange {
158    int neg;		/* 0 normal, 1 not, 2 exclude */
159    xmlRegAtomType type;
160    int start;
161    int end;
162    xmlChar *blockName;
163};
164
165typedef struct _xmlRegAtom xmlRegAtom;
166typedef xmlRegAtom *xmlRegAtomPtr;
167
168typedef struct _xmlAutomataState xmlRegState;
169typedef xmlRegState *xmlRegStatePtr;
170
171struct _xmlRegAtom {
172    int no;
173    xmlRegAtomType type;
174    xmlRegQuantType quant;
175    int min;
176    int max;
177
178    void *valuep;
179    void *valuep2;
180    int neg;
181    int codepoint;
182    xmlRegStatePtr start;
183    xmlRegStatePtr stop;
184    int maxRanges;
185    int nbRanges;
186    xmlRegRangePtr *ranges;
187    void *data;
188};
189
190typedef struct _xmlRegCounter xmlRegCounter;
191typedef xmlRegCounter *xmlRegCounterPtr;
192
193struct _xmlRegCounter {
194    int min;
195    int max;
196};
197
198typedef struct _xmlRegTrans xmlRegTrans;
199typedef xmlRegTrans *xmlRegTransPtr;
200
201struct _xmlRegTrans {
202    xmlRegAtomPtr atom;
203    int to;
204    int counter;
205    int count;
206    int nd;
207};
208
209struct _xmlAutomataState {
210    xmlRegStateType type;
211    xmlRegMarkedType mark;
212    xmlRegMarkedType reached;
213    int no;
214    int maxTrans;
215    int nbTrans;
216    xmlRegTrans *trans;
217    /*  knowing states ponting to us can speed things up */
218    int maxTransTo;
219    int nbTransTo;
220    int *transTo;
221};
222
223typedef struct _xmlAutomata xmlRegParserCtxt;
224typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
225
226struct _xmlAutomata {
227    xmlChar *string;
228    xmlChar *cur;
229
230    int error;
231    int neg;
232
233    xmlRegStatePtr start;
234    xmlRegStatePtr end;
235    xmlRegStatePtr state;
236
237    xmlRegAtomPtr atom;
238
239    int maxAtoms;
240    int nbAtoms;
241    xmlRegAtomPtr *atoms;
242
243    int maxStates;
244    int nbStates;
245    xmlRegStatePtr *states;
246
247    int maxCounters;
248    int nbCounters;
249    xmlRegCounter *counters;
250
251    int determinist;
252    int negs;
253};
254
255struct _xmlRegexp {
256    xmlChar *string;
257    int nbStates;
258    xmlRegStatePtr *states;
259    int nbAtoms;
260    xmlRegAtomPtr *atoms;
261    int nbCounters;
262    xmlRegCounter *counters;
263    int determinist;
264    /*
265     * That's the compact form for determinists automatas
266     */
267    int nbstates;
268    int *compact;
269    void **transdata;
270    int nbstrings;
271    xmlChar **stringMap;
272};
273
274typedef struct _xmlRegExecRollback xmlRegExecRollback;
275typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
276
277struct _xmlRegExecRollback {
278    xmlRegStatePtr state;/* the current state */
279    int index;		/* the index in the input stack */
280    int nextbranch;	/* the next transition to explore in that state */
281    int *counts;	/* save the automata state if it has some */
282};
283
284typedef struct _xmlRegInputToken xmlRegInputToken;
285typedef xmlRegInputToken *xmlRegInputTokenPtr;
286
287struct _xmlRegInputToken {
288    xmlChar *value;
289    void *data;
290};
291
292struct _xmlRegExecCtxt {
293    int status;		/* execution status != 0 indicate an error */
294    int determinist;	/* did we find an indeterministic behaviour */
295    xmlRegexpPtr comp;	/* the compiled regexp */
296    xmlRegExecCallbacks callback;
297    void *data;
298
299    xmlRegStatePtr state;/* the current state */
300    int transno;	/* the current transition on that state */
301    int transcount;	/* the number of chars in char counted transitions */
302
303    /*
304     * A stack of rollback states
305     */
306    int maxRollbacks;
307    int nbRollbacks;
308    xmlRegExecRollback *rollbacks;
309
310    /*
311     * The state of the automata if any
312     */
313    int *counts;
314
315    /*
316     * The input stack
317     */
318    int inputStackMax;
319    int inputStackNr;
320    int index;
321    int *charStack;
322    const xmlChar *inputString; /* when operating on characters */
323    xmlRegInputTokenPtr inputStack;/* when operating on strings */
324
325    /*
326     * error handling
327     */
328    int errStateNo;		/* the error state number */
329    xmlRegStatePtr errState;    /* the error state */
330    xmlChar *errString;		/* the string raising the error */
331    int *errCounts;		/* counters at the error state */
332    int nbPush;
333};
334
335#define REGEXP_ALL_COUNTER	0x123456
336#define REGEXP_ALL_LAX_COUNTER	0x123457
337
338static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
339static void xmlRegFreeState(xmlRegStatePtr state);
340static void xmlRegFreeAtom(xmlRegAtomPtr atom);
341static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
342static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
343static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
344                  int neg, int start, int end, const xmlChar *blockName);
345
346/************************************************************************
347 *									*
348 * 		Regexp memory error handler				*
349 *									*
350 ************************************************************************/
351/**
352 * xmlRegexpErrMemory:
353 * @extra:  extra information
354 *
355 * Handle an out of memory condition
356 */
357static void
358xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
359{
360    const char *regexp = NULL;
361    if (ctxt != NULL) {
362        regexp = (const char *) ctxt->string;
363	ctxt->error = XML_ERR_NO_MEMORY;
364    }
365    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
366		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
367		    regexp, NULL, 0, 0,
368		    "Memory allocation failed : %s\n", extra);
369}
370
371/**
372 * xmlRegexpErrCompile:
373 * @extra:  extra information
374 *
375 * Handle a compilation failure
376 */
377static void
378xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
379{
380    const char *regexp = NULL;
381    int idx = 0;
382
383    if (ctxt != NULL) {
384        regexp = (const char *) ctxt->string;
385	idx = ctxt->cur - ctxt->string;
386	ctxt->error = XML_REGEXP_COMPILE_ERROR;
387    }
388    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
389		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
390		    regexp, NULL, idx, 0,
391		    "failed to compile: %s\n", extra);
392}
393
394/************************************************************************
395 * 									*
396 * 			Allocation/Deallocation				*
397 * 									*
398 ************************************************************************/
399
400static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
401/**
402 * xmlRegEpxFromParse:
403 * @ctxt:  the parser context used to build it
404 *
405 * Allocate a new regexp and fill it with the result from the parser
406 *
407 * Returns the new regexp or NULL in case of error
408 */
409static xmlRegexpPtr
410xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
411    xmlRegexpPtr ret;
412
413    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
414    if (ret == NULL) {
415	xmlRegexpErrMemory(ctxt, "compiling regexp");
416	return(NULL);
417    }
418    memset(ret, 0, sizeof(xmlRegexp));
419    ret->string = ctxt->string;
420    ret->nbStates = ctxt->nbStates;
421    ret->states = ctxt->states;
422    ret->nbAtoms = ctxt->nbAtoms;
423    ret->atoms = ctxt->atoms;
424    ret->nbCounters = ctxt->nbCounters;
425    ret->counters = ctxt->counters;
426    ret->determinist = ctxt->determinist;
427    if (ret->determinist == -1) {
428        xmlRegexpIsDeterminist(ret);
429    }
430
431    if ((ret->determinist != 0) &&
432	(ret->nbCounters == 0) &&
433	(ctxt->negs == 0) &&
434	(ret->atoms != NULL) &&
435	(ret->atoms[0] != NULL) &&
436	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
437	int i, j, nbstates = 0, nbatoms = 0;
438	int *stateRemap;
439	int *stringRemap;
440	int *transitions;
441	void **transdata;
442	xmlChar **stringMap;
443        xmlChar *value;
444
445	/*
446	 * Switch to a compact representation
447	 * 1/ counting the effective number of states left
448	 * 2/ counting the unique number of atoms, and check that
449	 *    they are all of the string type
450	 * 3/ build a table state x atom for the transitions
451	 */
452
453	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
454	if (stateRemap == NULL) {
455	    xmlRegexpErrMemory(ctxt, "compiling regexp");
456	    xmlFree(ret);
457	    return(NULL);
458	}
459	for (i = 0;i < ret->nbStates;i++) {
460	    if (ret->states[i] != NULL) {
461		stateRemap[i] = nbstates;
462		nbstates++;
463	    } else {
464		stateRemap[i] = -1;
465	    }
466	}
467#ifdef DEBUG_COMPACTION
468	printf("Final: %d states\n", nbstates);
469#endif
470	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
471	if (stringMap == NULL) {
472	    xmlRegexpErrMemory(ctxt, "compiling regexp");
473	    xmlFree(stateRemap);
474	    xmlFree(ret);
475	    return(NULL);
476	}
477	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
478	if (stringRemap == NULL) {
479	    xmlRegexpErrMemory(ctxt, "compiling regexp");
480	    xmlFree(stringMap);
481	    xmlFree(stateRemap);
482	    xmlFree(ret);
483	    return(NULL);
484	}
485	for (i = 0;i < ret->nbAtoms;i++) {
486	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
487		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
488		value = ret->atoms[i]->valuep;
489                for (j = 0;j < nbatoms;j++) {
490		    if (xmlStrEqual(stringMap[j], value)) {
491			stringRemap[i] = j;
492			break;
493		    }
494		}
495		if (j >= nbatoms) {
496		    stringRemap[i] = nbatoms;
497		    stringMap[nbatoms] = xmlStrdup(value);
498		    if (stringMap[nbatoms] == NULL) {
499			for (i = 0;i < nbatoms;i++)
500			    xmlFree(stringMap[i]);
501			xmlFree(stringRemap);
502			xmlFree(stringMap);
503			xmlFree(stateRemap);
504			xmlFree(ret);
505			return(NULL);
506		    }
507		    nbatoms++;
508		}
509	    } else {
510		xmlFree(stateRemap);
511		xmlFree(stringRemap);
512		for (i = 0;i < nbatoms;i++)
513		    xmlFree(stringMap[i]);
514		xmlFree(stringMap);
515		xmlFree(ret);
516		return(NULL);
517	    }
518	}
519#ifdef DEBUG_COMPACTION
520	printf("Final: %d atoms\n", nbatoms);
521#endif
522	transitions = (int *) xmlMalloc((nbstates + 1) *
523	                                (nbatoms + 1) * sizeof(int));
524	if (transitions == NULL) {
525	    xmlFree(stateRemap);
526	    xmlFree(stringRemap);
527	    xmlFree(stringMap);
528	    xmlFree(ret);
529	    return(NULL);
530	}
531	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
532
533	/*
534	 * Allocate the transition table. The first entry for each
535	 * state corresponds to the state type.
536	 */
537	transdata = NULL;
538
539	for (i = 0;i < ret->nbStates;i++) {
540	    int stateno, atomno, targetno, prev;
541	    xmlRegStatePtr state;
542	    xmlRegTransPtr trans;
543
544	    stateno = stateRemap[i];
545	    if (stateno == -1)
546		continue;
547	    state = ret->states[i];
548
549	    transitions[stateno * (nbatoms + 1)] = state->type;
550
551	    for (j = 0;j < state->nbTrans;j++) {
552		trans = &(state->trans[j]);
553		if ((trans->to == -1) || (trans->atom == NULL))
554		    continue;
555                atomno = stringRemap[trans->atom->no];
556		if ((trans->atom->data != NULL) && (transdata == NULL)) {
557		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
558			                            sizeof(void *));
559		    if (transdata != NULL)
560			memset(transdata, 0,
561			       nbstates * nbatoms * sizeof(void *));
562		    else {
563			xmlRegexpErrMemory(ctxt, "compiling regexp");
564			break;
565		    }
566		}
567		targetno = stateRemap[trans->to];
568		/*
569		 * if the same atom can generate transitions to 2 different
570		 * states then it means the automata is not determinist and
571		 * the compact form can't be used !
572		 */
573		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
574		if (prev != 0) {
575		    if (prev != targetno + 1) {
576			ret->determinist = 0;
577#ifdef DEBUG_COMPACTION
578			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
579			       i, j, trans->atom->no, trans->to, atomno, targetno);
580			printf("       previous to is %d\n", prev);
581#endif
582			if (transdata != NULL)
583			    xmlFree(transdata);
584			xmlFree(transitions);
585			xmlFree(stateRemap);
586			xmlFree(stringRemap);
587			for (i = 0;i < nbatoms;i++)
588			    xmlFree(stringMap[i]);
589			xmlFree(stringMap);
590			goto not_determ;
591		    }
592		} else {
593#if 0
594		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
595			   i, j, trans->atom->no, trans->to, atomno, targetno);
596#endif
597		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
598			targetno + 1; /* to avoid 0 */
599		    if (transdata != NULL)
600			transdata[stateno * nbatoms + atomno] =
601			    trans->atom->data;
602		}
603	    }
604	}
605	ret->determinist = 1;
606#ifdef DEBUG_COMPACTION
607	/*
608	 * Debug
609	 */
610	for (i = 0;i < nbstates;i++) {
611	    for (j = 0;j < nbatoms + 1;j++) {
612                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
613	    }
614	    printf("\n");
615	}
616	printf("\n");
617#endif
618	/*
619	 * Cleanup of the old data
620	 */
621	if (ret->states != NULL) {
622	    for (i = 0;i < ret->nbStates;i++)
623		xmlRegFreeState(ret->states[i]);
624	    xmlFree(ret->states);
625	}
626	ret->states = NULL;
627	ret->nbStates = 0;
628	if (ret->atoms != NULL) {
629	    for (i = 0;i < ret->nbAtoms;i++)
630		xmlRegFreeAtom(ret->atoms[i]);
631	    xmlFree(ret->atoms);
632	}
633	ret->atoms = NULL;
634	ret->nbAtoms = 0;
635
636	ret->compact = transitions;
637	ret->transdata = transdata;
638	ret->stringMap = stringMap;
639	ret->nbstrings = nbatoms;
640	ret->nbstates = nbstates;
641	xmlFree(stateRemap);
642	xmlFree(stringRemap);
643    }
644not_determ:
645    ctxt->string = NULL;
646    ctxt->nbStates = 0;
647    ctxt->states = NULL;
648    ctxt->nbAtoms = 0;
649    ctxt->atoms = NULL;
650    ctxt->nbCounters = 0;
651    ctxt->counters = NULL;
652    return(ret);
653}
654
655/**
656 * xmlRegNewParserCtxt:
657 * @string:  the string to parse
658 *
659 * Allocate a new regexp parser context
660 *
661 * Returns the new context or NULL in case of error
662 */
663static xmlRegParserCtxtPtr
664xmlRegNewParserCtxt(const xmlChar *string) {
665    xmlRegParserCtxtPtr ret;
666
667    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
668    if (ret == NULL)
669	return(NULL);
670    memset(ret, 0, sizeof(xmlRegParserCtxt));
671    if (string != NULL)
672	ret->string = xmlStrdup(string);
673    ret->cur = ret->string;
674    ret->neg = 0;
675    ret->negs = 0;
676    ret->error = 0;
677    ret->determinist = -1;
678    return(ret);
679}
680
681/**
682 * xmlRegNewRange:
683 * @ctxt:  the regexp parser context
684 * @neg:  is that negative
685 * @type:  the type of range
686 * @start:  the start codepoint
687 * @end:  the end codepoint
688 *
689 * Allocate a new regexp range
690 *
691 * Returns the new range or NULL in case of error
692 */
693static xmlRegRangePtr
694xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
695	       int neg, xmlRegAtomType type, int start, int end) {
696    xmlRegRangePtr ret;
697
698    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
699    if (ret == NULL) {
700	xmlRegexpErrMemory(ctxt, "allocating range");
701	return(NULL);
702    }
703    ret->neg = neg;
704    ret->type = type;
705    ret->start = start;
706    ret->end = end;
707    return(ret);
708}
709
710/**
711 * xmlRegFreeRange:
712 * @range:  the regexp range
713 *
714 * Free a regexp range
715 */
716static void
717xmlRegFreeRange(xmlRegRangePtr range) {
718    if (range == NULL)
719	return;
720
721    if (range->blockName != NULL)
722	xmlFree(range->blockName);
723    xmlFree(range);
724}
725
726/**
727 * xmlRegNewAtom:
728 * @ctxt:  the regexp parser context
729 * @type:  the type of atom
730 *
731 * Allocate a new regexp range
732 *
733 * Returns the new atom or NULL in case of error
734 */
735static xmlRegAtomPtr
736xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
737    xmlRegAtomPtr ret;
738
739    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
740    if (ret == NULL) {
741	xmlRegexpErrMemory(ctxt, "allocating atom");
742	return(NULL);
743    }
744    memset(ret, 0, sizeof(xmlRegAtom));
745    ret->type = type;
746    ret->quant = XML_REGEXP_QUANT_ONCE;
747    ret->min = 0;
748    ret->max = 0;
749    return(ret);
750}
751
752/**
753 * xmlRegFreeAtom:
754 * @atom:  the regexp atom
755 *
756 * Free a regexp atom
757 */
758static void
759xmlRegFreeAtom(xmlRegAtomPtr atom) {
760    int i;
761
762    if (atom == NULL)
763	return;
764
765    for (i = 0;i < atom->nbRanges;i++)
766	xmlRegFreeRange(atom->ranges[i]);
767    if (atom->ranges != NULL)
768	xmlFree(atom->ranges);
769    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
770	xmlFree(atom->valuep);
771    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
772	xmlFree(atom->valuep2);
773    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
774	xmlFree(atom->valuep);
775    xmlFree(atom);
776}
777
778static xmlRegStatePtr
779xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
780    xmlRegStatePtr ret;
781
782    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
783    if (ret == NULL) {
784	xmlRegexpErrMemory(ctxt, "allocating state");
785	return(NULL);
786    }
787    memset(ret, 0, sizeof(xmlRegState));
788    ret->type = XML_REGEXP_TRANS_STATE;
789    ret->mark = XML_REGEXP_MARK_NORMAL;
790    return(ret);
791}
792
793/**
794 * xmlRegFreeState:
795 * @state:  the regexp state
796 *
797 * Free a regexp state
798 */
799static void
800xmlRegFreeState(xmlRegStatePtr state) {
801    if (state == NULL)
802	return;
803
804    if (state->trans != NULL)
805	xmlFree(state->trans);
806    if (state->transTo != NULL)
807	xmlFree(state->transTo);
808    xmlFree(state);
809}
810
811/**
812 * xmlRegFreeParserCtxt:
813 * @ctxt:  the regexp parser context
814 *
815 * Free a regexp parser context
816 */
817static void
818xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
819    int i;
820    if (ctxt == NULL)
821	return;
822
823    if (ctxt->string != NULL)
824	xmlFree(ctxt->string);
825    if (ctxt->states != NULL) {
826	for (i = 0;i < ctxt->nbStates;i++)
827	    xmlRegFreeState(ctxt->states[i]);
828	xmlFree(ctxt->states);
829    }
830    if (ctxt->atoms != NULL) {
831	for (i = 0;i < ctxt->nbAtoms;i++)
832	    xmlRegFreeAtom(ctxt->atoms[i]);
833	xmlFree(ctxt->atoms);
834    }
835    if (ctxt->counters != NULL)
836	xmlFree(ctxt->counters);
837    xmlFree(ctxt);
838}
839
840/************************************************************************
841 * 									*
842 * 			Display of Data structures			*
843 * 									*
844 ************************************************************************/
845
846static void
847xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
848    switch (type) {
849        case XML_REGEXP_EPSILON:
850	    fprintf(output, "epsilon "); break;
851        case XML_REGEXP_CHARVAL:
852	    fprintf(output, "charval "); break;
853        case XML_REGEXP_RANGES:
854	    fprintf(output, "ranges "); break;
855        case XML_REGEXP_SUBREG:
856	    fprintf(output, "subexpr "); break;
857        case XML_REGEXP_STRING:
858	    fprintf(output, "string "); break;
859        case XML_REGEXP_ANYCHAR:
860	    fprintf(output, "anychar "); break;
861        case XML_REGEXP_ANYSPACE:
862	    fprintf(output, "anyspace "); break;
863        case XML_REGEXP_NOTSPACE:
864	    fprintf(output, "notspace "); break;
865        case XML_REGEXP_INITNAME:
866	    fprintf(output, "initname "); break;
867        case XML_REGEXP_NOTINITNAME:
868	    fprintf(output, "notinitname "); break;
869        case XML_REGEXP_NAMECHAR:
870	    fprintf(output, "namechar "); break;
871        case XML_REGEXP_NOTNAMECHAR:
872	    fprintf(output, "notnamechar "); break;
873        case XML_REGEXP_DECIMAL:
874	    fprintf(output, "decimal "); break;
875        case XML_REGEXP_NOTDECIMAL:
876	    fprintf(output, "notdecimal "); break;
877        case XML_REGEXP_REALCHAR:
878	    fprintf(output, "realchar "); break;
879        case XML_REGEXP_NOTREALCHAR:
880	    fprintf(output, "notrealchar "); break;
881        case XML_REGEXP_LETTER:
882            fprintf(output, "LETTER "); break;
883        case XML_REGEXP_LETTER_UPPERCASE:
884            fprintf(output, "LETTER_UPPERCASE "); break;
885        case XML_REGEXP_LETTER_LOWERCASE:
886            fprintf(output, "LETTER_LOWERCASE "); break;
887        case XML_REGEXP_LETTER_TITLECASE:
888            fprintf(output, "LETTER_TITLECASE "); break;
889        case XML_REGEXP_LETTER_MODIFIER:
890            fprintf(output, "LETTER_MODIFIER "); break;
891        case XML_REGEXP_LETTER_OTHERS:
892            fprintf(output, "LETTER_OTHERS "); break;
893        case XML_REGEXP_MARK:
894            fprintf(output, "MARK "); break;
895        case XML_REGEXP_MARK_NONSPACING:
896            fprintf(output, "MARK_NONSPACING "); break;
897        case XML_REGEXP_MARK_SPACECOMBINING:
898            fprintf(output, "MARK_SPACECOMBINING "); break;
899        case XML_REGEXP_MARK_ENCLOSING:
900            fprintf(output, "MARK_ENCLOSING "); break;
901        case XML_REGEXP_NUMBER:
902            fprintf(output, "NUMBER "); break;
903        case XML_REGEXP_NUMBER_DECIMAL:
904            fprintf(output, "NUMBER_DECIMAL "); break;
905        case XML_REGEXP_NUMBER_LETTER:
906            fprintf(output, "NUMBER_LETTER "); break;
907        case XML_REGEXP_NUMBER_OTHERS:
908            fprintf(output, "NUMBER_OTHERS "); break;
909        case XML_REGEXP_PUNCT:
910            fprintf(output, "PUNCT "); break;
911        case XML_REGEXP_PUNCT_CONNECTOR:
912            fprintf(output, "PUNCT_CONNECTOR "); break;
913        case XML_REGEXP_PUNCT_DASH:
914            fprintf(output, "PUNCT_DASH "); break;
915        case XML_REGEXP_PUNCT_OPEN:
916            fprintf(output, "PUNCT_OPEN "); break;
917        case XML_REGEXP_PUNCT_CLOSE:
918            fprintf(output, "PUNCT_CLOSE "); break;
919        case XML_REGEXP_PUNCT_INITQUOTE:
920            fprintf(output, "PUNCT_INITQUOTE "); break;
921        case XML_REGEXP_PUNCT_FINQUOTE:
922            fprintf(output, "PUNCT_FINQUOTE "); break;
923        case XML_REGEXP_PUNCT_OTHERS:
924            fprintf(output, "PUNCT_OTHERS "); break;
925        case XML_REGEXP_SEPAR:
926            fprintf(output, "SEPAR "); break;
927        case XML_REGEXP_SEPAR_SPACE:
928            fprintf(output, "SEPAR_SPACE "); break;
929        case XML_REGEXP_SEPAR_LINE:
930            fprintf(output, "SEPAR_LINE "); break;
931        case XML_REGEXP_SEPAR_PARA:
932            fprintf(output, "SEPAR_PARA "); break;
933        case XML_REGEXP_SYMBOL:
934            fprintf(output, "SYMBOL "); break;
935        case XML_REGEXP_SYMBOL_MATH:
936            fprintf(output, "SYMBOL_MATH "); break;
937        case XML_REGEXP_SYMBOL_CURRENCY:
938            fprintf(output, "SYMBOL_CURRENCY "); break;
939        case XML_REGEXP_SYMBOL_MODIFIER:
940            fprintf(output, "SYMBOL_MODIFIER "); break;
941        case XML_REGEXP_SYMBOL_OTHERS:
942            fprintf(output, "SYMBOL_OTHERS "); break;
943        case XML_REGEXP_OTHER:
944            fprintf(output, "OTHER "); break;
945        case XML_REGEXP_OTHER_CONTROL:
946            fprintf(output, "OTHER_CONTROL "); break;
947        case XML_REGEXP_OTHER_FORMAT:
948            fprintf(output, "OTHER_FORMAT "); break;
949        case XML_REGEXP_OTHER_PRIVATE:
950            fprintf(output, "OTHER_PRIVATE "); break;
951        case XML_REGEXP_OTHER_NA:
952            fprintf(output, "OTHER_NA "); break;
953        case XML_REGEXP_BLOCK_NAME:
954	    fprintf(output, "BLOCK "); break;
955    }
956}
957
958static void
959xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
960    switch (type) {
961        case XML_REGEXP_QUANT_EPSILON:
962	    fprintf(output, "epsilon "); break;
963        case XML_REGEXP_QUANT_ONCE:
964	    fprintf(output, "once "); break;
965        case XML_REGEXP_QUANT_OPT:
966	    fprintf(output, "? "); break;
967        case XML_REGEXP_QUANT_MULT:
968	    fprintf(output, "* "); break;
969        case XML_REGEXP_QUANT_PLUS:
970	    fprintf(output, "+ "); break;
971	case XML_REGEXP_QUANT_RANGE:
972	    fprintf(output, "range "); break;
973	case XML_REGEXP_QUANT_ONCEONLY:
974	    fprintf(output, "onceonly "); break;
975	case XML_REGEXP_QUANT_ALL:
976	    fprintf(output, "all "); break;
977    }
978}
979static void
980xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
981    fprintf(output, "  range: ");
982    if (range->neg)
983	fprintf(output, "negative ");
984    xmlRegPrintAtomType(output, range->type);
985    fprintf(output, "%c - %c\n", range->start, range->end);
986}
987
988static void
989xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
990    fprintf(output, " atom: ");
991    if (atom == NULL) {
992	fprintf(output, "NULL\n");
993	return;
994    }
995    if (atom->neg)
996        fprintf(output, "not ");
997    xmlRegPrintAtomType(output, atom->type);
998    xmlRegPrintQuantType(output, atom->quant);
999    if (atom->quant == XML_REGEXP_QUANT_RANGE)
1000	fprintf(output, "%d-%d ", atom->min, atom->max);
1001    if (atom->type == XML_REGEXP_STRING)
1002	fprintf(output, "'%s' ", (char *) atom->valuep);
1003    if (atom->type == XML_REGEXP_CHARVAL)
1004	fprintf(output, "char %c\n", atom->codepoint);
1005    else if (atom->type == XML_REGEXP_RANGES) {
1006	int i;
1007	fprintf(output, "%d entries\n", atom->nbRanges);
1008	for (i = 0; i < atom->nbRanges;i++)
1009	    xmlRegPrintRange(output, atom->ranges[i]);
1010    } else if (atom->type == XML_REGEXP_SUBREG) {
1011	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1012    } else {
1013	fprintf(output, "\n");
1014    }
1015}
1016
1017static void
1018xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1019    fprintf(output, "  trans: ");
1020    if (trans == NULL) {
1021	fprintf(output, "NULL\n");
1022	return;
1023    }
1024    if (trans->to < 0) {
1025	fprintf(output, "removed\n");
1026	return;
1027    }
1028    if (trans->nd != 0) {
1029	if (trans->nd == 2)
1030	    fprintf(output, "last not determinist, ");
1031	else
1032	    fprintf(output, "not determinist, ");
1033    }
1034    if (trans->counter >= 0) {
1035	fprintf(output, "counted %d, ", trans->counter);
1036    }
1037    if (trans->count == REGEXP_ALL_COUNTER) {
1038	fprintf(output, "all transition, ");
1039    } else if (trans->count >= 0) {
1040	fprintf(output, "count based %d, ", trans->count);
1041    }
1042    if (trans->atom == NULL) {
1043	fprintf(output, "epsilon to %d\n", trans->to);
1044	return;
1045    }
1046    if (trans->atom->type == XML_REGEXP_CHARVAL)
1047	fprintf(output, "char %c ", trans->atom->codepoint);
1048    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1049}
1050
1051static void
1052xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1053    int i;
1054
1055    fprintf(output, " state: ");
1056    if (state == NULL) {
1057	fprintf(output, "NULL\n");
1058	return;
1059    }
1060    if (state->type == XML_REGEXP_START_STATE)
1061	fprintf(output, "START ");
1062    if (state->type == XML_REGEXP_FINAL_STATE)
1063	fprintf(output, "FINAL ");
1064
1065    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1066    for (i = 0;i < state->nbTrans; i++) {
1067	xmlRegPrintTrans(output, &(state->trans[i]));
1068    }
1069}
1070
1071#ifdef DEBUG_REGEXP_GRAPH
1072static void
1073xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1074    int i;
1075
1076    fprintf(output, " ctxt: ");
1077    if (ctxt == NULL) {
1078	fprintf(output, "NULL\n");
1079	return;
1080    }
1081    fprintf(output, "'%s' ", ctxt->string);
1082    if (ctxt->error)
1083	fprintf(output, "error ");
1084    if (ctxt->neg)
1085	fprintf(output, "neg ");
1086    fprintf(output, "\n");
1087    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1088    for (i = 0;i < ctxt->nbAtoms; i++) {
1089	fprintf(output, " %02d ", i);
1090	xmlRegPrintAtom(output, ctxt->atoms[i]);
1091    }
1092    if (ctxt->atom != NULL) {
1093	fprintf(output, "current atom:\n");
1094	xmlRegPrintAtom(output, ctxt->atom);
1095    }
1096    fprintf(output, "%d states:", ctxt->nbStates);
1097    if (ctxt->start != NULL)
1098	fprintf(output, " start: %d", ctxt->start->no);
1099    if (ctxt->end != NULL)
1100	fprintf(output, " end: %d", ctxt->end->no);
1101    fprintf(output, "\n");
1102    for (i = 0;i < ctxt->nbStates; i++) {
1103	xmlRegPrintState(output, ctxt->states[i]);
1104    }
1105    fprintf(output, "%d counters:\n", ctxt->nbCounters);
1106    for (i = 0;i < ctxt->nbCounters; i++) {
1107	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1108		                                ctxt->counters[i].max);
1109    }
1110}
1111#endif
1112
1113/************************************************************************
1114 * 									*
1115 *		 Finite Automata structures manipulations		*
1116 * 									*
1117 ************************************************************************/
1118
1119static void
1120xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1121	           int neg, xmlRegAtomType type, int start, int end,
1122		   xmlChar *blockName) {
1123    xmlRegRangePtr range;
1124
1125    if (atom == NULL) {
1126	ERROR("add range: atom is NULL");
1127	return;
1128    }
1129    if (atom->type != XML_REGEXP_RANGES) {
1130	ERROR("add range: atom is not ranges");
1131	return;
1132    }
1133    if (atom->maxRanges == 0) {
1134	atom->maxRanges = 4;
1135	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1136		                             sizeof(xmlRegRangePtr));
1137	if (atom->ranges == NULL) {
1138	    xmlRegexpErrMemory(ctxt, "adding ranges");
1139	    atom->maxRanges = 0;
1140	    return;
1141	}
1142    } else if (atom->nbRanges >= atom->maxRanges) {
1143	xmlRegRangePtr *tmp;
1144	atom->maxRanges *= 2;
1145	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1146		                             sizeof(xmlRegRangePtr));
1147	if (tmp == NULL) {
1148	    xmlRegexpErrMemory(ctxt, "adding ranges");
1149	    atom->maxRanges /= 2;
1150	    return;
1151	}
1152	atom->ranges = tmp;
1153    }
1154    range = xmlRegNewRange(ctxt, neg, type, start, end);
1155    if (range == NULL)
1156	return;
1157    range->blockName = blockName;
1158    atom->ranges[atom->nbRanges++] = range;
1159
1160}
1161
1162static int
1163xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1164    if (ctxt->maxCounters == 0) {
1165	ctxt->maxCounters = 4;
1166	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1167		                             sizeof(xmlRegCounter));
1168	if (ctxt->counters == NULL) {
1169	    xmlRegexpErrMemory(ctxt, "allocating counter");
1170	    ctxt->maxCounters = 0;
1171	    return(-1);
1172	}
1173    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1174	xmlRegCounter *tmp;
1175	ctxt->maxCounters *= 2;
1176	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1177		                           sizeof(xmlRegCounter));
1178	if (tmp == NULL) {
1179	    xmlRegexpErrMemory(ctxt, "allocating counter");
1180	    ctxt->maxCounters /= 2;
1181	    return(-1);
1182	}
1183	ctxt->counters = tmp;
1184    }
1185    ctxt->counters[ctxt->nbCounters].min = -1;
1186    ctxt->counters[ctxt->nbCounters].max = -1;
1187    return(ctxt->nbCounters++);
1188}
1189
1190static int
1191xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1192    if (atom == NULL) {
1193	ERROR("atom push: atom is NULL");
1194	return(-1);
1195    }
1196    if (ctxt->maxAtoms == 0) {
1197	ctxt->maxAtoms = 4;
1198	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1199		                             sizeof(xmlRegAtomPtr));
1200	if (ctxt->atoms == NULL) {
1201	    xmlRegexpErrMemory(ctxt, "pushing atom");
1202	    ctxt->maxAtoms = 0;
1203	    return(-1);
1204	}
1205    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1206	xmlRegAtomPtr *tmp;
1207	ctxt->maxAtoms *= 2;
1208	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1209		                             sizeof(xmlRegAtomPtr));
1210	if (tmp == NULL) {
1211	    xmlRegexpErrMemory(ctxt, "allocating counter");
1212	    ctxt->maxAtoms /= 2;
1213	    return(-1);
1214	}
1215	ctxt->atoms = tmp;
1216    }
1217    atom->no = ctxt->nbAtoms;
1218    ctxt->atoms[ctxt->nbAtoms++] = atom;
1219    return(0);
1220}
1221
1222static void
1223xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1224                      int from) {
1225    if (target->maxTransTo == 0) {
1226	target->maxTransTo = 8;
1227	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1228		                             sizeof(int));
1229	if (target->transTo == NULL) {
1230	    xmlRegexpErrMemory(ctxt, "adding transition");
1231	    target->maxTransTo = 0;
1232	    return;
1233	}
1234    } else if (target->nbTransTo >= target->maxTransTo) {
1235	int *tmp;
1236	target->maxTransTo *= 2;
1237	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1238		                             sizeof(int));
1239	if (tmp == NULL) {
1240	    xmlRegexpErrMemory(ctxt, "adding transition");
1241	    target->maxTransTo /= 2;
1242	    return;
1243	}
1244	target->transTo = tmp;
1245    }
1246    target->transTo[target->nbTransTo] = from;
1247    target->nbTransTo++;
1248}
1249
1250static void
1251xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1252	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1253		    int counter, int count) {
1254
1255    int nrtrans;
1256
1257    if (state == NULL) {
1258	ERROR("add state: state is NULL");
1259	return;
1260    }
1261    if (target == NULL) {
1262	ERROR("add state: target is NULL");
1263	return;
1264    }
1265    /*
1266     * Other routines follow the philosophy 'When in doubt, add a transition'
1267     * so we check here whether such a transition is already present and, if
1268     * so, silently ignore this request.
1269     */
1270
1271    for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1272	xmlRegTransPtr trans = &(state->trans[nrtrans]);
1273	if ((trans->atom == atom) &&
1274	    (trans->to == target->no) &&
1275	    (trans->counter == counter) &&
1276	    (trans->count == count)) {
1277#ifdef DEBUG_REGEXP_GRAPH
1278	    printf("Ignoring duplicate transition from %d to %d\n",
1279		    state->no, target->no);
1280#endif
1281	    return;
1282	}
1283    }
1284
1285    if (state->maxTrans == 0) {
1286	state->maxTrans = 8;
1287	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1288		                             sizeof(xmlRegTrans));
1289	if (state->trans == NULL) {
1290	    xmlRegexpErrMemory(ctxt, "adding transition");
1291	    state->maxTrans = 0;
1292	    return;
1293	}
1294    } else if (state->nbTrans >= state->maxTrans) {
1295	xmlRegTrans *tmp;
1296	state->maxTrans *= 2;
1297	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1298		                             sizeof(xmlRegTrans));
1299	if (tmp == NULL) {
1300	    xmlRegexpErrMemory(ctxt, "adding transition");
1301	    state->maxTrans /= 2;
1302	    return;
1303	}
1304	state->trans = tmp;
1305    }
1306#ifdef DEBUG_REGEXP_GRAPH
1307    printf("Add trans from %d to %d ", state->no, target->no);
1308    if (count == REGEXP_ALL_COUNTER)
1309	printf("all transition\n");
1310    else if (count >= 0)
1311	printf("count based %d\n", count);
1312    else if (counter >= 0)
1313	printf("counted %d\n", counter);
1314    else if (atom == NULL)
1315	printf("epsilon transition\n");
1316    else if (atom != NULL)
1317        xmlRegPrintAtom(stdout, atom);
1318#endif
1319
1320    state->trans[state->nbTrans].atom = atom;
1321    state->trans[state->nbTrans].to = target->no;
1322    state->trans[state->nbTrans].counter = counter;
1323    state->trans[state->nbTrans].count = count;
1324    state->trans[state->nbTrans].nd = 0;
1325    state->nbTrans++;
1326    xmlRegStateAddTransTo(ctxt, target, state->no);
1327}
1328
1329static int
1330xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1331    if (state == NULL) return(-1);
1332    if (ctxt->maxStates == 0) {
1333	ctxt->maxStates = 4;
1334	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1335		                             sizeof(xmlRegStatePtr));
1336	if (ctxt->states == NULL) {
1337	    xmlRegexpErrMemory(ctxt, "adding state");
1338	    ctxt->maxStates = 0;
1339	    return(-1);
1340	}
1341    } else if (ctxt->nbStates >= ctxt->maxStates) {
1342	xmlRegStatePtr *tmp;
1343	ctxt->maxStates *= 2;
1344	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1345		                             sizeof(xmlRegStatePtr));
1346	if (tmp == NULL) {
1347	    xmlRegexpErrMemory(ctxt, "adding state");
1348	    ctxt->maxStates /= 2;
1349	    return(-1);
1350	}
1351	ctxt->states = tmp;
1352    }
1353    state->no = ctxt->nbStates;
1354    ctxt->states[ctxt->nbStates++] = state;
1355    return(0);
1356}
1357
1358/**
1359 * xmlFAGenerateAllTransition:
1360 * @ctxt:  a regexp parser context
1361 * @from:  the from state
1362 * @to:  the target state or NULL for building a new one
1363 * @lax:
1364 *
1365 */
1366static void
1367xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1368			   xmlRegStatePtr from, xmlRegStatePtr to,
1369			   int lax) {
1370    if (to == NULL) {
1371	to = xmlRegNewState(ctxt);
1372	xmlRegStatePush(ctxt, to);
1373	ctxt->state = to;
1374    }
1375    if (lax)
1376	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1377    else
1378	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1379}
1380
1381/**
1382 * xmlFAGenerateEpsilonTransition:
1383 * @ctxt:  a regexp parser context
1384 * @from:  the from state
1385 * @to:  the target state or NULL for building a new one
1386 *
1387 */
1388static void
1389xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1390			       xmlRegStatePtr from, xmlRegStatePtr to) {
1391    if (to == NULL) {
1392	to = xmlRegNewState(ctxt);
1393	xmlRegStatePush(ctxt, to);
1394	ctxt->state = to;
1395    }
1396    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1397}
1398
1399/**
1400 * xmlFAGenerateCountedEpsilonTransition:
1401 * @ctxt:  a regexp parser context
1402 * @from:  the from state
1403 * @to:  the target state or NULL for building a new one
1404 * counter:  the counter for that transition
1405 *
1406 */
1407static void
1408xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1409	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1410    if (to == NULL) {
1411	to = xmlRegNewState(ctxt);
1412	xmlRegStatePush(ctxt, to);
1413	ctxt->state = to;
1414    }
1415    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1416}
1417
1418/**
1419 * xmlFAGenerateCountedTransition:
1420 * @ctxt:  a regexp parser context
1421 * @from:  the from state
1422 * @to:  the target state or NULL for building a new one
1423 * counter:  the counter for that transition
1424 *
1425 */
1426static void
1427xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1428	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1429    if (to == NULL) {
1430	to = xmlRegNewState(ctxt);
1431	xmlRegStatePush(ctxt, to);
1432	ctxt->state = to;
1433    }
1434    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1435}
1436
1437/**
1438 * xmlFAGenerateTransitions:
1439 * @ctxt:  a regexp parser context
1440 * @from:  the from state
1441 * @to:  the target state or NULL for building a new one
1442 * @atom:  the atom generating the transition
1443 *
1444 * Returns 0 if success and -1 in case of error.
1445 */
1446static int
1447xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1448	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1449    if (atom == NULL) {
1450	ERROR("genrate transition: atom == NULL");
1451	return(-1);
1452    }
1453    if (atom->type == XML_REGEXP_SUBREG) {
1454	/*
1455	 * this is a subexpression handling one should not need to
1456	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1457	 */
1458	if (xmlRegAtomPush(ctxt, atom) < 0) {
1459	    return(-1);
1460	}
1461	if ((to != NULL) && (atom->stop != to) &&
1462	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1463	    /*
1464	     * Generate an epsilon transition to link to the target
1465	     */
1466	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1467#ifdef DV
1468	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
1469		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
1470	    to = xmlRegNewState(ctxt);
1471	    xmlRegStatePush(ctxt, to);
1472	    ctxt->state = to;
1473	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1474#endif
1475	}
1476	switch (atom->quant) {
1477	    case XML_REGEXP_QUANT_OPT:
1478		atom->quant = XML_REGEXP_QUANT_ONCE;
1479		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1480		break;
1481	    case XML_REGEXP_QUANT_MULT:
1482		atom->quant = XML_REGEXP_QUANT_ONCE;
1483		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1484		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1485		break;
1486	    case XML_REGEXP_QUANT_PLUS:
1487		atom->quant = XML_REGEXP_QUANT_ONCE;
1488		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1489		break;
1490	    case XML_REGEXP_QUANT_RANGE: {
1491		int counter;
1492		xmlRegStatePtr newstate;
1493
1494		/*
1495		 * This one is nasty:
1496		 *   1/ if range has minOccurs == 0, create a new state
1497		 *	and create epsilon transitions from atom->start
1498		 *	to atom->stop, as well as atom->start to the new
1499		 *	state
1500		 *   2/ register a new counter
1501		 *   3/ register an epsilon transition associated to
1502		 *      this counter going from atom->stop to atom->start
1503		 *   4/ create a new state
1504		 *   5/ generate a counted transition from atom->stop to
1505		 *      that state
1506		 */
1507		if (atom->min == 0) {
1508		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1509			atom->stop);
1510		    newstate = xmlRegNewState(ctxt);
1511		    xmlRegStatePush(ctxt, newstate);
1512		    ctxt->state = newstate;
1513		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1514			newstate);
1515		}
1516		counter = xmlRegGetCounter(ctxt);
1517		ctxt->counters[counter].min = atom->min - 1;
1518		ctxt->counters[counter].max = atom->max - 1;
1519		atom->min = 0;
1520		atom->max = 0;
1521		atom->quant = XML_REGEXP_QUANT_ONCE;
1522		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1523			                              atom->start, counter);
1524		if (to != NULL) {
1525		    newstate = to;
1526		} else {
1527		    newstate = xmlRegNewState(ctxt);
1528		    xmlRegStatePush(ctxt, newstate);
1529		    ctxt->state = newstate;
1530		}
1531		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1532			                       newstate, counter);
1533	    }
1534	    default:
1535		break;
1536	}
1537	return(0);
1538    }
1539    if ((atom->min == 0) && (atom->max == 0) &&
1540               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1541        /*
1542	 * we can discard the atom and generate an epsilon transition instead
1543	 */
1544	if (to == NULL) {
1545	    to = xmlRegNewState(ctxt);
1546	    if (to != NULL)
1547		xmlRegStatePush(ctxt, to);
1548	    else {
1549		return(-1);
1550	    }
1551	}
1552	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1553	ctxt->state = to;
1554	xmlRegFreeAtom(atom);
1555	return(0);
1556    }
1557    if (to == NULL) {
1558	to = xmlRegNewState(ctxt);
1559	if (to != NULL)
1560	    xmlRegStatePush(ctxt, to);
1561	else {
1562	    return(-1);
1563	}
1564    }
1565    if (xmlRegAtomPush(ctxt, atom) < 0) {
1566	return(-1);
1567    }
1568    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1569    ctxt->state = to;
1570    switch (atom->quant) {
1571	case XML_REGEXP_QUANT_OPT:
1572	    atom->quant = XML_REGEXP_QUANT_ONCE;
1573	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1574	    break;
1575	case XML_REGEXP_QUANT_MULT:
1576	    atom->quant = XML_REGEXP_QUANT_ONCE;
1577	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1578	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1579	    break;
1580	case XML_REGEXP_QUANT_PLUS:
1581	    atom->quant = XML_REGEXP_QUANT_ONCE;
1582	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1583	    break;
1584	default:
1585	    break;
1586    }
1587    return(0);
1588}
1589
1590/**
1591 * xmlFAReduceEpsilonTransitions:
1592 * @ctxt:  a regexp parser context
1593 * @fromnr:  the from state
1594 * @tonr:  the to state
1595 * @counter:  should that transition be associated to a counted
1596 *
1597 */
1598static void
1599xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1600	                      int tonr, int counter) {
1601    int transnr;
1602    xmlRegStatePtr from;
1603    xmlRegStatePtr to;
1604
1605#ifdef DEBUG_REGEXP_GRAPH
1606    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1607#endif
1608    from = ctxt->states[fromnr];
1609    if (from == NULL)
1610	return;
1611    to = ctxt->states[tonr];
1612    if (to == NULL)
1613	return;
1614    if ((to->mark == XML_REGEXP_MARK_START) ||
1615	(to->mark == XML_REGEXP_MARK_VISITED))
1616	return;
1617
1618    to->mark = XML_REGEXP_MARK_VISITED;
1619    if (to->type == XML_REGEXP_FINAL_STATE) {
1620#ifdef DEBUG_REGEXP_GRAPH
1621	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1622#endif
1623	from->type = XML_REGEXP_FINAL_STATE;
1624    }
1625    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1626        if (to->trans[transnr].to < 0)
1627	    continue;
1628	if (to->trans[transnr].atom == NULL) {
1629	    /*
1630	     * Don't remove counted transitions
1631	     * Don't loop either
1632	     */
1633	    if (to->trans[transnr].to != fromnr) {
1634		if (to->trans[transnr].count >= 0) {
1635		    int newto = to->trans[transnr].to;
1636
1637		    xmlRegStateAddTrans(ctxt, from, NULL,
1638					ctxt->states[newto],
1639					-1, to->trans[transnr].count);
1640		} else {
1641#ifdef DEBUG_REGEXP_GRAPH
1642		    printf("Found epsilon trans %d from %d to %d\n",
1643			   transnr, tonr, to->trans[transnr].to);
1644#endif
1645		    if (to->trans[transnr].counter >= 0) {
1646			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1647					      to->trans[transnr].to,
1648					      to->trans[transnr].counter);
1649		    } else {
1650			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1651					      to->trans[transnr].to,
1652					      counter);
1653		    }
1654		}
1655	    }
1656	} else {
1657	    int newto = to->trans[transnr].to;
1658
1659	    if (to->trans[transnr].counter >= 0) {
1660		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1661				    ctxt->states[newto],
1662				    to->trans[transnr].counter, -1);
1663	    } else {
1664		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1665				    ctxt->states[newto], counter, -1);
1666	    }
1667	}
1668    }
1669    to->mark = XML_REGEXP_MARK_NORMAL;
1670}
1671
1672/**
1673 * xmlFAEliminateSimpleEpsilonTransitions:
1674 * @ctxt:  a regexp parser context
1675 *
1676 * Eliminating general epsilon transitions can get costly in the general
1677 * algorithm due to the large amount of generated new transitions and
1678 * associated comparisons. However for simple epsilon transition used just
1679 * to separate building blocks when generating the automata this can be
1680 * reduced to state elimination:
1681 *    - if there exists an epsilon from X to Y
1682 *    - if there is no other transition from X
1683 * then X and Y are semantically equivalent and X can be eliminated
1684 * If X is the start state then make Y the start state, else replace the
1685 * target of all transitions to X by transitions to Y.
1686 */
1687static void
1688xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1689    int statenr, i, j, newto;
1690    xmlRegStatePtr state, tmp;
1691
1692    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1693	state = ctxt->states[statenr];
1694	if (state == NULL)
1695	    continue;
1696	if (state->nbTrans != 1)
1697	    continue;
1698	/* is the only transition out a basic transition */
1699	if ((state->trans[0].atom == NULL) &&
1700	    (state->trans[0].to >= 0) &&
1701	    (state->trans[0].to != statenr) &&
1702	    (state->trans[0].counter < 0) &&
1703	    (state->trans[0].count < 0)) {
1704	    newto = state->trans[0].to;
1705
1706            if (state->type == XML_REGEXP_START_STATE) {
1707#ifdef DEBUG_REGEXP_GRAPH
1708		printf("Found simple epsilon trans from start %d to %d\n",
1709		       statenr, newto);
1710#endif
1711            } else {
1712#ifdef DEBUG_REGEXP_GRAPH
1713		printf("Found simple epsilon trans from %d to %d\n",
1714		       statenr, newto);
1715#endif
1716	        for (i = 0;i < state->nbTransTo;i++) {
1717		    tmp = ctxt->states[state->transTo[i]];
1718		    for (j = 0;j < tmp->nbTrans;j++) {
1719			if (tmp->trans[j].to == statenr) {
1720			    tmp->trans[j].to = newto;
1721#ifdef DEBUG_REGEXP_GRAPH
1722			    printf("Changed transition %d on %d to go to %d\n",
1723				   j, tmp->no, newto);
1724#endif
1725                            xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
1726			                          tmp->no);
1727			}
1728		    }
1729		}
1730#if 0
1731	        for (i = 0;i < ctxt->nbStates;i++) {
1732		    tmp = ctxt->states[i];
1733		    for (j = 0;j < tmp->nbTrans;j++) {
1734			if (tmp->trans[j].to == statenr) {
1735			    tmp->trans[j].to = newto;
1736#ifdef DEBUG_REGEXP_GRAPH
1737			    printf("Changed transition %d on %d to go to %d\n",
1738				   j, tmp->no, newto);
1739#endif
1740			}
1741		    }
1742		}
1743#endif
1744		if (state->type == XML_REGEXP_FINAL_STATE)
1745		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1746		/* eliminate the transition completely */
1747		state->nbTrans = 0;
1748
1749
1750	    }
1751
1752	}
1753    }
1754}
1755/**
1756 * xmlFAEliminateEpsilonTransitions:
1757 * @ctxt:  a regexp parser context
1758 *
1759 */
1760static void
1761xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1762    int statenr, transnr;
1763    xmlRegStatePtr state;
1764    int has_epsilon;
1765
1766    if (ctxt->states == NULL) return;
1767
1768    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1769
1770    has_epsilon = 0;
1771
1772    /*
1773     * build the completed transitions bypassing the epsilons
1774     * Use a marking algorithm to avoid loops
1775     * mark sink states too.
1776     */
1777    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1778	state = ctxt->states[statenr];
1779	if (state == NULL)
1780	    continue;
1781	if ((state->nbTrans == 0) &&
1782	    (state->type != XML_REGEXP_FINAL_STATE)) {
1783	    state->type = XML_REGEXP_SINK_STATE;
1784	}
1785	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1786	    if ((state->trans[transnr].atom == NULL) &&
1787		(state->trans[transnr].to >= 0)) {
1788		if (state->trans[transnr].to == statenr) {
1789		    state->trans[transnr].to = -1;
1790#ifdef DEBUG_REGEXP_GRAPH
1791		    printf("Removed loopback epsilon trans %d on %d\n",
1792			   transnr, statenr);
1793#endif
1794		} else if (state->trans[transnr].count < 0) {
1795		    int newto = state->trans[transnr].to;
1796
1797#ifdef DEBUG_REGEXP_GRAPH
1798		    printf("Found epsilon trans %d from %d to %d\n",
1799			   transnr, statenr, newto);
1800#endif
1801		    state->mark = XML_REGEXP_MARK_START;
1802		    has_epsilon = 1;
1803		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1804				      newto, state->trans[transnr].counter);
1805		    state->mark = XML_REGEXP_MARK_NORMAL;
1806#ifdef DEBUG_REGEXP_GRAPH
1807		} else {
1808		    printf("Found counted transition %d on %d\n",
1809			   transnr, statenr);
1810#endif
1811	        }
1812	    }
1813	}
1814    }
1815    /*
1816     * Eliminate the epsilon transitions
1817     */
1818    if (has_epsilon) {
1819	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1820	    state = ctxt->states[statenr];
1821	    if (state == NULL)
1822		continue;
1823	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1824		xmlRegTransPtr trans = &(state->trans[transnr]);
1825		if ((trans->atom == NULL) &&
1826		    (trans->count < 0) &&
1827		    (trans->to >= 0)) {
1828		    trans->to = -1;
1829		}
1830	    }
1831	}
1832    }
1833
1834    /*
1835     * Use this pass to detect unreachable states too
1836     */
1837    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1838	state = ctxt->states[statenr];
1839	if (state != NULL)
1840	    state->reached = XML_REGEXP_MARK_NORMAL;
1841    }
1842    state = ctxt->states[0];
1843    if (state != NULL)
1844	state->reached = XML_REGEXP_MARK_START;
1845    while (state != NULL) {
1846	xmlRegStatePtr target = NULL;
1847	state->reached = XML_REGEXP_MARK_VISITED;
1848	/*
1849	 * Mark all states reachable from the current reachable state
1850	 */
1851	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1852	    if ((state->trans[transnr].to >= 0) &&
1853		((state->trans[transnr].atom != NULL) ||
1854		 (state->trans[transnr].count >= 0))) {
1855		int newto = state->trans[transnr].to;
1856
1857		if (ctxt->states[newto] == NULL)
1858		    continue;
1859		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
1860		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
1861		    target = ctxt->states[newto];
1862		}
1863	    }
1864	}
1865
1866	/*
1867	 * find the next accessible state not explored
1868	 */
1869	if (target == NULL) {
1870	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1871		state = ctxt->states[statenr];
1872		if ((state != NULL) && (state->reached ==
1873			XML_REGEXP_MARK_START)) {
1874		    target = state;
1875		    break;
1876		}
1877	    }
1878	}
1879	state = target;
1880    }
1881    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1882	state = ctxt->states[statenr];
1883	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
1884#ifdef DEBUG_REGEXP_GRAPH
1885	    printf("Removed unreachable state %d\n", statenr);
1886#endif
1887	    xmlRegFreeState(state);
1888	    ctxt->states[statenr] = NULL;
1889	}
1890    }
1891
1892}
1893
1894static int
1895xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
1896    int ret = 0;
1897
1898    if ((range1->type == XML_REGEXP_RANGES) ||
1899        (range2->type == XML_REGEXP_RANGES) ||
1900        (range2->type == XML_REGEXP_SUBREG) ||
1901        (range1->type == XML_REGEXP_SUBREG) ||
1902        (range1->type == XML_REGEXP_STRING) ||
1903        (range2->type == XML_REGEXP_STRING))
1904	return(-1);
1905
1906    /* put them in order */
1907    if (range1->type > range2->type) {
1908        xmlRegRangePtr tmp;
1909
1910	tmp = range1;
1911	range1 = range2;
1912	range2 = tmp;
1913    }
1914    if ((range1->type == XML_REGEXP_ANYCHAR) ||
1915        (range2->type == XML_REGEXP_ANYCHAR)) {
1916	ret = 1;
1917    } else if ((range1->type == XML_REGEXP_EPSILON) ||
1918               (range2->type == XML_REGEXP_EPSILON)) {
1919	return(0);
1920    } else if (range1->type == range2->type) {
1921        if ((range1->type != XML_REGEXP_CHARVAL) ||
1922	    (range1->end < range2->start) ||
1923	    (range2->end < range1->start))
1924	    ret = 1;
1925	else
1926	    ret = 0;
1927    } else if (range1->type == XML_REGEXP_CHARVAL) {
1928        int codepoint;
1929	int neg = 0;
1930
1931	/*
1932	 * just check all codepoints in the range for acceptance,
1933	 * this is usually way cheaper since done only once at
1934	 * compilation than testing over and over at runtime or
1935	 * pushing too many states when evaluating.
1936	 */
1937	if (((range1->neg == 0) && (range2->neg != 0)) ||
1938	    ((range1->neg != 0) && (range2->neg == 0)))
1939	    neg = 1;
1940
1941	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
1942	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
1943					    0, range2->start, range2->end,
1944					    range2->blockName);
1945	    if (ret < 0)
1946	        return(-1);
1947	    if (((neg == 1) && (ret == 0)) ||
1948	        ((neg == 0) && (ret == 1)))
1949		return(1);
1950	}
1951	return(0);
1952    } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
1953               (range2->type == XML_REGEXP_BLOCK_NAME)) {
1954	if (range1->type == range2->type) {
1955	    ret = xmlStrEqual(range1->blockName, range2->blockName);
1956	} else {
1957	    /*
1958	     * comparing a block range with anything else is way
1959	     * too costly, and maintining the table is like too much
1960	     * memory too, so let's force the automata to save state
1961	     * here.
1962	     */
1963	    return(1);
1964	}
1965    } else if ((range1->type < XML_REGEXP_LETTER) ||
1966               (range2->type < XML_REGEXP_LETTER)) {
1967	if ((range1->type == XML_REGEXP_ANYSPACE) &&
1968	    (range2->type == XML_REGEXP_NOTSPACE))
1969	    ret = 0;
1970	else if ((range1->type == XML_REGEXP_INITNAME) &&
1971	         (range2->type == XML_REGEXP_NOTINITNAME))
1972	    ret = 0;
1973	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
1974	         (range2->type == XML_REGEXP_NOTNAMECHAR))
1975	    ret = 0;
1976	else if ((range1->type == XML_REGEXP_DECIMAL) &&
1977	         (range2->type == XML_REGEXP_NOTDECIMAL))
1978	    ret = 0;
1979	else if ((range1->type == XML_REGEXP_REALCHAR) &&
1980	         (range2->type == XML_REGEXP_NOTREALCHAR))
1981	    ret = 0;
1982	else {
1983	    /* same thing to limit complexity */
1984	    return(1);
1985	}
1986    } else {
1987        ret = 0;
1988        /* range1->type < range2->type here */
1989        switch (range1->type) {
1990	    case XML_REGEXP_LETTER:
1991	         /* all disjoint except in the subgroups */
1992	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
1993		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
1994		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
1995		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
1996		     (range2->type == XML_REGEXP_LETTER_OTHERS))
1997		     ret = 1;
1998		 break;
1999	    case XML_REGEXP_MARK:
2000	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
2001		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
2002		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
2003		     ret = 1;
2004		 break;
2005	    case XML_REGEXP_NUMBER:
2006	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
2007		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
2008		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
2009		     ret = 1;
2010		 break;
2011	    case XML_REGEXP_PUNCT:
2012	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
2013		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
2014		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
2015		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
2016		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
2017		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
2018		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
2019		     ret = 1;
2020		 break;
2021	    case XML_REGEXP_SEPAR:
2022	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
2023		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
2024		     (range2->type == XML_REGEXP_SEPAR_PARA))
2025		     ret = 1;
2026		 break;
2027	    case XML_REGEXP_SYMBOL:
2028	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
2029		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
2030		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
2031		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
2032		     ret = 1;
2033		 break;
2034	    case XML_REGEXP_OTHER:
2035	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
2036		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
2037		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
2038		     ret = 1;
2039		 break;
2040            default:
2041	         if ((range2->type >= XML_REGEXP_LETTER) &&
2042		     (range2->type < XML_REGEXP_BLOCK_NAME))
2043		     ret = 0;
2044		 else {
2045		     /* safety net ! */
2046		     return(1);
2047		 }
2048	}
2049    }
2050    if (((range1->neg == 0) && (range2->neg != 0)) ||
2051        ((range1->neg != 0) && (range2->neg == 0)))
2052	ret = !ret;
2053    return(1);
2054}
2055
2056/**
2057 * xmlFACompareAtoms:
2058 * @atom1:  an atom
2059 * @atom2:  an atom
2060 *
2061 * Compares two atoms to check whether they intersect in some ways,
2062 * this is used by xmlFAComputesDeterminism only
2063 *
2064 * Returns 1 if yes and 0 otherwise
2065 */
2066static int
2067xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
2068    int ret;
2069
2070    if (atom1 == atom2)
2071	return(1);
2072    if ((atom1 == NULL) || (atom2 == NULL))
2073	return(0);
2074
2075    if ((atom1->type == XML_REGEXP_RANGES) &&
2076        (atom2->type == XML_REGEXP_CHARVAL)) {
2077    } else if ((atom1->type == XML_REGEXP_CHARVAL) &&
2078	       (atom2->type == XML_REGEXP_RANGES)) {
2079	xmlRegAtomPtr tmp;
2080	tmp = atom1;
2081	atom1 = atom2;
2082	atom2 = tmp;
2083    } else if (atom1->type != atom2->type) {
2084	return(0);
2085    }
2086    switch (atom1->type) {
2087        case XML_REGEXP_STRING:
2088	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
2089	                                 (xmlChar *)atom2->valuep);
2090	    break;
2091        case XML_REGEXP_EPSILON:
2092	    goto not_determinist;
2093        case XML_REGEXP_CHARVAL:
2094	    ret = (atom1->codepoint == atom2->codepoint);
2095	    break;
2096        case XML_REGEXP_RANGES:
2097	    if (atom2->type == XML_REGEXP_CHARVAL) {
2098	        ret = xmlRegCheckCharacter(atom1, atom2->codepoint);
2099		if (ret < 0)
2100		    return(-1);
2101		break;
2102	    } else {
2103	        int i, j, res;
2104		xmlRegRangePtr r1, r2;
2105
2106		/*
2107		 * need to check that none of the ranges eventually matches
2108		 */
2109		for (i = 0;i < atom1->nbRanges;i++) {
2110		    for (j = 0;j < atom2->nbRanges;j++) {
2111			r1 = atom1->ranges[i];
2112			r2 = atom2->ranges[j];
2113			res = xmlFACompareRanges(r1, r2);
2114			if (res == 1) {
2115			    ret = 1;
2116			    goto done;
2117			}
2118		    }
2119		}
2120		ret = 0;
2121	    }
2122	    break;
2123	default:
2124	    goto not_determinist;
2125    }
2126done:
2127    if (atom1->neg != atom2->neg) {
2128        ret = !ret;
2129    }
2130    if (ret == 0)
2131        return(0);
2132not_determinist:
2133    return(1);
2134}
2135
2136/**
2137 * xmlFARecurseDeterminism:
2138 * @ctxt:  a regexp parser context
2139 *
2140 * Check whether the associated regexp is determinist,
2141 * should be called after xmlFAEliminateEpsilonTransitions()
2142 *
2143 */
2144static int
2145xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
2146	                 int to, xmlRegAtomPtr atom) {
2147    int ret = 1;
2148    int res;
2149    int transnr, nbTrans;
2150    xmlRegTransPtr t1;
2151
2152    if (state == NULL)
2153	return(ret);
2154    /*
2155     * don't recurse on transitions potentially added in the course of
2156     * the elimination.
2157     */
2158    nbTrans = state->nbTrans;
2159    for (transnr = 0;transnr < nbTrans;transnr++) {
2160	t1 = &(state->trans[transnr]);
2161	/*
2162	 * check transitions conflicting with the one looked at
2163	 */
2164	if (t1->atom == NULL) {
2165	    if (t1->to == -1)
2166		continue;
2167	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2168		                           to, atom);
2169	    if (res == 0) {
2170	        ret = 0;
2171		/* t1->nd = 1; */
2172	    }
2173	    continue;
2174	}
2175	if (t1->to != to)
2176	    continue;
2177	if (xmlFACompareAtoms(t1->atom, atom)) {
2178	    ret = 0;
2179	    /* mark the transition as non-deterministic */
2180	    t1->nd = 1;
2181	}
2182    }
2183    return(ret);
2184}
2185
2186/**
2187 * xmlFAComputesDeterminism:
2188 * @ctxt:  a regexp parser context
2189 *
2190 * Check whether the associated regexp is determinist,
2191 * should be called after xmlFAEliminateEpsilonTransitions()
2192 *
2193 */
2194static int
2195xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
2196    int statenr, transnr;
2197    xmlRegStatePtr state;
2198    xmlRegTransPtr t1, t2, last;
2199    int i;
2200    int ret = 1;
2201
2202#ifdef DEBUG_REGEXP_GRAPH
2203    printf("xmlFAComputesDeterminism\n");
2204    xmlRegPrintCtxt(stdout, ctxt);
2205#endif
2206    if (ctxt->determinist != -1)
2207	return(ctxt->determinist);
2208
2209    /*
2210     * First cleanup the automata removing cancelled transitions
2211     */
2212    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2213	state = ctxt->states[statenr];
2214	if (state == NULL)
2215	    continue;
2216	if (state->nbTrans < 2)
2217	    continue;
2218	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2219	    t1 = &(state->trans[transnr]);
2220	    /*
2221	     * Determinism checks in case of counted or all transitions
2222	     * will have to be handled separately
2223	     */
2224	    if (t1->atom == NULL) {
2225		/* t1->nd = 1; */
2226		continue;
2227	    }
2228	    if (t1->to == -1) /* eliminated */
2229		continue;
2230	    for (i = 0;i < transnr;i++) {
2231		t2 = &(state->trans[i]);
2232		if (t2->to == -1) /* eliminated */
2233		    continue;
2234		if (t2->atom != NULL) {
2235		    if (t1->to == t2->to) {
2236			if (xmlFACompareAtoms(t1->atom, t2->atom))
2237			    t2->to = -1; /* eliminated */
2238		    }
2239		}
2240	    }
2241	}
2242    }
2243
2244    /*
2245     * Check for all states that there aren't 2 transitions
2246     * with the same atom and a different target.
2247     */
2248    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
2249	state = ctxt->states[statenr];
2250	if (state == NULL)
2251	    continue;
2252	if (state->nbTrans < 2)
2253	    continue;
2254	last = NULL;
2255	for (transnr = 0;transnr < state->nbTrans;transnr++) {
2256	    t1 = &(state->trans[transnr]);
2257	    /*
2258	     * Determinism checks in case of counted or all transitions
2259	     * will have to be handled separately
2260	     */
2261	    if (t1->atom == NULL) {
2262		continue;
2263	    }
2264	    if (t1->to == -1) /* eliminated */
2265		continue;
2266	    for (i = 0;i < transnr;i++) {
2267		t2 = &(state->trans[i]);
2268		if (t2->to == -1) /* eliminated */
2269		    continue;
2270		if (t2->atom != NULL) {
2271		    /* not determinist ! */
2272		    if (xmlFACompareAtoms(t1->atom, t2->atom)) {
2273			ret = 0;
2274			/* mark the transitions as non-deterministic ones */
2275			t1->nd = 1;
2276			t2->nd = 1;
2277			last = t1;
2278		    }
2279		} else if (t1->to != -1) {
2280		    /*
2281		     * do the closure in case of remaining specific
2282		     * epsilon transitions like choices or all
2283		     */
2284		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2285						   t2->to, t2->atom);
2286		    /* don't shortcut the computation so all non deterministic
2287		       transition get marked down
2288		    if (ret == 0)
2289			return(0);
2290		     */
2291		    if (ret == 0) {
2292			t1->nd = 1;
2293			/* t2->nd = 1; */
2294			last = t1;
2295		    }
2296		}
2297	    }
2298	    /* don't shortcut the computation so all non deterministic
2299	       transition get marked down
2300	    if (ret == 0)
2301		break; */
2302	}
2303
2304	/*
2305	 * mark specifically the last non-deterministic transition
2306	 * from a state since there is no need to set-up rollback
2307	 * from it
2308	 */
2309	if (last != NULL) {
2310	    last->nd = 2;
2311	}
2312
2313	/* don't shortcut the computation so all non deterministic
2314	   transition get marked down
2315	if (ret == 0)
2316	    break; */
2317    }
2318
2319    ctxt->determinist = ret;
2320    return(ret);
2321}
2322
2323/************************************************************************
2324 * 									*
2325 *	Routines to check input against transition atoms		*
2326 * 									*
2327 ************************************************************************/
2328
2329static int
2330xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2331	                  int start, int end, const xmlChar *blockName) {
2332    int ret = 0;
2333
2334    switch (type) {
2335        case XML_REGEXP_STRING:
2336        case XML_REGEXP_SUBREG:
2337        case XML_REGEXP_RANGES:
2338        case XML_REGEXP_EPSILON:
2339	    return(-1);
2340        case XML_REGEXP_ANYCHAR:
2341	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2342	    break;
2343        case XML_REGEXP_CHARVAL:
2344	    ret = ((codepoint >= start) && (codepoint <= end));
2345	    break;
2346        case XML_REGEXP_NOTSPACE:
2347	    neg = !neg;
2348        case XML_REGEXP_ANYSPACE:
2349	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2350		   (codepoint == '\t') || (codepoint == ' '));
2351	    break;
2352        case XML_REGEXP_NOTINITNAME:
2353	    neg = !neg;
2354        case XML_REGEXP_INITNAME:
2355	    ret = (IS_LETTER(codepoint) ||
2356		   (codepoint == '_') || (codepoint == ':'));
2357	    break;
2358        case XML_REGEXP_NOTNAMECHAR:
2359	    neg = !neg;
2360        case XML_REGEXP_NAMECHAR:
2361	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2362		   (codepoint == '.') || (codepoint == '-') ||
2363		   (codepoint == '_') || (codepoint == ':') ||
2364		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2365	    break;
2366        case XML_REGEXP_NOTDECIMAL:
2367	    neg = !neg;
2368        case XML_REGEXP_DECIMAL:
2369	    ret = xmlUCSIsCatNd(codepoint);
2370	    break;
2371        case XML_REGEXP_REALCHAR:
2372	    neg = !neg;
2373        case XML_REGEXP_NOTREALCHAR:
2374	    ret = xmlUCSIsCatP(codepoint);
2375	    if (ret == 0)
2376		ret = xmlUCSIsCatZ(codepoint);
2377	    if (ret == 0)
2378		ret = xmlUCSIsCatC(codepoint);
2379	    break;
2380        case XML_REGEXP_LETTER:
2381	    ret = xmlUCSIsCatL(codepoint);
2382	    break;
2383        case XML_REGEXP_LETTER_UPPERCASE:
2384	    ret = xmlUCSIsCatLu(codepoint);
2385	    break;
2386        case XML_REGEXP_LETTER_LOWERCASE:
2387	    ret = xmlUCSIsCatLl(codepoint);
2388	    break;
2389        case XML_REGEXP_LETTER_TITLECASE:
2390	    ret = xmlUCSIsCatLt(codepoint);
2391	    break;
2392        case XML_REGEXP_LETTER_MODIFIER:
2393	    ret = xmlUCSIsCatLm(codepoint);
2394	    break;
2395        case XML_REGEXP_LETTER_OTHERS:
2396	    ret = xmlUCSIsCatLo(codepoint);
2397	    break;
2398        case XML_REGEXP_MARK:
2399	    ret = xmlUCSIsCatM(codepoint);
2400	    break;
2401        case XML_REGEXP_MARK_NONSPACING:
2402	    ret = xmlUCSIsCatMn(codepoint);
2403	    break;
2404        case XML_REGEXP_MARK_SPACECOMBINING:
2405	    ret = xmlUCSIsCatMc(codepoint);
2406	    break;
2407        case XML_REGEXP_MARK_ENCLOSING:
2408	    ret = xmlUCSIsCatMe(codepoint);
2409	    break;
2410        case XML_REGEXP_NUMBER:
2411	    ret = xmlUCSIsCatN(codepoint);
2412	    break;
2413        case XML_REGEXP_NUMBER_DECIMAL:
2414	    ret = xmlUCSIsCatNd(codepoint);
2415	    break;
2416        case XML_REGEXP_NUMBER_LETTER:
2417	    ret = xmlUCSIsCatNl(codepoint);
2418	    break;
2419        case XML_REGEXP_NUMBER_OTHERS:
2420	    ret = xmlUCSIsCatNo(codepoint);
2421	    break;
2422        case XML_REGEXP_PUNCT:
2423	    ret = xmlUCSIsCatP(codepoint);
2424	    break;
2425        case XML_REGEXP_PUNCT_CONNECTOR:
2426	    ret = xmlUCSIsCatPc(codepoint);
2427	    break;
2428        case XML_REGEXP_PUNCT_DASH:
2429	    ret = xmlUCSIsCatPd(codepoint);
2430	    break;
2431        case XML_REGEXP_PUNCT_OPEN:
2432	    ret = xmlUCSIsCatPs(codepoint);
2433	    break;
2434        case XML_REGEXP_PUNCT_CLOSE:
2435	    ret = xmlUCSIsCatPe(codepoint);
2436	    break;
2437        case XML_REGEXP_PUNCT_INITQUOTE:
2438	    ret = xmlUCSIsCatPi(codepoint);
2439	    break;
2440        case XML_REGEXP_PUNCT_FINQUOTE:
2441	    ret = xmlUCSIsCatPf(codepoint);
2442	    break;
2443        case XML_REGEXP_PUNCT_OTHERS:
2444	    ret = xmlUCSIsCatPo(codepoint);
2445	    break;
2446        case XML_REGEXP_SEPAR:
2447	    ret = xmlUCSIsCatZ(codepoint);
2448	    break;
2449        case XML_REGEXP_SEPAR_SPACE:
2450	    ret = xmlUCSIsCatZs(codepoint);
2451	    break;
2452        case XML_REGEXP_SEPAR_LINE:
2453	    ret = xmlUCSIsCatZl(codepoint);
2454	    break;
2455        case XML_REGEXP_SEPAR_PARA:
2456	    ret = xmlUCSIsCatZp(codepoint);
2457	    break;
2458        case XML_REGEXP_SYMBOL:
2459	    ret = xmlUCSIsCatS(codepoint);
2460	    break;
2461        case XML_REGEXP_SYMBOL_MATH:
2462	    ret = xmlUCSIsCatSm(codepoint);
2463	    break;
2464        case XML_REGEXP_SYMBOL_CURRENCY:
2465	    ret = xmlUCSIsCatSc(codepoint);
2466	    break;
2467        case XML_REGEXP_SYMBOL_MODIFIER:
2468	    ret = xmlUCSIsCatSk(codepoint);
2469	    break;
2470        case XML_REGEXP_SYMBOL_OTHERS:
2471	    ret = xmlUCSIsCatSo(codepoint);
2472	    break;
2473        case XML_REGEXP_OTHER:
2474	    ret = xmlUCSIsCatC(codepoint);
2475	    break;
2476        case XML_REGEXP_OTHER_CONTROL:
2477	    ret = xmlUCSIsCatCc(codepoint);
2478	    break;
2479        case XML_REGEXP_OTHER_FORMAT:
2480	    ret = xmlUCSIsCatCf(codepoint);
2481	    break;
2482        case XML_REGEXP_OTHER_PRIVATE:
2483	    ret = xmlUCSIsCatCo(codepoint);
2484	    break;
2485        case XML_REGEXP_OTHER_NA:
2486	    /* ret = xmlUCSIsCatCn(codepoint); */
2487	    /* Seems it doesn't exist anymore in recent Unicode releases */
2488	    ret = 0;
2489	    break;
2490        case XML_REGEXP_BLOCK_NAME:
2491	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2492	    break;
2493    }
2494    if (neg)
2495	return(!ret);
2496    return(ret);
2497}
2498
2499static int
2500xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2501    int i, ret = 0;
2502    xmlRegRangePtr range;
2503
2504    if ((atom == NULL) || (!IS_CHAR(codepoint)))
2505	return(-1);
2506
2507    switch (atom->type) {
2508        case XML_REGEXP_SUBREG:
2509        case XML_REGEXP_EPSILON:
2510	    return(-1);
2511        case XML_REGEXP_CHARVAL:
2512            return(codepoint == atom->codepoint);
2513        case XML_REGEXP_RANGES: {
2514	    int accept = 0;
2515
2516	    for (i = 0;i < atom->nbRanges;i++) {
2517		range = atom->ranges[i];
2518		if (range->neg == 2) {
2519		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2520						0, range->start, range->end,
2521						range->blockName);
2522		    if (ret != 0)
2523			return(0); /* excluded char */
2524		} else if (range->neg) {
2525		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2526						0, range->start, range->end,
2527						range->blockName);
2528		    if (ret == 0)
2529		        accept = 1;
2530		    else
2531		        return(0);
2532		} else {
2533		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2534						0, range->start, range->end,
2535						range->blockName);
2536		    if (ret != 0)
2537			accept = 1; /* might still be excluded */
2538		}
2539	    }
2540	    return(accept);
2541	}
2542        case XML_REGEXP_STRING:
2543	    printf("TODO: XML_REGEXP_STRING\n");
2544	    return(-1);
2545        case XML_REGEXP_ANYCHAR:
2546        case XML_REGEXP_ANYSPACE:
2547        case XML_REGEXP_NOTSPACE:
2548        case XML_REGEXP_INITNAME:
2549        case XML_REGEXP_NOTINITNAME:
2550        case XML_REGEXP_NAMECHAR:
2551        case XML_REGEXP_NOTNAMECHAR:
2552        case XML_REGEXP_DECIMAL:
2553        case XML_REGEXP_NOTDECIMAL:
2554        case XML_REGEXP_REALCHAR:
2555        case XML_REGEXP_NOTREALCHAR:
2556        case XML_REGEXP_LETTER:
2557        case XML_REGEXP_LETTER_UPPERCASE:
2558        case XML_REGEXP_LETTER_LOWERCASE:
2559        case XML_REGEXP_LETTER_TITLECASE:
2560        case XML_REGEXP_LETTER_MODIFIER:
2561        case XML_REGEXP_LETTER_OTHERS:
2562        case XML_REGEXP_MARK:
2563        case XML_REGEXP_MARK_NONSPACING:
2564        case XML_REGEXP_MARK_SPACECOMBINING:
2565        case XML_REGEXP_MARK_ENCLOSING:
2566        case XML_REGEXP_NUMBER:
2567        case XML_REGEXP_NUMBER_DECIMAL:
2568        case XML_REGEXP_NUMBER_LETTER:
2569        case XML_REGEXP_NUMBER_OTHERS:
2570        case XML_REGEXP_PUNCT:
2571        case XML_REGEXP_PUNCT_CONNECTOR:
2572        case XML_REGEXP_PUNCT_DASH:
2573        case XML_REGEXP_PUNCT_OPEN:
2574        case XML_REGEXP_PUNCT_CLOSE:
2575        case XML_REGEXP_PUNCT_INITQUOTE:
2576        case XML_REGEXP_PUNCT_FINQUOTE:
2577        case XML_REGEXP_PUNCT_OTHERS:
2578        case XML_REGEXP_SEPAR:
2579        case XML_REGEXP_SEPAR_SPACE:
2580        case XML_REGEXP_SEPAR_LINE:
2581        case XML_REGEXP_SEPAR_PARA:
2582        case XML_REGEXP_SYMBOL:
2583        case XML_REGEXP_SYMBOL_MATH:
2584        case XML_REGEXP_SYMBOL_CURRENCY:
2585        case XML_REGEXP_SYMBOL_MODIFIER:
2586        case XML_REGEXP_SYMBOL_OTHERS:
2587        case XML_REGEXP_OTHER:
2588        case XML_REGEXP_OTHER_CONTROL:
2589        case XML_REGEXP_OTHER_FORMAT:
2590        case XML_REGEXP_OTHER_PRIVATE:
2591        case XML_REGEXP_OTHER_NA:
2592	case XML_REGEXP_BLOCK_NAME:
2593	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
2594		                            (const xmlChar *)atom->valuep);
2595	    if (atom->neg)
2596		ret = !ret;
2597	    break;
2598    }
2599    return(ret);
2600}
2601
2602/************************************************************************
2603 * 									*
2604 *	Saving and restoring state of an execution context		*
2605 * 									*
2606 ************************************************************************/
2607
2608#ifdef DEBUG_REGEXP_EXEC
2609static void
2610xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
2611    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
2612    if (exec->inputStack != NULL) {
2613	int i;
2614	printf(": ");
2615	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
2616	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
2617    } else {
2618	printf(": %s", &(exec->inputString[exec->index]));
2619    }
2620    printf("\n");
2621}
2622#endif
2623
2624static void
2625xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
2626#ifdef DEBUG_REGEXP_EXEC
2627    printf("saving ");
2628    exec->transno++;
2629    xmlFARegDebugExec(exec);
2630    exec->transno--;
2631#endif
2632#ifdef MAX_PUSH
2633    if (exec->nbPush > MAX_PUSH) {
2634        return;
2635    }
2636    exec->nbPush++;
2637#endif
2638
2639    if (exec->maxRollbacks == 0) {
2640	exec->maxRollbacks = 4;
2641	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
2642		                             sizeof(xmlRegExecRollback));
2643	if (exec->rollbacks == NULL) {
2644	    xmlRegexpErrMemory(NULL, "saving regexp");
2645	    exec->maxRollbacks = 0;
2646	    return;
2647	}
2648	memset(exec->rollbacks, 0,
2649	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
2650    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
2651	xmlRegExecRollback *tmp;
2652	int len = exec->maxRollbacks;
2653
2654	exec->maxRollbacks *= 2;
2655	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
2656			exec->maxRollbacks * sizeof(xmlRegExecRollback));
2657	if (tmp == NULL) {
2658	    xmlRegexpErrMemory(NULL, "saving regexp");
2659	    exec->maxRollbacks /= 2;
2660	    return;
2661	}
2662	exec->rollbacks = tmp;
2663	tmp = &exec->rollbacks[len];
2664	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
2665    }
2666    exec->rollbacks[exec->nbRollbacks].state = exec->state;
2667    exec->rollbacks[exec->nbRollbacks].index = exec->index;
2668    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
2669    if (exec->comp->nbCounters > 0) {
2670	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2671	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
2672		xmlMalloc(exec->comp->nbCounters * sizeof(int));
2673	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2674		xmlRegexpErrMemory(NULL, "saving regexp");
2675		exec->status = -5;
2676		return;
2677	    }
2678	}
2679	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
2680	       exec->comp->nbCounters * sizeof(int));
2681    }
2682    exec->nbRollbacks++;
2683}
2684
2685static void
2686xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
2687    if (exec->nbRollbacks <= 0) {
2688	exec->status = -1;
2689#ifdef DEBUG_REGEXP_EXEC
2690	printf("rollback failed on empty stack\n");
2691#endif
2692	return;
2693    }
2694    exec->nbRollbacks--;
2695    exec->state = exec->rollbacks[exec->nbRollbacks].state;
2696    exec->index = exec->rollbacks[exec->nbRollbacks].index;
2697    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
2698    if (exec->comp->nbCounters > 0) {
2699	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2700	    fprintf(stderr, "exec save: allocation failed");
2701	    exec->status = -6;
2702	    return;
2703	}
2704	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
2705	       exec->comp->nbCounters * sizeof(int));
2706    }
2707
2708#ifdef DEBUG_REGEXP_EXEC
2709    printf("restored ");
2710    xmlFARegDebugExec(exec);
2711#endif
2712}
2713
2714/************************************************************************
2715 * 									*
2716 *	Verifier, running an input against a compiled regexp		*
2717 * 									*
2718 ************************************************************************/
2719
2720static int
2721xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
2722    xmlRegExecCtxt execval;
2723    xmlRegExecCtxtPtr exec = &execval;
2724    int ret, codepoint = 0, len, deter;
2725
2726    exec->inputString = content;
2727    exec->index = 0;
2728    exec->nbPush = 0;
2729    exec->determinist = 1;
2730    exec->maxRollbacks = 0;
2731    exec->nbRollbacks = 0;
2732    exec->rollbacks = NULL;
2733    exec->status = 0;
2734    exec->comp = comp;
2735    exec->state = comp->states[0];
2736    exec->transno = 0;
2737    exec->transcount = 0;
2738    exec->inputStack = NULL;
2739    exec->inputStackMax = 0;
2740    if (comp->nbCounters > 0) {
2741	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2742	if (exec->counts == NULL) {
2743	    xmlRegexpErrMemory(NULL, "running regexp");
2744	    return(-1);
2745	}
2746        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2747    } else
2748	exec->counts = NULL;
2749    while ((exec->status == 0) &&
2750	   ((exec->inputString[exec->index] != 0) ||
2751	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2752	xmlRegTransPtr trans;
2753	xmlRegAtomPtr atom;
2754
2755	/*
2756	 * If end of input on non-terminal state, rollback, however we may
2757	 * still have epsilon like transition for counted transitions
2758	 * on counters, in that case don't break too early.  Additionally,
2759	 * if we are working on a range like "AB{0,2}", where B is not present,
2760	 * we don't want to break.
2761	 */
2762	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
2763	    /*
2764	     * if there is a transition, we must check if
2765	     *  atom allows minOccurs of 0
2766	     */
2767	    if (exec->transno < exec->state->nbTrans) {
2768	        trans = &exec->state->trans[exec->transno];
2769		if (trans->to >=0) {
2770		    atom = trans->atom;
2771		    if (!((atom->min == 0) && (atom->max > 0)))
2772		        goto rollback;
2773		}
2774	    } else
2775	        goto rollback;
2776	}
2777
2778	exec->transcount = 0;
2779	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2780	    trans = &exec->state->trans[exec->transno];
2781	    if (trans->to < 0)
2782		continue;
2783	    atom = trans->atom;
2784	    ret = 0;
2785	    deter = 1;
2786	    if (trans->count >= 0) {
2787		int count;
2788		xmlRegCounterPtr counter;
2789
2790		/*
2791		 * A counted transition.
2792		 */
2793
2794		count = exec->counts[trans->count];
2795		counter = &exec->comp->counters[trans->count];
2796#ifdef DEBUG_REGEXP_EXEC
2797		printf("testing count %d: val %d, min %d, max %d\n",
2798		       trans->count, count, counter->min,  counter->max);
2799#endif
2800		ret = ((count >= counter->min) && (count <= counter->max));
2801		if ((ret) && (counter->min != counter->max))
2802		    deter = 0;
2803	    } else if (atom == NULL) {
2804		fprintf(stderr, "epsilon transition left at runtime\n");
2805		exec->status = -2;
2806		break;
2807	    } else if (exec->inputString[exec->index] != 0) {
2808                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2809		ret = xmlRegCheckCharacter(atom, codepoint);
2810		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
2811		    xmlRegStatePtr to = comp->states[trans->to];
2812
2813		    /*
2814		     * this is a multiple input sequence
2815		     */
2816		    if (exec->state->nbTrans > exec->transno + 1) {
2817			xmlFARegExecSave(exec);
2818		    }
2819		    exec->transcount = 1;
2820		    do {
2821			/*
2822			 * Try to progress as much as possible on the input
2823			 */
2824			if (exec->transcount == atom->max) {
2825			    break;
2826			}
2827			exec->index += len;
2828			/*
2829			 * End of input: stop here
2830			 */
2831			if (exec->inputString[exec->index] == 0) {
2832			    exec->index -= len;
2833			    break;
2834			}
2835			if (exec->transcount >= atom->min) {
2836			    int transno = exec->transno;
2837			    xmlRegStatePtr state = exec->state;
2838
2839			    /*
2840			     * The transition is acceptable save it
2841			     */
2842			    exec->transno = -1; /* trick */
2843			    exec->state = to;
2844			    xmlFARegExecSave(exec);
2845			    exec->transno = transno;
2846			    exec->state = state;
2847			}
2848			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2849				              len);
2850			ret = xmlRegCheckCharacter(atom, codepoint);
2851			exec->transcount++;
2852		    } while (ret == 1);
2853		    if (exec->transcount < atom->min)
2854			ret = 0;
2855
2856		    /*
2857		     * If the last check failed but one transition was found
2858		     * possible, rollback
2859		     */
2860		    if (ret < 0)
2861			ret = 0;
2862		    if (ret == 0) {
2863			goto rollback;
2864		    }
2865		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
2866		    /*
2867		     * we don't match on the codepoint, but minOccurs of 0
2868		     * says that's ok.  Setting len to 0 inhibits stepping
2869		     * over the codepoint.
2870		     */
2871		    exec->transcount = 1;
2872		    len = 0;
2873		    ret = 1;
2874		}
2875	    } else if ((atom->min == 0) && (atom->max > 0)) {
2876	        /* another spot to match when minOccurs is 0 */
2877		exec->transcount = 1;
2878		len = 0;
2879		ret = 1;
2880	    }
2881	    if (ret == 1) {
2882		if ((trans->nd == 1) ||
2883		    ((trans->count >= 0) && (deter == 0) &&
2884		     (exec->state->nbTrans > exec->transno + 1))) {
2885#ifdef DEBUG_REGEXP_EXEC
2886		    if (trans->nd == 1)
2887		        printf("Saving on nd transition atom %d for %c at %d\n",
2888			       trans->atom->no, codepoint, exec->index);
2889		    else
2890		        printf("Saving on counted transition count %d for %c at %d\n",
2891			       trans->count, codepoint, exec->index);
2892#endif
2893		    xmlFARegExecSave(exec);
2894		}
2895		if (trans->counter >= 0) {
2896#ifdef DEBUG_REGEXP_EXEC
2897		    printf("Increasing count %d\n", trans->counter);
2898#endif
2899		    exec->counts[trans->counter]++;
2900		}
2901		if ((trans->count >= 0) &&
2902		    (trans->count < REGEXP_ALL_COUNTER)) {
2903#ifdef DEBUG_REGEXP_EXEC
2904		    printf("resetting count %d on transition\n",
2905		           trans->count);
2906#endif
2907		    exec->counts[trans->count] = 0;
2908		}
2909#ifdef DEBUG_REGEXP_EXEC
2910		printf("entering state %d\n", trans->to);
2911#endif
2912		exec->state = comp->states[trans->to];
2913		exec->transno = 0;
2914		if (trans->atom != NULL) {
2915		    exec->index += len;
2916		}
2917		goto progress;
2918	    } else if (ret < 0) {
2919		exec->status = -4;
2920		break;
2921	    }
2922	}
2923	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2924rollback:
2925	    /*
2926	     * Failed to find a way out
2927	     */
2928	    exec->determinist = 0;
2929#ifdef DEBUG_REGEXP_EXEC
2930	    printf("rollback from state %d on %d:%c\n", exec->state->no,
2931	           codepoint,codepoint);
2932#endif
2933	    xmlFARegExecRollBack(exec);
2934	}
2935progress:
2936	continue;
2937    }
2938    if (exec->rollbacks != NULL) {
2939	if (exec->counts != NULL) {
2940	    int i;
2941
2942	    for (i = 0;i < exec->maxRollbacks;i++)
2943		if (exec->rollbacks[i].counts != NULL)
2944		    xmlFree(exec->rollbacks[i].counts);
2945	}
2946	xmlFree(exec->rollbacks);
2947    }
2948    if (exec->counts != NULL)
2949	xmlFree(exec->counts);
2950    if (exec->status == 0)
2951	return(1);
2952    if (exec->status == -1) {
2953	if (exec->nbPush > MAX_PUSH)
2954	    return(-1);
2955	return(0);
2956    }
2957    return(exec->status);
2958}
2959
2960/************************************************************************
2961 * 									*
2962 *	Progressive interface to the verifier one atom at a time	*
2963 * 									*
2964 ************************************************************************/
2965#ifdef DEBUG_ERR
2966static void testerr(xmlRegExecCtxtPtr exec);
2967#endif
2968
2969/**
2970 * xmlRegNewExecCtxt:
2971 * @comp: a precompiled regular expression
2972 * @callback: a callback function used for handling progresses in the
2973 *            automata matching phase
2974 * @data: the context data associated to the callback in this context
2975 *
2976 * Build a context used for progressive evaluation of a regexp.
2977 *
2978 * Returns the new context
2979 */
2980xmlRegExecCtxtPtr
2981xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
2982    xmlRegExecCtxtPtr exec;
2983
2984    if (comp == NULL)
2985	return(NULL);
2986    if ((comp->compact == NULL) && (comp->states == NULL))
2987        return(NULL);
2988    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
2989    if (exec == NULL) {
2990	xmlRegexpErrMemory(NULL, "creating execution context");
2991	return(NULL);
2992    }
2993    memset(exec, 0, sizeof(xmlRegExecCtxt));
2994    exec->inputString = NULL;
2995    exec->index = 0;
2996    exec->determinist = 1;
2997    exec->maxRollbacks = 0;
2998    exec->nbRollbacks = 0;
2999    exec->rollbacks = NULL;
3000    exec->status = 0;
3001    exec->comp = comp;
3002    if (comp->compact == NULL)
3003	exec->state = comp->states[0];
3004    exec->transno = 0;
3005    exec->transcount = 0;
3006    exec->callback = callback;
3007    exec->data = data;
3008    if (comp->nbCounters > 0) {
3009        /*
3010	 * For error handling, exec->counts is allocated twice the size
3011	 * the second half is used to store the data in case of rollback
3012	 */
3013	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
3014	                                 * 2);
3015	if (exec->counts == NULL) {
3016	    xmlRegexpErrMemory(NULL, "creating execution context");
3017	    xmlFree(exec);
3018	    return(NULL);
3019	}
3020        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
3021	exec->errCounts = &exec->counts[comp->nbCounters];
3022    } else {
3023	exec->counts = NULL;
3024	exec->errCounts = NULL;
3025    }
3026    exec->inputStackMax = 0;
3027    exec->inputStackNr = 0;
3028    exec->inputStack = NULL;
3029    exec->errStateNo = -1;
3030    exec->errString = NULL;
3031    exec->nbPush = 0;
3032    return(exec);
3033}
3034
3035/**
3036 * xmlRegFreeExecCtxt:
3037 * @exec: a regular expression evaulation context
3038 *
3039 * Free the structures associated to a regular expression evaulation context.
3040 */
3041void
3042xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
3043    if (exec == NULL)
3044	return;
3045
3046    if (exec->rollbacks != NULL) {
3047	if (exec->counts != NULL) {
3048	    int i;
3049
3050	    for (i = 0;i < exec->maxRollbacks;i++)
3051		if (exec->rollbacks[i].counts != NULL)
3052		    xmlFree(exec->rollbacks[i].counts);
3053	}
3054	xmlFree(exec->rollbacks);
3055    }
3056    if (exec->counts != NULL)
3057	xmlFree(exec->counts);
3058    if (exec->inputStack != NULL) {
3059	int i;
3060
3061	for (i = 0;i < exec->inputStackNr;i++) {
3062	    if (exec->inputStack[i].value != NULL)
3063		xmlFree(exec->inputStack[i].value);
3064	}
3065	xmlFree(exec->inputStack);
3066    }
3067    if (exec->errString != NULL)
3068        xmlFree(exec->errString);
3069    xmlFree(exec);
3070}
3071
3072static void
3073xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3074	                    void *data) {
3075#ifdef DEBUG_PUSH
3076    printf("saving value: %d:%s\n", exec->inputStackNr, value);
3077#endif
3078    if (exec->inputStackMax == 0) {
3079	exec->inputStackMax = 4;
3080	exec->inputStack = (xmlRegInputTokenPtr)
3081	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
3082	if (exec->inputStack == NULL) {
3083	    xmlRegexpErrMemory(NULL, "pushing input string");
3084	    exec->inputStackMax = 0;
3085	    return;
3086	}
3087    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
3088	xmlRegInputTokenPtr tmp;
3089
3090	exec->inputStackMax *= 2;
3091	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
3092			exec->inputStackMax * sizeof(xmlRegInputToken));
3093	if (tmp == NULL) {
3094	    xmlRegexpErrMemory(NULL, "pushing input string");
3095	    exec->inputStackMax /= 2;
3096	    return;
3097	}
3098	exec->inputStack = tmp;
3099    }
3100    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
3101    exec->inputStack[exec->inputStackNr].data = data;
3102    exec->inputStackNr++;
3103    exec->inputStack[exec->inputStackNr].value = NULL;
3104    exec->inputStack[exec->inputStackNr].data = NULL;
3105}
3106
3107/**
3108 * xmlRegStrEqualWildcard:
3109 * @expStr:  the string to be evaluated
3110 * @valStr:  the validation string
3111 *
3112 * Checks if both strings are equal or have the same content. "*"
3113 * can be used as a wildcard in @valStr; "|" is used as a seperator of
3114 * substrings in both @expStr and @valStr.
3115 *
3116 * Returns 1 if the comparison is satisfied and the number of substrings
3117 * is equal, 0 otherwise.
3118 */
3119
3120static int
3121xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
3122    if (expStr == valStr) return(1);
3123    if (expStr == NULL) return(0);
3124    if (valStr == NULL) return(0);
3125    do {
3126	/*
3127	* Eval if we have a wildcard for the current item.
3128	*/
3129        if (*expStr != *valStr) {
3130	    /* if one of them starts with a wildcard make valStr be it */
3131	    if (*valStr == '*') {
3132	        const xmlChar *tmp;
3133
3134		tmp = valStr;
3135		valStr = expStr;
3136		expStr = tmp;
3137	    }
3138	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
3139		do {
3140		    if (*valStr == XML_REG_STRING_SEPARATOR)
3141			break;
3142		    valStr++;
3143		} while (*valStr != 0);
3144		continue;
3145	    } else
3146		return(0);
3147	}
3148	expStr++;
3149	valStr++;
3150    } while (*valStr != 0);
3151    if (*expStr != 0)
3152	return (0);
3153    else
3154	return (1);
3155}
3156
3157/**
3158 * xmlRegCompactPushString:
3159 * @exec: a regexp execution context
3160 * @comp:  the precompiled exec with a compact table
3161 * @value: a string token input
3162 * @data: data associated to the token to reuse in callbacks
3163 *
3164 * Push one input token in the execution context
3165 *
3166 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3167 *     a negative value in case of error.
3168 */
3169static int
3170xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
3171	                xmlRegexpPtr comp,
3172	                const xmlChar *value,
3173	                void *data) {
3174    int state = exec->index;
3175    int i, target;
3176
3177    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
3178	return(-1);
3179
3180    if (value == NULL) {
3181	/*
3182	 * are we at a final state ?
3183	 */
3184	if (comp->compact[state * (comp->nbstrings + 1)] ==
3185            XML_REGEXP_FINAL_STATE)
3186	    return(1);
3187	return(0);
3188    }
3189
3190#ifdef DEBUG_PUSH
3191    printf("value pushed: %s\n", value);
3192#endif
3193
3194    /*
3195     * Examine all outside transitions from current state
3196     */
3197    for (i = 0;i < comp->nbstrings;i++) {
3198	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3199	if ((target > 0) && (target <= comp->nbstates)) {
3200	    target--; /* to avoid 0 */
3201	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
3202		exec->index = target;
3203		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
3204		    exec->callback(exec->data, value,
3205			  comp->transdata[state * comp->nbstrings + i], data);
3206		}
3207#ifdef DEBUG_PUSH
3208		printf("entering state %d\n", target);
3209#endif
3210		if (comp->compact[target * (comp->nbstrings + 1)] ==
3211		    XML_REGEXP_SINK_STATE)
3212		    goto error;
3213
3214		if (comp->compact[target * (comp->nbstrings + 1)] ==
3215		    XML_REGEXP_FINAL_STATE)
3216		    return(1);
3217		return(0);
3218	    }
3219	}
3220    }
3221    /*
3222     * Failed to find an exit transition out from current state for the
3223     * current token
3224     */
3225#ifdef DEBUG_PUSH
3226    printf("failed to find a transition for %s on state %d\n", value, state);
3227#endif
3228error:
3229    if (exec->errString != NULL)
3230        xmlFree(exec->errString);
3231    exec->errString = xmlStrdup(value);
3232    exec->errStateNo = state;
3233    exec->status = -1;
3234#ifdef DEBUG_ERR
3235    testerr(exec);
3236#endif
3237    return(-1);
3238}
3239
3240/**
3241 * xmlRegExecPushStringInternal:
3242 * @exec: a regexp execution context or NULL to indicate the end
3243 * @value: a string token input
3244 * @data: data associated to the token to reuse in callbacks
3245 * @compound: value was assembled from 2 strings
3246 *
3247 * Push one input token in the execution context
3248 *
3249 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3250 *     a negative value in case of error.
3251 */
3252static int
3253xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
3254	                     void *data, int compound) {
3255    xmlRegTransPtr trans;
3256    xmlRegAtomPtr atom;
3257    int ret;
3258    int final = 0;
3259    int progress = 1;
3260
3261    if (exec == NULL)
3262	return(-1);
3263    if (exec->comp == NULL)
3264	return(-1);
3265    if (exec->status != 0)
3266	return(exec->status);
3267
3268    if (exec->comp->compact != NULL)
3269	return(xmlRegCompactPushString(exec, exec->comp, value, data));
3270
3271    if (value == NULL) {
3272        if (exec->state->type == XML_REGEXP_FINAL_STATE)
3273	    return(1);
3274	final = 1;
3275    }
3276
3277#ifdef DEBUG_PUSH
3278    printf("value pushed: %s\n", value);
3279#endif
3280    /*
3281     * If we have an active rollback stack push the new value there
3282     * and get back to where we were left
3283     */
3284    if ((value != NULL) && (exec->inputStackNr > 0)) {
3285	xmlFARegExecSaveInputString(exec, value, data);
3286	value = exec->inputStack[exec->index].value;
3287	data = exec->inputStack[exec->index].data;
3288#ifdef DEBUG_PUSH
3289	printf("value loaded: %s\n", value);
3290#endif
3291    }
3292
3293    while ((exec->status == 0) &&
3294	   ((value != NULL) ||
3295	    ((final == 1) &&
3296	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
3297
3298	/*
3299	 * End of input on non-terminal state, rollback, however we may
3300	 * still have epsilon like transition for counted transitions
3301	 * on counters, in that case don't break too early.
3302	 */
3303	if ((value == NULL) && (exec->counts == NULL))
3304	    goto rollback;
3305
3306	exec->transcount = 0;
3307	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3308	    trans = &exec->state->trans[exec->transno];
3309	    if (trans->to < 0)
3310		continue;
3311	    atom = trans->atom;
3312	    ret = 0;
3313	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3314		int i;
3315		int count;
3316		xmlRegTransPtr t;
3317		xmlRegCounterPtr counter;
3318
3319		ret = 0;
3320
3321#ifdef DEBUG_PUSH
3322		printf("testing all lax %d\n", trans->count);
3323#endif
3324		/*
3325		 * Check all counted transitions from the current state
3326		 */
3327		if ((value == NULL) && (final)) {
3328		    ret = 1;
3329		} else if (value != NULL) {
3330		    for (i = 0;i < exec->state->nbTrans;i++) {
3331			t = &exec->state->trans[i];
3332			if ((t->counter < 0) || (t == trans))
3333			    continue;
3334			counter = &exec->comp->counters[t->counter];
3335			count = exec->counts[t->counter];
3336			if ((count < counter->max) &&
3337		            (t->atom != NULL) &&
3338			    (xmlStrEqual(value, t->atom->valuep))) {
3339			    ret = 0;
3340			    break;
3341			}
3342			if ((count >= counter->min) &&
3343			    (count < counter->max) &&
3344			    (xmlStrEqual(value, t->atom->valuep))) {
3345			    ret = 1;
3346			    break;
3347			}
3348		    }
3349		}
3350	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3351		int i;
3352		int count;
3353		xmlRegTransPtr t;
3354		xmlRegCounterPtr counter;
3355
3356		ret = 1;
3357
3358#ifdef DEBUG_PUSH
3359		printf("testing all %d\n", trans->count);
3360#endif
3361		/*
3362		 * Check all counted transitions from the current state
3363		 */
3364		for (i = 0;i < exec->state->nbTrans;i++) {
3365                    t = &exec->state->trans[i];
3366		    if ((t->counter < 0) || (t == trans))
3367			continue;
3368                    counter = &exec->comp->counters[t->counter];
3369		    count = exec->counts[t->counter];
3370		    if ((count < counter->min) || (count > counter->max)) {
3371			ret = 0;
3372			break;
3373		    }
3374		}
3375	    } else if (trans->count >= 0) {
3376		int count;
3377		xmlRegCounterPtr counter;
3378
3379		/*
3380		 * A counted transition.
3381		 */
3382
3383		count = exec->counts[trans->count];
3384		counter = &exec->comp->counters[trans->count];
3385#ifdef DEBUG_PUSH
3386		printf("testing count %d: val %d, min %d, max %d\n",
3387		       trans->count, count, counter->min,  counter->max);
3388#endif
3389		ret = ((count >= counter->min) && (count <= counter->max));
3390	    } else if (atom == NULL) {
3391		fprintf(stderr, "epsilon transition left at runtime\n");
3392		exec->status = -2;
3393		break;
3394	    } else if (value != NULL) {
3395		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3396		if (atom->neg) {
3397		    ret = !ret;
3398		    if (!compound)
3399		        ret = 0;
3400		}
3401		if ((ret == 1) && (trans->counter >= 0)) {
3402		    xmlRegCounterPtr counter;
3403		    int count;
3404
3405		    count = exec->counts[trans->counter];
3406		    counter = &exec->comp->counters[trans->counter];
3407		    if (count >= counter->max)
3408			ret = 0;
3409		}
3410
3411		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3412		    xmlRegStatePtr to = exec->comp->states[trans->to];
3413
3414		    /*
3415		     * this is a multiple input sequence
3416		     */
3417		    if (exec->state->nbTrans > exec->transno + 1) {
3418			if (exec->inputStackNr <= 0) {
3419			    xmlFARegExecSaveInputString(exec, value, data);
3420			}
3421			xmlFARegExecSave(exec);
3422		    }
3423		    exec->transcount = 1;
3424		    do {
3425			/*
3426			 * Try to progress as much as possible on the input
3427			 */
3428			if (exec->transcount == atom->max) {
3429			    break;
3430			}
3431			exec->index++;
3432			value = exec->inputStack[exec->index].value;
3433			data = exec->inputStack[exec->index].data;
3434#ifdef DEBUG_PUSH
3435			printf("value loaded: %s\n", value);
3436#endif
3437
3438			/*
3439			 * End of input: stop here
3440			 */
3441			if (value == NULL) {
3442			    exec->index --;
3443			    break;
3444			}
3445			if (exec->transcount >= atom->min) {
3446			    int transno = exec->transno;
3447			    xmlRegStatePtr state = exec->state;
3448
3449			    /*
3450			     * The transition is acceptable save it
3451			     */
3452			    exec->transno = -1; /* trick */
3453			    exec->state = to;
3454			    if (exec->inputStackNr <= 0) {
3455				xmlFARegExecSaveInputString(exec, value, data);
3456			    }
3457			    xmlFARegExecSave(exec);
3458			    exec->transno = transno;
3459			    exec->state = state;
3460			}
3461			ret = xmlStrEqual(value, atom->valuep);
3462			exec->transcount++;
3463		    } while (ret == 1);
3464		    if (exec->transcount < atom->min)
3465			ret = 0;
3466
3467		    /*
3468		     * If the last check failed but one transition was found
3469		     * possible, rollback
3470		     */
3471		    if (ret < 0)
3472			ret = 0;
3473		    if (ret == 0) {
3474			goto rollback;
3475		    }
3476		}
3477	    }
3478	    if (ret == 1) {
3479		if ((exec->callback != NULL) && (atom != NULL) &&
3480			(data != NULL)) {
3481		    exec->callback(exec->data, atom->valuep,
3482			           atom->data, data);
3483		}
3484		if (exec->state->nbTrans > exec->transno + 1) {
3485		    if (exec->inputStackNr <= 0) {
3486			xmlFARegExecSaveInputString(exec, value, data);
3487		    }
3488		    xmlFARegExecSave(exec);
3489		}
3490		if (trans->counter >= 0) {
3491#ifdef DEBUG_PUSH
3492		    printf("Increasing count %d\n", trans->counter);
3493#endif
3494		    exec->counts[trans->counter]++;
3495		}
3496		if ((trans->count >= 0) &&
3497		    (trans->count < REGEXP_ALL_COUNTER)) {
3498#ifdef DEBUG_REGEXP_EXEC
3499		    printf("resetting count %d on transition\n",
3500		           trans->count);
3501#endif
3502		    exec->counts[trans->count] = 0;
3503		}
3504#ifdef DEBUG_PUSH
3505		printf("entering state %d\n", trans->to);
3506#endif
3507                if ((exec->comp->states[trans->to] != NULL) &&
3508		    (exec->comp->states[trans->to]->type ==
3509		     XML_REGEXP_SINK_STATE)) {
3510		    /*
3511		     * entering a sink state, save the current state as error
3512		     * state.
3513		     */
3514		    if (exec->errString != NULL)
3515			xmlFree(exec->errString);
3516		    exec->errString = xmlStrdup(value);
3517		    exec->errState = exec->state;
3518		    memcpy(exec->errCounts, exec->counts,
3519			   exec->comp->nbCounters * sizeof(int));
3520		}
3521		exec->state = exec->comp->states[trans->to];
3522		exec->transno = 0;
3523		if (trans->atom != NULL) {
3524		    if (exec->inputStack != NULL) {
3525			exec->index++;
3526			if (exec->index < exec->inputStackNr) {
3527			    value = exec->inputStack[exec->index].value;
3528			    data = exec->inputStack[exec->index].data;
3529#ifdef DEBUG_PUSH
3530			    printf("value loaded: %s\n", value);
3531#endif
3532			} else {
3533			    value = NULL;
3534			    data = NULL;
3535#ifdef DEBUG_PUSH
3536			    printf("end of input\n");
3537#endif
3538			}
3539		    } else {
3540			value = NULL;
3541			data = NULL;
3542#ifdef DEBUG_PUSH
3543			printf("end of input\n");
3544#endif
3545		    }
3546		}
3547		goto progress;
3548	    } else if (ret < 0) {
3549		exec->status = -4;
3550		break;
3551	    }
3552	}
3553	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3554rollback:
3555            /*
3556	     * if we didn't yet rollback on the current input
3557	     * store the current state as the error state.
3558	     */
3559	    if ((progress) && (exec->state != NULL) &&
3560	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
3561	        progress = 0;
3562		if (exec->errString != NULL)
3563		    xmlFree(exec->errString);
3564		exec->errString = xmlStrdup(value);
3565		exec->errState = exec->state;
3566		memcpy(exec->errCounts, exec->counts,
3567		       exec->comp->nbCounters * sizeof(int));
3568	    }
3569
3570	    /*
3571	     * Failed to find a way out
3572	     */
3573	    exec->determinist = 0;
3574	    xmlFARegExecRollBack(exec);
3575	    if (exec->status == 0) {
3576		value = exec->inputStack[exec->index].value;
3577		data = exec->inputStack[exec->index].data;
3578#ifdef DEBUG_PUSH
3579		printf("value loaded: %s\n", value);
3580#endif
3581	    }
3582	}
3583	continue;
3584progress:
3585        progress = 1;
3586	continue;
3587    }
3588    if (exec->status == 0) {
3589        return(exec->state->type == XML_REGEXP_FINAL_STATE);
3590    }
3591#ifdef DEBUG_ERR
3592    if (exec->status < 0) {
3593	testerr(exec);
3594    }
3595#endif
3596    return(exec->status);
3597}
3598
3599/**
3600 * xmlRegExecPushString:
3601 * @exec: a regexp execution context or NULL to indicate the end
3602 * @value: a string token input
3603 * @data: data associated to the token to reuse in callbacks
3604 *
3605 * Push one input token in the execution context
3606 *
3607 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3608 *     a negative value in case of error.
3609 */
3610int
3611xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3612	             void *data) {
3613    return(xmlRegExecPushStringInternal(exec, value, data, 0));
3614}
3615
3616/**
3617 * xmlRegExecPushString2:
3618 * @exec: a regexp execution context or NULL to indicate the end
3619 * @value: the first string token input
3620 * @value2: the second string token input
3621 * @data: data associated to the token to reuse in callbacks
3622 *
3623 * Push one input token in the execution context
3624 *
3625 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3626 *     a negative value in case of error.
3627 */
3628int
3629xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
3630                      const xmlChar *value2, void *data) {
3631    xmlChar buf[150];
3632    int lenn, lenp, ret;
3633    xmlChar *str;
3634
3635    if (exec == NULL)
3636	return(-1);
3637    if (exec->comp == NULL)
3638	return(-1);
3639    if (exec->status != 0)
3640	return(exec->status);
3641
3642    if (value2 == NULL)
3643        return(xmlRegExecPushString(exec, value, data));
3644
3645    lenn = strlen((char *) value2);
3646    lenp = strlen((char *) value);
3647
3648    if (150 < lenn + lenp + 2) {
3649	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
3650	if (str == NULL) {
3651	    exec->status = -1;
3652	    return(-1);
3653	}
3654    } else {
3655	str = buf;
3656    }
3657    memcpy(&str[0], value, lenp);
3658    str[lenp] = XML_REG_STRING_SEPARATOR;
3659    memcpy(&str[lenp + 1], value2, lenn);
3660    str[lenn + lenp + 1] = 0;
3661
3662    if (exec->comp->compact != NULL)
3663	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
3664    else
3665        ret = xmlRegExecPushStringInternal(exec, str, data, 1);
3666
3667    if (str != buf)
3668        xmlFree(buf);
3669    return(ret);
3670}
3671
3672/**
3673 * xmlRegExecGetValues:
3674 * @exec: a regexp execution context
3675 * @err: error extraction or normal one
3676 * @nbval: pointer to the number of accepted values IN/OUT
3677 * @nbneg: return number of negative transitions
3678 * @values: pointer to the array of acceptable values
3679 * @terminal: return value if this was a terminal state
3680 *
3681 * Extract informations from the regexp execution, internal routine to
3682 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
3683 *
3684 * Returns: 0 in case of success or -1 in case of error.
3685 */
3686static int
3687xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
3688                    int *nbval, int *nbneg,
3689		    xmlChar **values, int *terminal) {
3690    int maxval;
3691    int nb = 0;
3692
3693    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
3694        (values == NULL) || (*nbval <= 0))
3695        return(-1);
3696
3697    maxval = *nbval;
3698    *nbval = 0;
3699    *nbneg = 0;
3700    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
3701        xmlRegexpPtr comp;
3702	int target, i, state;
3703
3704        comp = exec->comp;
3705
3706	if (err) {
3707	    if (exec->errStateNo == -1) return(-1);
3708	    state = exec->errStateNo;
3709	} else {
3710	    state = exec->index;
3711	}
3712	if (terminal != NULL) {
3713	    if (comp->compact[state * (comp->nbstrings + 1)] ==
3714	        XML_REGEXP_FINAL_STATE)
3715		*terminal = 1;
3716	    else
3717		*terminal = 0;
3718	}
3719	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3720	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3721	    if ((target > 0) && (target <= comp->nbstates) &&
3722	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
3723		 XML_REGEXP_SINK_STATE)) {
3724	        values[nb++] = comp->stringMap[i];
3725		(*nbval)++;
3726	    }
3727	}
3728	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3729	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3730	    if ((target > 0) && (target <= comp->nbstates) &&
3731	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
3732		 XML_REGEXP_SINK_STATE)) {
3733	        values[nb++] = comp->stringMap[i];
3734		(*nbneg)++;
3735	    }
3736	}
3737    } else {
3738        int transno;
3739	xmlRegTransPtr trans;
3740	xmlRegAtomPtr atom;
3741	xmlRegStatePtr state;
3742
3743	if (terminal != NULL) {
3744	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
3745		*terminal = 1;
3746	    else
3747		*terminal = 0;
3748	}
3749
3750	if (err) {
3751	    if (exec->errState == NULL) return(-1);
3752	    state = exec->errState;
3753	} else {
3754	    if (exec->state == NULL) return(-1);
3755	    state = exec->state;
3756	}
3757	for (transno = 0;
3758	     (transno < state->nbTrans) && (nb < maxval);
3759	     transno++) {
3760	    trans = &state->trans[transno];
3761	    if (trans->to < 0)
3762		continue;
3763	    atom = trans->atom;
3764	    if ((atom == NULL) || (atom->valuep == NULL))
3765		continue;
3766	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3767	        /* this should not be reached but ... */
3768	        TODO;
3769	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3770	        /* this should not be reached but ... */
3771	        TODO;
3772	    } else if (trans->counter >= 0) {
3773		xmlRegCounterPtr counter;
3774		int count;
3775
3776		if (err)
3777		    count = exec->errCounts[trans->counter];
3778		else
3779		    count = exec->counts[trans->counter];
3780		counter = &exec->comp->counters[trans->counter];
3781		if (count < counter->max) {
3782		    if (atom->neg)
3783			values[nb++] = (xmlChar *) atom->valuep2;
3784		    else
3785			values[nb++] = (xmlChar *) atom->valuep;
3786		    (*nbval)++;
3787		}
3788	    } else {
3789                if ((exec->comp->states[trans->to] != NULL) &&
3790		    (exec->comp->states[trans->to]->type !=
3791		     XML_REGEXP_SINK_STATE)) {
3792		    if (atom->neg)
3793			values[nb++] = (xmlChar *) atom->valuep2;
3794		    else
3795			values[nb++] = (xmlChar *) atom->valuep;
3796		    (*nbval)++;
3797		}
3798	    }
3799	}
3800	for (transno = 0;
3801	     (transno < state->nbTrans) && (nb < maxval);
3802	     transno++) {
3803	    trans = &state->trans[transno];
3804	    if (trans->to < 0)
3805		continue;
3806	    atom = trans->atom;
3807	    if ((atom == NULL) || (atom->valuep == NULL))
3808		continue;
3809	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3810	        continue;
3811	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3812	        continue;
3813	    } else if (trans->counter >= 0) {
3814	        continue;
3815	    } else {
3816                if ((exec->comp->states[trans->to] != NULL) &&
3817		    (exec->comp->states[trans->to]->type ==
3818		     XML_REGEXP_SINK_STATE)) {
3819		    if (atom->neg)
3820			values[nb++] = (xmlChar *) atom->valuep2;
3821		    else
3822			values[nb++] = (xmlChar *) atom->valuep;
3823		    (*nbneg)++;
3824		}
3825	    }
3826	}
3827    }
3828    return(0);
3829}
3830
3831/**
3832 * xmlRegExecNextValues:
3833 * @exec: a regexp execution context
3834 * @nbval: pointer to the number of accepted values IN/OUT
3835 * @nbneg: return number of negative transitions
3836 * @values: pointer to the array of acceptable values
3837 * @terminal: return value if this was a terminal state
3838 *
3839 * Extract informations from the regexp execution,
3840 * the parameter @values must point to an array of @nbval string pointers
3841 * on return nbval will contain the number of possible strings in that
3842 * state and the @values array will be updated with them. The string values
3843 * returned will be freed with the @exec context and don't need to be
3844 * deallocated.
3845 *
3846 * Returns: 0 in case of success or -1 in case of error.
3847 */
3848int
3849xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
3850                     xmlChar **values, int *terminal) {
3851    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
3852}
3853
3854/**
3855 * xmlRegExecErrInfo:
3856 * @exec: a regexp execution context generating an error
3857 * @string: return value for the error string
3858 * @nbval: pointer to the number of accepted values IN/OUT
3859 * @nbneg: return number of negative transitions
3860 * @values: pointer to the array of acceptable values
3861 * @terminal: return value if this was a terminal state
3862 *
3863 * Extract error informations from the regexp execution, the parameter
3864 * @string will be updated with the value pushed and not accepted,
3865 * the parameter @values must point to an array of @nbval string pointers
3866 * on return nbval will contain the number of possible strings in that
3867 * state and the @values array will be updated with them. The string values
3868 * returned will be freed with the @exec context and don't need to be
3869 * deallocated.
3870 *
3871 * Returns: 0 in case of success or -1 in case of error.
3872 */
3873int
3874xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
3875                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
3876    if (exec == NULL)
3877        return(-1);
3878    if (string != NULL) {
3879        if (exec->status != 0)
3880	    *string = exec->errString;
3881	else
3882	    *string = NULL;
3883    }
3884    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
3885}
3886
3887#ifdef DEBUG_ERR
3888static void testerr(xmlRegExecCtxtPtr exec) {
3889    const xmlChar *string;
3890    xmlChar *values[5];
3891    int nb = 5;
3892    int nbneg;
3893    int terminal;
3894    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
3895}
3896#endif
3897
3898#if 0
3899static int
3900xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
3901    xmlRegTransPtr trans;
3902    xmlRegAtomPtr atom;
3903    int ret;
3904    int codepoint, len;
3905
3906    if (exec == NULL)
3907	return(-1);
3908    if (exec->status != 0)
3909	return(exec->status);
3910
3911    while ((exec->status == 0) &&
3912	   ((exec->inputString[exec->index] != 0) ||
3913	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
3914
3915	/*
3916	 * End of input on non-terminal state, rollback, however we may
3917	 * still have epsilon like transition for counted transitions
3918	 * on counters, in that case don't break too early.
3919	 */
3920	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
3921	    goto rollback;
3922
3923	exec->transcount = 0;
3924	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3925	    trans = &exec->state->trans[exec->transno];
3926	    if (trans->to < 0)
3927		continue;
3928	    atom = trans->atom;
3929	    ret = 0;
3930	    if (trans->count >= 0) {
3931		int count;
3932		xmlRegCounterPtr counter;
3933
3934		/*
3935		 * A counted transition.
3936		 */
3937
3938		count = exec->counts[trans->count];
3939		counter = &exec->comp->counters[trans->count];
3940#ifdef DEBUG_REGEXP_EXEC
3941		printf("testing count %d: val %d, min %d, max %d\n",
3942		       trans->count, count, counter->min,  counter->max);
3943#endif
3944		ret = ((count >= counter->min) && (count <= counter->max));
3945	    } else if (atom == NULL) {
3946		fprintf(stderr, "epsilon transition left at runtime\n");
3947		exec->status = -2;
3948		break;
3949	    } else if (exec->inputString[exec->index] != 0) {
3950                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3951		ret = xmlRegCheckCharacter(atom, codepoint);
3952		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3953		    xmlRegStatePtr to = exec->comp->states[trans->to];
3954
3955		    /*
3956		     * this is a multiple input sequence
3957		     */
3958		    if (exec->state->nbTrans > exec->transno + 1) {
3959			xmlFARegExecSave(exec);
3960		    }
3961		    exec->transcount = 1;
3962		    do {
3963			/*
3964			 * Try to progress as much as possible on the input
3965			 */
3966			if (exec->transcount == atom->max) {
3967			    break;
3968			}
3969			exec->index += len;
3970			/*
3971			 * End of input: stop here
3972			 */
3973			if (exec->inputString[exec->index] == 0) {
3974			    exec->index -= len;
3975			    break;
3976			}
3977			if (exec->transcount >= atom->min) {
3978			    int transno = exec->transno;
3979			    xmlRegStatePtr state = exec->state;
3980
3981			    /*
3982			     * The transition is acceptable save it
3983			     */
3984			    exec->transno = -1; /* trick */
3985			    exec->state = to;
3986			    xmlFARegExecSave(exec);
3987			    exec->transno = transno;
3988			    exec->state = state;
3989			}
3990			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3991				              len);
3992			ret = xmlRegCheckCharacter(atom, codepoint);
3993			exec->transcount++;
3994		    } while (ret == 1);
3995		    if (exec->transcount < atom->min)
3996			ret = 0;
3997
3998		    /*
3999		     * If the last check failed but one transition was found
4000		     * possible, rollback
4001		     */
4002		    if (ret < 0)
4003			ret = 0;
4004		    if (ret == 0) {
4005			goto rollback;
4006		    }
4007		}
4008	    }
4009	    if (ret == 1) {
4010		if (exec->state->nbTrans > exec->transno + 1) {
4011		    xmlFARegExecSave(exec);
4012		}
4013		if (trans->counter >= 0) {
4014#ifdef DEBUG_REGEXP_EXEC
4015		    printf("Increasing count %d\n", trans->counter);
4016#endif
4017		    exec->counts[trans->counter]++;
4018		}
4019#ifdef DEBUG_REGEXP_EXEC
4020		printf("entering state %d\n", trans->to);
4021#endif
4022		exec->state = exec->comp->states[trans->to];
4023		exec->transno = 0;
4024		if (trans->atom != NULL) {
4025		    exec->index += len;
4026		}
4027		goto progress;
4028	    } else if (ret < 0) {
4029		exec->status = -4;
4030		break;
4031	    }
4032	}
4033	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
4034rollback:
4035	    /*
4036	     * Failed to find a way out
4037	     */
4038	    exec->determinist = 0;
4039	    xmlFARegExecRollBack(exec);
4040	}
4041progress:
4042	continue;
4043    }
4044}
4045#endif
4046/************************************************************************
4047 * 									*
4048 *	Parser for the Schemas Datatype Regular Expressions		*
4049 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
4050 * 									*
4051 ************************************************************************/
4052
4053/**
4054 * xmlFAIsChar:
4055 * @ctxt:  a regexp parser context
4056 *
4057 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
4058 */
4059static int
4060xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
4061    int cur;
4062    int len;
4063
4064    cur = CUR_SCHAR(ctxt->cur, len);
4065    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
4066	(cur == '*') || (cur == '+') || (cur == '(') ||
4067	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
4068	(cur == 0x5D) || (cur == 0))
4069	return(-1);
4070    return(cur);
4071}
4072
4073/**
4074 * xmlFAParseCharProp:
4075 * @ctxt:  a regexp parser context
4076 *
4077 * [27]   charProp   ::=   IsCategory | IsBlock
4078 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
4079 *                       Separators | Symbols | Others
4080 * [29]   Letters   ::=   'L' [ultmo]?
4081 * [30]   Marks   ::=   'M' [nce]?
4082 * [31]   Numbers   ::=   'N' [dlo]?
4083 * [32]   Punctuation   ::=   'P' [cdseifo]?
4084 * [33]   Separators   ::=   'Z' [slp]?
4085 * [34]   Symbols   ::=   'S' [mcko]?
4086 * [35]   Others   ::=   'C' [cfon]?
4087 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
4088 */
4089static void
4090xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
4091    int cur;
4092    xmlRegAtomType type = (xmlRegAtomType) 0;
4093    xmlChar *blockName = NULL;
4094
4095    cur = CUR;
4096    if (cur == 'L') {
4097	NEXT;
4098	cur = CUR;
4099	if (cur == 'u') {
4100	    NEXT;
4101	    type = XML_REGEXP_LETTER_UPPERCASE;
4102	} else if (cur == 'l') {
4103	    NEXT;
4104	    type = XML_REGEXP_LETTER_LOWERCASE;
4105	} else if (cur == 't') {
4106	    NEXT;
4107	    type = XML_REGEXP_LETTER_TITLECASE;
4108	} else if (cur == 'm') {
4109	    NEXT;
4110	    type = XML_REGEXP_LETTER_MODIFIER;
4111	} else if (cur == 'o') {
4112	    NEXT;
4113	    type = XML_REGEXP_LETTER_OTHERS;
4114	} else {
4115	    type = XML_REGEXP_LETTER;
4116	}
4117    } else if (cur == 'M') {
4118	NEXT;
4119	cur = CUR;
4120	if (cur == 'n') {
4121	    NEXT;
4122	    /* nonspacing */
4123	    type = XML_REGEXP_MARK_NONSPACING;
4124	} else if (cur == 'c') {
4125	    NEXT;
4126	    /* spacing combining */
4127	    type = XML_REGEXP_MARK_SPACECOMBINING;
4128	} else if (cur == 'e') {
4129	    NEXT;
4130	    /* enclosing */
4131	    type = XML_REGEXP_MARK_ENCLOSING;
4132	} else {
4133	    /* all marks */
4134	    type = XML_REGEXP_MARK;
4135	}
4136    } else if (cur == 'N') {
4137	NEXT;
4138	cur = CUR;
4139	if (cur == 'd') {
4140	    NEXT;
4141	    /* digital */
4142	    type = XML_REGEXP_NUMBER_DECIMAL;
4143	} else if (cur == 'l') {
4144	    NEXT;
4145	    /* letter */
4146	    type = XML_REGEXP_NUMBER_LETTER;
4147	} else if (cur == 'o') {
4148	    NEXT;
4149	    /* other */
4150	    type = XML_REGEXP_NUMBER_OTHERS;
4151	} else {
4152	    /* all numbers */
4153	    type = XML_REGEXP_NUMBER;
4154	}
4155    } else if (cur == 'P') {
4156	NEXT;
4157	cur = CUR;
4158	if (cur == 'c') {
4159	    NEXT;
4160	    /* connector */
4161	    type = XML_REGEXP_PUNCT_CONNECTOR;
4162	} else if (cur == 'd') {
4163	    NEXT;
4164	    /* dash */
4165	    type = XML_REGEXP_PUNCT_DASH;
4166	} else if (cur == 's') {
4167	    NEXT;
4168	    /* open */
4169	    type = XML_REGEXP_PUNCT_OPEN;
4170	} else if (cur == 'e') {
4171	    NEXT;
4172	    /* close */
4173	    type = XML_REGEXP_PUNCT_CLOSE;
4174	} else if (cur == 'i') {
4175	    NEXT;
4176	    /* initial quote */
4177	    type = XML_REGEXP_PUNCT_INITQUOTE;
4178	} else if (cur == 'f') {
4179	    NEXT;
4180	    /* final quote */
4181	    type = XML_REGEXP_PUNCT_FINQUOTE;
4182	} else if (cur == 'o') {
4183	    NEXT;
4184	    /* other */
4185	    type = XML_REGEXP_PUNCT_OTHERS;
4186	} else {
4187	    /* all punctuation */
4188	    type = XML_REGEXP_PUNCT;
4189	}
4190    } else if (cur == 'Z') {
4191	NEXT;
4192	cur = CUR;
4193	if (cur == 's') {
4194	    NEXT;
4195	    /* space */
4196	    type = XML_REGEXP_SEPAR_SPACE;
4197	} else if (cur == 'l') {
4198	    NEXT;
4199	    /* line */
4200	    type = XML_REGEXP_SEPAR_LINE;
4201	} else if (cur == 'p') {
4202	    NEXT;
4203	    /* paragraph */
4204	    type = XML_REGEXP_SEPAR_PARA;
4205	} else {
4206	    /* all separators */
4207	    type = XML_REGEXP_SEPAR;
4208	}
4209    } else if (cur == 'S') {
4210	NEXT;
4211	cur = CUR;
4212	if (cur == 'm') {
4213	    NEXT;
4214	    type = XML_REGEXP_SYMBOL_MATH;
4215	    /* math */
4216	} else if (cur == 'c') {
4217	    NEXT;
4218	    type = XML_REGEXP_SYMBOL_CURRENCY;
4219	    /* currency */
4220	} else if (cur == 'k') {
4221	    NEXT;
4222	    type = XML_REGEXP_SYMBOL_MODIFIER;
4223	    /* modifiers */
4224	} else if (cur == 'o') {
4225	    NEXT;
4226	    type = XML_REGEXP_SYMBOL_OTHERS;
4227	    /* other */
4228	} else {
4229	    /* all symbols */
4230	    type = XML_REGEXP_SYMBOL;
4231	}
4232    } else if (cur == 'C') {
4233	NEXT;
4234	cur = CUR;
4235	if (cur == 'c') {
4236	    NEXT;
4237	    /* control */
4238	    type = XML_REGEXP_OTHER_CONTROL;
4239	} else if (cur == 'f') {
4240	    NEXT;
4241	    /* format */
4242	    type = XML_REGEXP_OTHER_FORMAT;
4243	} else if (cur == 'o') {
4244	    NEXT;
4245	    /* private use */
4246	    type = XML_REGEXP_OTHER_PRIVATE;
4247	} else if (cur == 'n') {
4248	    NEXT;
4249	    /* not assigned */
4250	    type = XML_REGEXP_OTHER_NA;
4251	} else {
4252	    /* all others */
4253	    type = XML_REGEXP_OTHER;
4254	}
4255    } else if (cur == 'I') {
4256	const xmlChar *start;
4257	NEXT;
4258	cur = CUR;
4259	if (cur != 's') {
4260	    ERROR("IsXXXX expected");
4261	    return;
4262	}
4263	NEXT;
4264	start = ctxt->cur;
4265	cur = CUR;
4266	if (((cur >= 'a') && (cur <= 'z')) ||
4267	    ((cur >= 'A') && (cur <= 'Z')) ||
4268	    ((cur >= '0') && (cur <= '9')) ||
4269	    (cur == 0x2D)) {
4270	    NEXT;
4271	    cur = CUR;
4272	    while (((cur >= 'a') && (cur <= 'z')) ||
4273		((cur >= 'A') && (cur <= 'Z')) ||
4274		((cur >= '0') && (cur <= '9')) ||
4275		(cur == 0x2D)) {
4276		NEXT;
4277		cur = CUR;
4278	    }
4279	}
4280	type = XML_REGEXP_BLOCK_NAME;
4281	blockName = xmlStrndup(start, ctxt->cur - start);
4282    } else {
4283	ERROR("Unknown char property");
4284	return;
4285    }
4286    if (ctxt->atom == NULL) {
4287	ctxt->atom = xmlRegNewAtom(ctxt, type);
4288	if (ctxt->atom != NULL)
4289	    ctxt->atom->valuep = blockName;
4290    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4291        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4292		           type, 0, 0, blockName);
4293    }
4294}
4295
4296/**
4297 * xmlFAParseCharClassEsc:
4298 * @ctxt:  a regexp parser context
4299 *
4300 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
4301 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
4302 * [25] catEsc   ::=   '\p{' charProp '}'
4303 * [26] complEsc ::=   '\P{' charProp '}'
4304 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
4305 */
4306static void
4307xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
4308    int cur;
4309
4310    if (CUR == '.') {
4311	if (ctxt->atom == NULL) {
4312	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
4313	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4314	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4315			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
4316	}
4317	NEXT;
4318	return;
4319    }
4320    if (CUR != '\\') {
4321	ERROR("Escaped sequence: expecting \\");
4322	return;
4323    }
4324    NEXT;
4325    cur = CUR;
4326    if (cur == 'p') {
4327	NEXT;
4328	if (CUR != '{') {
4329	    ERROR("Expecting '{'");
4330	    return;
4331	}
4332	NEXT;
4333	xmlFAParseCharProp(ctxt);
4334	if (CUR != '}') {
4335	    ERROR("Expecting '}'");
4336	    return;
4337	}
4338	NEXT;
4339    } else if (cur == 'P') {
4340	NEXT;
4341	if (CUR != '{') {
4342	    ERROR("Expecting '{'");
4343	    return;
4344	}
4345	NEXT;
4346	xmlFAParseCharProp(ctxt);
4347	ctxt->atom->neg = 1;
4348	if (CUR != '}') {
4349	    ERROR("Expecting '}'");
4350	    return;
4351	}
4352	NEXT;
4353    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4354	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4355	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4356	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4357	(cur == 0x5E)) {
4358	if (ctxt->atom == NULL) {
4359	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4360	    if (ctxt->atom != NULL) {
4361	        switch (cur) {
4362		    case 'n':
4363		        ctxt->atom->codepoint = '\n';
4364			break;
4365		    case 'r':
4366		        ctxt->atom->codepoint = '\r';
4367			break;
4368		    case 't':
4369		        ctxt->atom->codepoint = '\t';
4370			break;
4371		    default:
4372			ctxt->atom->codepoint = cur;
4373		}
4374	    }
4375	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4376	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4377			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4378	}
4379	NEXT;
4380    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4381	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4382	(cur == 'w') || (cur == 'W')) {
4383	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4384
4385	switch (cur) {
4386	    case 's':
4387		type = XML_REGEXP_ANYSPACE;
4388		break;
4389	    case 'S':
4390		type = XML_REGEXP_NOTSPACE;
4391		break;
4392	    case 'i':
4393		type = XML_REGEXP_INITNAME;
4394		break;
4395	    case 'I':
4396		type = XML_REGEXP_NOTINITNAME;
4397		break;
4398	    case 'c':
4399		type = XML_REGEXP_NAMECHAR;
4400		break;
4401	    case 'C':
4402		type = XML_REGEXP_NOTNAMECHAR;
4403		break;
4404	    case 'd':
4405		type = XML_REGEXP_DECIMAL;
4406		break;
4407	    case 'D':
4408		type = XML_REGEXP_NOTDECIMAL;
4409		break;
4410	    case 'w':
4411		type = XML_REGEXP_REALCHAR;
4412		break;
4413	    case 'W':
4414		type = XML_REGEXP_NOTREALCHAR;
4415		break;
4416	}
4417	NEXT;
4418	if (ctxt->atom == NULL) {
4419	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4420	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4421	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4422			       type, 0, 0, NULL);
4423	}
4424    }
4425}
4426
4427/**
4428 * xmlFAParseCharRef:
4429 * @ctxt:  a regexp parser context
4430 *
4431 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
4432 */
4433static int
4434xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
4435    int ret = 0, cur;
4436
4437    if ((CUR != '&') || (NXT(1) != '#'))
4438	return(-1);
4439    NEXT;
4440    NEXT;
4441    cur = CUR;
4442    if (cur == 'x') {
4443	NEXT;
4444	cur = CUR;
4445	if (((cur >= '0') && (cur <= '9')) ||
4446	    ((cur >= 'a') && (cur <= 'f')) ||
4447	    ((cur >= 'A') && (cur <= 'F'))) {
4448	    while (((cur >= '0') && (cur <= '9')) ||
4449		   ((cur >= 'A') && (cur <= 'F'))) {
4450		if ((cur >= '0') && (cur <= '9'))
4451		    ret = ret * 16 + cur - '0';
4452		else if ((cur >= 'a') && (cur <= 'f'))
4453		    ret = ret * 16 + 10 + (cur - 'a');
4454		else
4455		    ret = ret * 16 + 10 + (cur - 'A');
4456		NEXT;
4457		cur = CUR;
4458	    }
4459	} else {
4460	    ERROR("Char ref: expecting [0-9A-F]");
4461	    return(-1);
4462	}
4463    } else {
4464	if ((cur >= '0') && (cur <= '9')) {
4465	    while ((cur >= '0') && (cur <= '9')) {
4466		ret = ret * 10 + cur - '0';
4467		NEXT;
4468		cur = CUR;
4469	    }
4470	} else {
4471	    ERROR("Char ref: expecting [0-9]");
4472	    return(-1);
4473	}
4474    }
4475    if (cur != ';') {
4476	ERROR("Char ref: expecting ';'");
4477	return(-1);
4478    } else {
4479	NEXT;
4480    }
4481    return(ret);
4482}
4483
4484/**
4485 * xmlFAParseCharRange:
4486 * @ctxt:  a regexp parser context
4487 *
4488 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
4489 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
4490 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
4491 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
4492 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
4493 */
4494static void
4495xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4496    int cur, len;
4497    int start = -1;
4498    int end = -1;
4499
4500    if ((CUR == '&') && (NXT(1) == '#')) {
4501	end = start = xmlFAParseCharRef(ctxt);
4502        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4503	                   XML_REGEXP_CHARVAL, start, end, NULL);
4504	return;
4505    }
4506    cur = CUR;
4507    if (cur == '\\') {
4508	NEXT;
4509	cur = CUR;
4510	switch (cur) {
4511	    case 'n': start = 0xA; break;
4512	    case 'r': start = 0xD; break;
4513	    case 't': start = 0x9; break;
4514	    case '\\': case '|': case '.': case '-': case '^': case '?':
4515	    case '*': case '+': case '{': case '}': case '(': case ')':
4516	    case '[': case ']':
4517		start = cur; break;
4518	    default:
4519		ERROR("Invalid escape value");
4520		return;
4521	}
4522	end = start;
4523        len = 1;
4524    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4525        end = start = CUR_SCHAR(ctxt->cur, len);
4526    } else {
4527	ERROR("Expecting a char range");
4528	return;
4529    }
4530    NEXTL(len);
4531    if (start == '-') {
4532	return;
4533    }
4534    cur = CUR;
4535    if ((cur != '-') || (NXT(1) == ']')) {
4536        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4537		              XML_REGEXP_CHARVAL, start, end, NULL);
4538	return;
4539    }
4540    NEXT;
4541    cur = CUR;
4542    if (cur == '\\') {
4543	NEXT;
4544	cur = CUR;
4545	switch (cur) {
4546	    case 'n': end = 0xA; break;
4547	    case 'r': end = 0xD; break;
4548	    case 't': end = 0x9; break;
4549	    case '\\': case '|': case '.': case '-': case '^': case '?':
4550	    case '*': case '+': case '{': case '}': case '(': case ')':
4551	    case '[': case ']':
4552		end = cur; break;
4553	    default:
4554		ERROR("Invalid escape value");
4555		return;
4556	}
4557        len = 1;
4558    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4559        end = CUR_SCHAR(ctxt->cur, len);
4560    } else {
4561	ERROR("Expecting the end of a char range");
4562	return;
4563    }
4564    NEXTL(len);
4565    /* TODO check that the values are acceptable character ranges for XML */
4566    if (end < start) {
4567	ERROR("End of range is before start of range");
4568    } else {
4569        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4570		           XML_REGEXP_CHARVAL, start, end, NULL);
4571    }
4572    return;
4573}
4574
4575/**
4576 * xmlFAParsePosCharGroup:
4577 * @ctxt:  a regexp parser context
4578 *
4579 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
4580 */
4581static void
4582xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
4583    do {
4584	if ((CUR == '\\') || (CUR == '.')) {
4585	    xmlFAParseCharClassEsc(ctxt);
4586	} else {
4587	    xmlFAParseCharRange(ctxt);
4588	}
4589    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
4590	     (ctxt->error == 0));
4591}
4592
4593/**
4594 * xmlFAParseCharGroup:
4595 * @ctxt:  a regexp parser context
4596 *
4597 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
4598 * [15]   negCharGroup ::= '^' posCharGroup
4599 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
4600 * [12]   charClassExpr ::= '[' charGroup ']'
4601 */
4602static void
4603xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
4604    int n = ctxt->neg;
4605    while ((CUR != ']') && (ctxt->error == 0)) {
4606	if (CUR == '^') {
4607	    int neg = ctxt->neg;
4608
4609	    NEXT;
4610	    ctxt->neg = !ctxt->neg;
4611	    xmlFAParsePosCharGroup(ctxt);
4612	    ctxt->neg = neg;
4613	} else if ((CUR == '-') && (NXT(1) == '[')) {
4614	    int neg = ctxt->neg;
4615	    ctxt->neg = 2;
4616	    NEXT;	/* eat the '-' */
4617	    NEXT;	/* eat the '[' */
4618	    xmlFAParseCharGroup(ctxt);
4619	    if (CUR == ']') {
4620		NEXT;
4621	    } else {
4622		ERROR("charClassExpr: ']' expected");
4623		break;
4624	    }
4625	    ctxt->neg = neg;
4626	    break;
4627	} else if (CUR != ']') {
4628	    xmlFAParsePosCharGroup(ctxt);
4629	}
4630    }
4631    ctxt->neg = n;
4632}
4633
4634/**
4635 * xmlFAParseCharClass:
4636 * @ctxt:  a regexp parser context
4637 *
4638 * [11]   charClass   ::=     charClassEsc | charClassExpr
4639 * [12]   charClassExpr   ::=   '[' charGroup ']'
4640 */
4641static void
4642xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
4643    if (CUR == '[') {
4644	NEXT;
4645	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
4646	if (ctxt->atom == NULL)
4647	    return;
4648	xmlFAParseCharGroup(ctxt);
4649	if (CUR == ']') {
4650	    NEXT;
4651	} else {
4652	    ERROR("xmlFAParseCharClass: ']' expected");
4653	}
4654    } else {
4655	xmlFAParseCharClassEsc(ctxt);
4656    }
4657}
4658
4659/**
4660 * xmlFAParseQuantExact:
4661 * @ctxt:  a regexp parser context
4662 *
4663 * [8]   QuantExact   ::=   [0-9]+
4664 *
4665 * Returns 0 if success or -1 in case of error
4666 */
4667static int
4668xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
4669    int ret = 0;
4670    int ok = 0;
4671
4672    while ((CUR >= '0') && (CUR <= '9')) {
4673	ret = ret * 10 + (CUR - '0');
4674	ok = 1;
4675	NEXT;
4676    }
4677    if (ok != 1) {
4678	return(-1);
4679    }
4680    return(ret);
4681}
4682
4683/**
4684 * xmlFAParseQuantifier:
4685 * @ctxt:  a regexp parser context
4686 *
4687 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
4688 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
4689 * [6]   quantRange   ::=   QuantExact ',' QuantExact
4690 * [7]   quantMin   ::=   QuantExact ','
4691 * [8]   QuantExact   ::=   [0-9]+
4692 */
4693static int
4694xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
4695    int cur;
4696
4697    cur = CUR;
4698    if ((cur == '?') || (cur == '*') || (cur == '+')) {
4699	if (ctxt->atom != NULL) {
4700	    if (cur == '?')
4701		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
4702	    else if (cur == '*')
4703		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
4704	    else if (cur == '+')
4705		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
4706	}
4707	NEXT;
4708	return(1);
4709    }
4710    if (cur == '{') {
4711	int min = 0, max = 0;
4712
4713	NEXT;
4714	cur = xmlFAParseQuantExact(ctxt);
4715	if (cur >= 0)
4716	    min = cur;
4717	if (CUR == ',') {
4718	    NEXT;
4719	    if (CUR == '}')
4720	        max = INT_MAX;
4721	    else {
4722	        cur = xmlFAParseQuantExact(ctxt);
4723	        if (cur >= 0)
4724		    max = cur;
4725		else {
4726		    ERROR("Improper quantifier");
4727		}
4728	    }
4729	}
4730	if (CUR == '}') {
4731	    NEXT;
4732	} else {
4733	    ERROR("Unterminated quantifier");
4734	}
4735	if (max == 0)
4736	    max = min;
4737	if (ctxt->atom != NULL) {
4738	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
4739	    ctxt->atom->min = min;
4740	    ctxt->atom->max = max;
4741	}
4742	return(1);
4743    }
4744    return(0);
4745}
4746
4747/**
4748 * xmlFAParseAtom:
4749 * @ctxt:  a regexp parser context
4750 *
4751 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
4752 */
4753static int
4754xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
4755    int codepoint, len;
4756
4757    codepoint = xmlFAIsChar(ctxt);
4758    if (codepoint > 0) {
4759	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4760	if (ctxt->atom == NULL)
4761	    return(-1);
4762	codepoint = CUR_SCHAR(ctxt->cur, len);
4763	ctxt->atom->codepoint = codepoint;
4764	NEXTL(len);
4765	return(1);
4766    } else if (CUR == '|') {
4767	return(0);
4768    } else if (CUR == 0) {
4769	return(0);
4770    } else if (CUR == ')') {
4771	return(0);
4772    } else if (CUR == '(') {
4773	xmlRegStatePtr start, oldend;
4774
4775	NEXT;
4776	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
4777	start = ctxt->state;
4778	oldend = ctxt->end;
4779	ctxt->end = NULL;
4780	ctxt->atom = NULL;
4781	xmlFAParseRegExp(ctxt, 0);
4782	if (CUR == ')') {
4783	    NEXT;
4784	} else {
4785	    ERROR("xmlFAParseAtom: expecting ')'");
4786	}
4787	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
4788	if (ctxt->atom == NULL)
4789	    return(-1);
4790	ctxt->atom->start = start;
4791	ctxt->atom->stop = ctxt->state;
4792	ctxt->end = oldend;
4793	return(1);
4794    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
4795	xmlFAParseCharClass(ctxt);
4796	return(1);
4797    }
4798    return(0);
4799}
4800
4801/**
4802 * xmlFAParsePiece:
4803 * @ctxt:  a regexp parser context
4804 *
4805 * [3]   piece   ::=   atom quantifier?
4806 */
4807static int
4808xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
4809    int ret;
4810
4811    ctxt->atom = NULL;
4812    ret = xmlFAParseAtom(ctxt);
4813    if (ret == 0)
4814	return(0);
4815    if (ctxt->atom == NULL) {
4816	ERROR("internal: no atom generated");
4817    }
4818    xmlFAParseQuantifier(ctxt);
4819    return(1);
4820}
4821
4822/**
4823 * xmlFAParseBranch:
4824 * @ctxt:  a regexp parser context
4825 *
4826 * [2]   branch   ::=   piece*
4827 8
4828 */
4829static int
4830xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) {
4831    xmlRegStatePtr previous;
4832    int ret;
4833
4834    previous = ctxt->state;
4835    ret = xmlFAParsePiece(ctxt);
4836    if (ret != 0) {
4837	if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0)
4838	    return(-1);
4839	previous = ctxt->state;
4840	ctxt->atom = NULL;
4841    }
4842    while ((ret != 0) && (ctxt->error == 0)) {
4843	ret = xmlFAParsePiece(ctxt);
4844	if (ret != 0) {
4845	    if (xmlFAGenerateTransitions(ctxt, previous, NULL,
4846					 ctxt->atom) < 0)
4847		    return(-1);
4848	    previous = ctxt->state;
4849	    ctxt->atom = NULL;
4850	}
4851    }
4852    return(0);
4853}
4854
4855/**
4856 * xmlFAParseRegExp:
4857 * @ctxt:  a regexp parser context
4858 * @top:  is this the top-level expression ?
4859 *
4860 * [1]   regExp   ::=     branch  ( '|' branch )*
4861 */
4862static void
4863xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
4864    xmlRegStatePtr start, end;
4865
4866    /* if not top start should have been generated by an epsilon trans */
4867    start = ctxt->state;
4868    ctxt->end = NULL;
4869    xmlFAParseBranch(ctxt);
4870    if (top) {
4871#ifdef DEBUG_REGEXP_GRAPH
4872	printf("State %d is final\n", ctxt->state->no);
4873#endif
4874	ctxt->state->type = XML_REGEXP_FINAL_STATE;
4875    }
4876    if (CUR != '|') {
4877	ctxt->end = ctxt->state;
4878	return;
4879    }
4880    end = ctxt->state;
4881    while ((CUR == '|') && (ctxt->error == 0)) {
4882	NEXT;
4883	ctxt->state = start;
4884	ctxt->end = NULL;
4885	xmlFAParseBranch(ctxt);
4886	if (top) {
4887	    ctxt->state->type = XML_REGEXP_FINAL_STATE;
4888#ifdef DEBUG_REGEXP_GRAPH
4889	    printf("State %d is final\n", ctxt->state->no);
4890#endif
4891	} else {
4892	    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end);
4893	}
4894    }
4895    if (!top) {
4896	ctxt->state = end;
4897	ctxt->end = end;
4898    }
4899}
4900
4901/************************************************************************
4902 * 									*
4903 * 			The basic API					*
4904 * 									*
4905 ************************************************************************/
4906
4907/**
4908 * xmlRegexpPrint:
4909 * @output: the file for the output debug
4910 * @regexp: the compiled regexp
4911 *
4912 * Print the content of the compiled regular expression
4913 */
4914void
4915xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
4916    int i;
4917
4918    if (output == NULL)
4919        return;
4920    fprintf(output, " regexp: ");
4921    if (regexp == NULL) {
4922	fprintf(output, "NULL\n");
4923	return;
4924    }
4925    fprintf(output, "'%s' ", regexp->string);
4926    fprintf(output, "\n");
4927    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
4928    for (i = 0;i < regexp->nbAtoms; i++) {
4929	fprintf(output, " %02d ", i);
4930	xmlRegPrintAtom(output, regexp->atoms[i]);
4931    }
4932    fprintf(output, "%d states:", regexp->nbStates);
4933    fprintf(output, "\n");
4934    for (i = 0;i < regexp->nbStates; i++) {
4935	xmlRegPrintState(output, regexp->states[i]);
4936    }
4937    fprintf(output, "%d counters:\n", regexp->nbCounters);
4938    for (i = 0;i < regexp->nbCounters; i++) {
4939	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
4940		                                regexp->counters[i].max);
4941    }
4942}
4943
4944/**
4945 * xmlRegexpCompile:
4946 * @regexp:  a regular expression string
4947 *
4948 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
4949 * Appendix F and builds an automata suitable for testing strings against
4950 * that regular expression
4951 *
4952 * Returns the compiled expression or NULL in case of error
4953 */
4954xmlRegexpPtr
4955xmlRegexpCompile(const xmlChar *regexp) {
4956    xmlRegexpPtr ret;
4957    xmlRegParserCtxtPtr ctxt;
4958
4959    ctxt = xmlRegNewParserCtxt(regexp);
4960    if (ctxt == NULL)
4961	return(NULL);
4962
4963    /* initialize the parser */
4964    ctxt->end = NULL;
4965    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
4966    xmlRegStatePush(ctxt, ctxt->start);
4967
4968    /* parse the expression building an automata */
4969    xmlFAParseRegExp(ctxt, 1);
4970    if (CUR != 0) {
4971	ERROR("xmlFAParseRegExp: extra characters");
4972    }
4973    ctxt->end = ctxt->state;
4974    ctxt->start->type = XML_REGEXP_START_STATE;
4975    ctxt->end->type = XML_REGEXP_FINAL_STATE;
4976
4977    /* remove the Epsilon except for counted transitions */
4978    xmlFAEliminateEpsilonTransitions(ctxt);
4979
4980
4981    if (ctxt->error != 0) {
4982	xmlRegFreeParserCtxt(ctxt);
4983	return(NULL);
4984    }
4985    ret = xmlRegEpxFromParse(ctxt);
4986    xmlRegFreeParserCtxt(ctxt);
4987    return(ret);
4988}
4989
4990/**
4991 * xmlRegexpExec:
4992 * @comp:  the compiled regular expression
4993 * @content:  the value to check against the regular expression
4994 *
4995 * Check if the regular expression generates the value
4996 *
4997 * Returns 1 if it matches, 0 if not and a negative value in case of error
4998 */
4999int
5000xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
5001    if ((comp == NULL) || (content == NULL))
5002	return(-1);
5003    return(xmlFARegExec(comp, content));
5004}
5005
5006/**
5007 * xmlRegexpIsDeterminist:
5008 * @comp:  the compiled regular expression
5009 *
5010 * Check if the regular expression is determinist
5011 *
5012 * Returns 1 if it yes, 0 if not and a negative value in case of error
5013 */
5014int
5015xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
5016    xmlAutomataPtr am;
5017    int ret;
5018
5019    if (comp == NULL)
5020	return(-1);
5021    if (comp->determinist != -1)
5022	return(comp->determinist);
5023
5024    am = xmlNewAutomata();
5025    if (am->states != NULL) {
5026	int i;
5027
5028	for (i = 0;i < am->nbStates;i++)
5029	    xmlRegFreeState(am->states[i]);
5030	xmlFree(am->states);
5031    }
5032    am->nbAtoms = comp->nbAtoms;
5033    am->atoms = comp->atoms;
5034    am->nbStates = comp->nbStates;
5035    am->states = comp->states;
5036    am->determinist = -1;
5037    ret = xmlFAComputesDeterminism(am);
5038    am->atoms = NULL;
5039    am->states = NULL;
5040    xmlFreeAutomata(am);
5041    return(ret);
5042}
5043
5044/**
5045 * xmlRegFreeRegexp:
5046 * @regexp:  the regexp
5047 *
5048 * Free a regexp
5049 */
5050void
5051xmlRegFreeRegexp(xmlRegexpPtr regexp) {
5052    int i;
5053    if (regexp == NULL)
5054	return;
5055
5056    if (regexp->string != NULL)
5057	xmlFree(regexp->string);
5058    if (regexp->states != NULL) {
5059	for (i = 0;i < regexp->nbStates;i++)
5060	    xmlRegFreeState(regexp->states[i]);
5061	xmlFree(regexp->states);
5062    }
5063    if (regexp->atoms != NULL) {
5064	for (i = 0;i < regexp->nbAtoms;i++)
5065	    xmlRegFreeAtom(regexp->atoms[i]);
5066	xmlFree(regexp->atoms);
5067    }
5068    if (regexp->counters != NULL)
5069	xmlFree(regexp->counters);
5070    if (regexp->compact != NULL)
5071	xmlFree(regexp->compact);
5072    if (regexp->transdata != NULL)
5073	xmlFree(regexp->transdata);
5074    if (regexp->stringMap != NULL) {
5075	for (i = 0; i < regexp->nbstrings;i++)
5076	    xmlFree(regexp->stringMap[i]);
5077	xmlFree(regexp->stringMap);
5078    }
5079
5080    xmlFree(regexp);
5081}
5082
5083#ifdef LIBXML_AUTOMATA_ENABLED
5084/************************************************************************
5085 * 									*
5086 * 			The Automata interface				*
5087 * 									*
5088 ************************************************************************/
5089
5090/**
5091 * xmlNewAutomata:
5092 *
5093 * Create a new automata
5094 *
5095 * Returns the new object or NULL in case of failure
5096 */
5097xmlAutomataPtr
5098xmlNewAutomata(void) {
5099    xmlAutomataPtr ctxt;
5100
5101    ctxt = xmlRegNewParserCtxt(NULL);
5102    if (ctxt == NULL)
5103	return(NULL);
5104
5105    /* initialize the parser */
5106    ctxt->end = NULL;
5107    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
5108    ctxt->start->type = XML_REGEXP_START_STATE;
5109    if (ctxt->start == NULL) {
5110	xmlFreeAutomata(ctxt);
5111	return(NULL);
5112    }
5113    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
5114        xmlRegFreeState(ctxt->start);
5115	xmlFreeAutomata(ctxt);
5116	return(NULL);
5117    }
5118
5119    return(ctxt);
5120}
5121
5122/**
5123 * xmlFreeAutomata:
5124 * @am: an automata
5125 *
5126 * Free an automata
5127 */
5128void
5129xmlFreeAutomata(xmlAutomataPtr am) {
5130    if (am == NULL)
5131	return;
5132    xmlRegFreeParserCtxt(am);
5133}
5134
5135/**
5136 * xmlAutomataGetInitState:
5137 * @am: an automata
5138 *
5139 * Initial state lookup
5140 *
5141 * Returns the initial state of the automata
5142 */
5143xmlAutomataStatePtr
5144xmlAutomataGetInitState(xmlAutomataPtr am) {
5145    if (am == NULL)
5146	return(NULL);
5147    return(am->start);
5148}
5149
5150/**
5151 * xmlAutomataSetFinalState:
5152 * @am: an automata
5153 * @state: a state in this automata
5154 *
5155 * Makes that state a final state
5156 *
5157 * Returns 0 or -1 in case of error
5158 */
5159int
5160xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
5161    if ((am == NULL) || (state == NULL))
5162	return(-1);
5163    state->type = XML_REGEXP_FINAL_STATE;
5164    return(0);
5165}
5166
5167/**
5168 * xmlAutomataNewTransition:
5169 * @am: an automata
5170 * @from: the starting point of the transition
5171 * @to: the target point of the transition or NULL
5172 * @token: the input string associated to that transition
5173 * @data: data passed to the callback function if the transition is activated
5174 *
5175 * If @to is NULL, this creates first a new target state in the automata
5176 * and then adds a transition from the @from state to the target state
5177 * activated by the value of @token
5178 *
5179 * Returns the target state or NULL in case of error
5180 */
5181xmlAutomataStatePtr
5182xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
5183			 xmlAutomataStatePtr to, const xmlChar *token,
5184			 void *data) {
5185    xmlRegAtomPtr atom;
5186
5187    if ((am == NULL) || (from == NULL) || (token == NULL))
5188	return(NULL);
5189    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5190    if (atom == NULL)
5191        return(NULL);
5192    atom->data = data;
5193    if (atom == NULL)
5194	return(NULL);
5195    atom->valuep = xmlStrdup(token);
5196
5197    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5198        xmlRegFreeAtom(atom);
5199	return(NULL);
5200    }
5201    if (to == NULL)
5202	return(am->state);
5203    return(to);
5204}
5205
5206/**
5207 * xmlAutomataNewTransition2:
5208 * @am: an automata
5209 * @from: the starting point of the transition
5210 * @to: the target point of the transition or NULL
5211 * @token: the first input string associated to that transition
5212 * @token2: the second input string associated to that transition
5213 * @data: data passed to the callback function if the transition is activated
5214 *
5215 * If @to is NULL, this creates first a new target state in the automata
5216 * and then adds a transition from the @from state to the target state
5217 * activated by the value of @token
5218 *
5219 * Returns the target state or NULL in case of error
5220 */
5221xmlAutomataStatePtr
5222xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5223			  xmlAutomataStatePtr to, const xmlChar *token,
5224			  const xmlChar *token2, void *data) {
5225    xmlRegAtomPtr atom;
5226
5227    if ((am == NULL) || (from == NULL) || (token == NULL))
5228	return(NULL);
5229    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5230    atom->data = data;
5231    if (atom == NULL)
5232	return(NULL);
5233    if ((token2 == NULL) || (*token2 == 0)) {
5234	atom->valuep = xmlStrdup(token);
5235    } else {
5236	int lenn, lenp;
5237	xmlChar *str;
5238
5239	lenn = strlen((char *) token2);
5240	lenp = strlen((char *) token);
5241
5242	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5243	if (str == NULL) {
5244	    xmlRegFreeAtom(atom);
5245	    return(NULL);
5246	}
5247	memcpy(&str[0], token, lenp);
5248	str[lenp] = '|';
5249	memcpy(&str[lenp + 1], token2, lenn);
5250	str[lenn + lenp + 1] = 0;
5251
5252	atom->valuep = str;
5253    }
5254
5255    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5256        xmlRegFreeAtom(atom);
5257	return(NULL);
5258    }
5259    if (to == NULL)
5260	return(am->state);
5261    return(to);
5262}
5263
5264/**
5265 * xmlAutomataNewNegTrans:
5266 * @am: an automata
5267 * @from: the starting point of the transition
5268 * @to: the target point of the transition or NULL
5269 * @token: the first input string associated to that transition
5270 * @token2: the second input string associated to that transition
5271 * @data: data passed to the callback function if the transition is activated
5272 *
5273 * If @to is NULL, this creates first a new target state in the automata
5274 * and then adds a transition from the @from state to the target state
5275 * activated by any value except (@token,@token2)
5276 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
5277 # the semantic of XSD ##other
5278 *
5279 * Returns the target state or NULL in case of error
5280 */
5281xmlAutomataStatePtr
5282xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5283		       xmlAutomataStatePtr to, const xmlChar *token,
5284		       const xmlChar *token2, void *data) {
5285    xmlRegAtomPtr atom;
5286    xmlChar err_msg[200];
5287
5288    if ((am == NULL) || (from == NULL) || (token == NULL))
5289	return(NULL);
5290    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5291    if (atom == NULL)
5292	return(NULL);
5293    atom->data = data;
5294    atom->neg = 1;
5295    if ((token2 == NULL) || (*token2 == 0)) {
5296	atom->valuep = xmlStrdup(token);
5297    } else {
5298	int lenn, lenp;
5299	xmlChar *str;
5300
5301	lenn = strlen((char *) token2);
5302	lenp = strlen((char *) token);
5303
5304	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5305	if (str == NULL) {
5306	    xmlRegFreeAtom(atom);
5307	    return(NULL);
5308	}
5309	memcpy(&str[0], token, lenp);
5310	str[lenp] = '|';
5311	memcpy(&str[lenp + 1], token2, lenn);
5312	str[lenn + lenp + 1] = 0;
5313
5314	atom->valuep = str;
5315    }
5316    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
5317    err_msg[199] = 0;
5318    atom->valuep2 = xmlStrdup(err_msg);
5319
5320    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5321        xmlRegFreeAtom(atom);
5322	return(NULL);
5323    }
5324    am->negs++;
5325    if (to == NULL)
5326	return(am->state);
5327    return(to);
5328}
5329
5330/**
5331 * xmlAutomataNewCountTrans2:
5332 * @am: an automata
5333 * @from: the starting point of the transition
5334 * @to: the target point of the transition or NULL
5335 * @token: the input string associated to that transition
5336 * @token2: the second input string associated to that transition
5337 * @min:  the minimum successive occurences of token
5338 * @max:  the maximum successive occurences of token
5339 * @data:  data associated to the transition
5340 *
5341 * If @to is NULL, this creates first a new target state in the automata
5342 * and then adds a transition from the @from state to the target state
5343 * activated by a succession of input of value @token and @token2 and
5344 * whose number is between @min and @max
5345 *
5346 * Returns the target state or NULL in case of error
5347 */
5348xmlAutomataStatePtr
5349xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5350			 xmlAutomataStatePtr to, const xmlChar *token,
5351			 const xmlChar *token2,
5352			 int min, int max, void *data) {
5353    xmlRegAtomPtr atom;
5354    int counter;
5355
5356    if ((am == NULL) || (from == NULL) || (token == NULL))
5357	return(NULL);
5358    if (min < 0)
5359	return(NULL);
5360    if ((max < min) || (max < 1))
5361	return(NULL);
5362    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5363    if (atom == NULL)
5364	return(NULL);
5365    if ((token2 == NULL) || (*token2 == 0)) {
5366	atom->valuep = xmlStrdup(token);
5367    } else {
5368	int lenn, lenp;
5369	xmlChar *str;
5370
5371	lenn = strlen((char *) token2);
5372	lenp = strlen((char *) token);
5373
5374	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5375	if (str == NULL) {
5376	    xmlRegFreeAtom(atom);
5377	    return(NULL);
5378	}
5379	memcpy(&str[0], token, lenp);
5380	str[lenp] = '|';
5381	memcpy(&str[lenp + 1], token2, lenn);
5382	str[lenn + lenp + 1] = 0;
5383
5384	atom->valuep = str;
5385    }
5386    atom->data = data;
5387    if (min == 0)
5388	atom->min = 1;
5389    else
5390	atom->min = min;
5391    atom->max = max;
5392
5393    /*
5394     * associate a counter to the transition.
5395     */
5396    counter = xmlRegGetCounter(am);
5397    am->counters[counter].min = min;
5398    am->counters[counter].max = max;
5399
5400    /* xmlFAGenerateTransitions(am, from, to, atom); */
5401    if (to == NULL) {
5402        to = xmlRegNewState(am);
5403	xmlRegStatePush(am, to);
5404    }
5405    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5406    xmlRegAtomPush(am, atom);
5407    am->state = to;
5408
5409    if (to == NULL)
5410	to = am->state;
5411    if (to == NULL)
5412	return(NULL);
5413    if (min == 0)
5414	xmlFAGenerateEpsilonTransition(am, from, to);
5415    return(to);
5416}
5417
5418/**
5419 * xmlAutomataNewCountTrans:
5420 * @am: an automata
5421 * @from: the starting point of the transition
5422 * @to: the target point of the transition or NULL
5423 * @token: the input string associated to that transition
5424 * @min:  the minimum successive occurences of token
5425 * @max:  the maximum successive occurences of token
5426 * @data:  data associated to the transition
5427 *
5428 * If @to is NULL, this creates first a new target state in the automata
5429 * and then adds a transition from the @from state to the target state
5430 * activated by a succession of input of value @token and whose number
5431 * is between @min and @max
5432 *
5433 * Returns the target state or NULL in case of error
5434 */
5435xmlAutomataStatePtr
5436xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5437			 xmlAutomataStatePtr to, const xmlChar *token,
5438			 int min, int max, void *data) {
5439    xmlRegAtomPtr atom;
5440    int counter;
5441
5442    if ((am == NULL) || (from == NULL) || (token == NULL))
5443	return(NULL);
5444    if (min < 0)
5445	return(NULL);
5446    if ((max < min) || (max < 1))
5447	return(NULL);
5448    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5449    if (atom == NULL)
5450	return(NULL);
5451    atom->valuep = xmlStrdup(token);
5452    atom->data = data;
5453    if (min == 0)
5454	atom->min = 1;
5455    else
5456	atom->min = min;
5457    atom->max = max;
5458
5459    /*
5460     * associate a counter to the transition.
5461     */
5462    counter = xmlRegGetCounter(am);
5463    am->counters[counter].min = min;
5464    am->counters[counter].max = max;
5465
5466    /* xmlFAGenerateTransitions(am, from, to, atom); */
5467    if (to == NULL) {
5468        to = xmlRegNewState(am);
5469	xmlRegStatePush(am, to);
5470    }
5471    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5472    xmlRegAtomPush(am, atom);
5473    am->state = to;
5474
5475    if (to == NULL)
5476	to = am->state;
5477    if (to == NULL)
5478	return(NULL);
5479    if (min == 0)
5480	xmlFAGenerateEpsilonTransition(am, from, to);
5481    return(to);
5482}
5483
5484/**
5485 * xmlAutomataNewOnceTrans2:
5486 * @am: an automata
5487 * @from: the starting point of the transition
5488 * @to: the target point of the transition or NULL
5489 * @token: the input string associated to that transition
5490 * @token2: the second input string associated to that transition
5491 * @min:  the minimum successive occurences of token
5492 * @max:  the maximum successive occurences of token
5493 * @data:  data associated to the transition
5494 *
5495 * If @to is NULL, this creates first a new target state in the automata
5496 * and then adds a transition from the @from state to the target state
5497 * activated by a succession of input of value @token and @token2 and whose
5498 * number is between @min and @max, moreover that transition can only be
5499 * crossed once.
5500 *
5501 * Returns the target state or NULL in case of error
5502 */
5503xmlAutomataStatePtr
5504xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5505			 xmlAutomataStatePtr to, const xmlChar *token,
5506			 const xmlChar *token2,
5507			 int min, int max, void *data) {
5508    xmlRegAtomPtr atom;
5509    int counter;
5510
5511    if ((am == NULL) || (from == NULL) || (token == NULL))
5512	return(NULL);
5513    if (min < 1)
5514	return(NULL);
5515    if ((max < min) || (max < 1))
5516	return(NULL);
5517    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5518    if (atom == NULL)
5519	return(NULL);
5520    if ((token2 == NULL) || (*token2 == 0)) {
5521	atom->valuep = xmlStrdup(token);
5522    } else {
5523	int lenn, lenp;
5524	xmlChar *str;
5525
5526	lenn = strlen((char *) token2);
5527	lenp = strlen((char *) token);
5528
5529	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5530	if (str == NULL) {
5531	    xmlRegFreeAtom(atom);
5532	    return(NULL);
5533	}
5534	memcpy(&str[0], token, lenp);
5535	str[lenp] = '|';
5536	memcpy(&str[lenp + 1], token2, lenn);
5537	str[lenn + lenp + 1] = 0;
5538
5539	atom->valuep = str;
5540    }
5541    atom->data = data;
5542    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5543    if (min == 0)
5544	atom->min = 1;
5545    else
5546	atom->min = min;
5547    atom->max = max;
5548    /*
5549     * associate a counter to the transition.
5550     */
5551    counter = xmlRegGetCounter(am);
5552    am->counters[counter].min = 1;
5553    am->counters[counter].max = 1;
5554
5555    /* xmlFAGenerateTransitions(am, from, to, atom); */
5556    if (to == NULL) {
5557	to = xmlRegNewState(am);
5558	xmlRegStatePush(am, to);
5559    }
5560    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5561    xmlRegAtomPush(am, atom);
5562    am->state = to;
5563    return(to);
5564}
5565
5566
5567
5568/**
5569 * xmlAutomataNewOnceTrans:
5570 * @am: an automata
5571 * @from: the starting point of the transition
5572 * @to: the target point of the transition or NULL
5573 * @token: the input string associated to that transition
5574 * @min:  the minimum successive occurences of token
5575 * @max:  the maximum successive occurences of token
5576 * @data:  data associated to the transition
5577 *
5578 * If @to is NULL, this creates first a new target state in the automata
5579 * and then adds a transition from the @from state to the target state
5580 * activated by a succession of input of value @token and whose number
5581 * is between @min and @max, moreover that transition can only be crossed
5582 * once.
5583 *
5584 * Returns the target state or NULL in case of error
5585 */
5586xmlAutomataStatePtr
5587xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5588			 xmlAutomataStatePtr to, const xmlChar *token,
5589			 int min, int max, void *data) {
5590    xmlRegAtomPtr atom;
5591    int counter;
5592
5593    if ((am == NULL) || (from == NULL) || (token == NULL))
5594	return(NULL);
5595    if (min < 1)
5596	return(NULL);
5597    if ((max < min) || (max < 1))
5598	return(NULL);
5599    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5600    if (atom == NULL)
5601	return(NULL);
5602    atom->valuep = xmlStrdup(token);
5603    atom->data = data;
5604    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5605    if (min == 0)
5606	atom->min = 1;
5607    else
5608	atom->min = min;
5609    atom->max = max;
5610    /*
5611     * associate a counter to the transition.
5612     */
5613    counter = xmlRegGetCounter(am);
5614    am->counters[counter].min = 1;
5615    am->counters[counter].max = 1;
5616
5617    /* xmlFAGenerateTransitions(am, from, to, atom); */
5618    if (to == NULL) {
5619	to = xmlRegNewState(am);
5620	xmlRegStatePush(am, to);
5621    }
5622    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
5623    xmlRegAtomPush(am, atom);
5624    am->state = to;
5625    return(to);
5626}
5627
5628/**
5629 * xmlAutomataNewState:
5630 * @am: an automata
5631 *
5632 * Create a new disconnected state in the automata
5633 *
5634 * Returns the new state or NULL in case of error
5635 */
5636xmlAutomataStatePtr
5637xmlAutomataNewState(xmlAutomataPtr am) {
5638    xmlAutomataStatePtr to;
5639
5640    if (am == NULL)
5641	return(NULL);
5642    to = xmlRegNewState(am);
5643    xmlRegStatePush(am, to);
5644    return(to);
5645}
5646
5647/**
5648 * xmlAutomataNewEpsilon:
5649 * @am: an automata
5650 * @from: the starting point of the transition
5651 * @to: the target point of the transition or NULL
5652 *
5653 * If @to is NULL, this creates first a new target state in the automata
5654 * and then adds an epsilon transition from the @from state to the
5655 * target state
5656 *
5657 * Returns the target state or NULL in case of error
5658 */
5659xmlAutomataStatePtr
5660xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
5661		      xmlAutomataStatePtr to) {
5662    if ((am == NULL) || (from == NULL))
5663	return(NULL);
5664    xmlFAGenerateEpsilonTransition(am, from, to);
5665    if (to == NULL)
5666	return(am->state);
5667    return(to);
5668}
5669
5670/**
5671 * xmlAutomataNewAllTrans:
5672 * @am: an automata
5673 * @from: the starting point of the transition
5674 * @to: the target point of the transition or NULL
5675 * @lax: allow to transition if not all all transitions have been activated
5676 *
5677 * If @to is NULL, this creates first a new target state in the automata
5678 * and then adds a an ALL transition from the @from state to the
5679 * target state. That transition is an epsilon transition allowed only when
5680 * all transitions from the @from node have been activated.
5681 *
5682 * Returns the target state or NULL in case of error
5683 */
5684xmlAutomataStatePtr
5685xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5686		       xmlAutomataStatePtr to, int lax) {
5687    if ((am == NULL) || (from == NULL))
5688	return(NULL);
5689    xmlFAGenerateAllTransition(am, from, to, lax);
5690    if (to == NULL)
5691	return(am->state);
5692    return(to);
5693}
5694
5695/**
5696 * xmlAutomataNewCounter:
5697 * @am: an automata
5698 * @min:  the minimal value on the counter
5699 * @max:  the maximal value on the counter
5700 *
5701 * Create a new counter
5702 *
5703 * Returns the counter number or -1 in case of error
5704 */
5705int
5706xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
5707    int ret;
5708
5709    if (am == NULL)
5710	return(-1);
5711
5712    ret = xmlRegGetCounter(am);
5713    if (ret < 0)
5714	return(-1);
5715    am->counters[ret].min = min;
5716    am->counters[ret].max = max;
5717    return(ret);
5718}
5719
5720/**
5721 * xmlAutomataNewCountedTrans:
5722 * @am: an automata
5723 * @from: the starting point of the transition
5724 * @to: the target point of the transition or NULL
5725 * @counter: the counter associated to that transition
5726 *
5727 * If @to is NULL, this creates first a new target state in the automata
5728 * and then adds an epsilon transition from the @from state to the target state
5729 * which will increment the counter provided
5730 *
5731 * Returns the target state or NULL in case of error
5732 */
5733xmlAutomataStatePtr
5734xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5735		xmlAutomataStatePtr to, int counter) {
5736    if ((am == NULL) || (from == NULL) || (counter < 0))
5737	return(NULL);
5738    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
5739    if (to == NULL)
5740	return(am->state);
5741    return(to);
5742}
5743
5744/**
5745 * xmlAutomataNewCounterTrans:
5746 * @am: an automata
5747 * @from: the starting point of the transition
5748 * @to: the target point of the transition or NULL
5749 * @counter: the counter associated to that transition
5750 *
5751 * If @to is NULL, this creates first a new target state in the automata
5752 * and then adds an epsilon transition from the @from state to the target state
5753 * which will be allowed only if the counter is within the right range.
5754 *
5755 * Returns the target state or NULL in case of error
5756 */
5757xmlAutomataStatePtr
5758xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5759		xmlAutomataStatePtr to, int counter) {
5760    if ((am == NULL) || (from == NULL) || (counter < 0))
5761	return(NULL);
5762    xmlFAGenerateCountedTransition(am, from, to, counter);
5763    if (to == NULL)
5764	return(am->state);
5765    return(to);
5766}
5767
5768/**
5769 * xmlAutomataCompile:
5770 * @am: an automata
5771 *
5772 * Compile the automata into a Reg Exp ready for being executed.
5773 * The automata should be free after this point.
5774 *
5775 * Returns the compiled regexp or NULL in case of error
5776 */
5777xmlRegexpPtr
5778xmlAutomataCompile(xmlAutomataPtr am) {
5779    xmlRegexpPtr ret;
5780
5781    if ((am == NULL) || (am->error != 0)) return(NULL);
5782    xmlFAEliminateEpsilonTransitions(am);
5783    /* xmlFAComputesDeterminism(am); */
5784    ret = xmlRegEpxFromParse(am);
5785
5786    return(ret);
5787}
5788
5789/**
5790 * xmlAutomataIsDeterminist:
5791 * @am: an automata
5792 *
5793 * Checks if an automata is determinist.
5794 *
5795 * Returns 1 if true, 0 if not, and -1 in case of error
5796 */
5797int
5798xmlAutomataIsDeterminist(xmlAutomataPtr am) {
5799    int ret;
5800
5801    if (am == NULL)
5802	return(-1);
5803
5804    ret = xmlFAComputesDeterminism(am);
5805    return(ret);
5806}
5807#endif /* LIBXML_AUTOMATA_ENABLED */
5808
5809#ifdef LIBXML_EXPR_ENABLED
5810/************************************************************************
5811 *									*
5812 *		Formal Expression handling code				*
5813 *									*
5814 ************************************************************************/
5815/************************************************************************
5816 *									*
5817 *		Expression handling context				*
5818 *									*
5819 ************************************************************************/
5820
5821struct _xmlExpCtxt {
5822    xmlDictPtr dict;
5823    xmlExpNodePtr *table;
5824    int size;
5825    int nbElems;
5826    int nb_nodes;
5827    const char *expr;
5828    const char *cur;
5829    int nb_cons;
5830    int tabSize;
5831};
5832
5833/**
5834 * xmlExpNewCtxt:
5835 * @maxNodes:  the maximum number of nodes
5836 * @dict:  optional dictionnary to use internally
5837 *
5838 * Creates a new context for manipulating expressions
5839 *
5840 * Returns the context or NULL in case of error
5841 */
5842xmlExpCtxtPtr
5843xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
5844    xmlExpCtxtPtr ret;
5845    int size = 256;
5846
5847    if (maxNodes <= 4096)
5848        maxNodes = 4096;
5849
5850    ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
5851    if (ret == NULL)
5852        return(NULL);
5853    memset(ret, 0, sizeof(xmlExpCtxt));
5854    ret->size = size;
5855    ret->nbElems = 0;
5856    ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
5857    if (ret->table == NULL) {
5858        xmlFree(ret);
5859	return(NULL);
5860    }
5861    memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
5862    if (dict == NULL) {
5863        ret->dict = xmlDictCreate();
5864	if (ret->dict == NULL) {
5865	    xmlFree(ret->table);
5866	    xmlFree(ret);
5867	    return(NULL);
5868	}
5869    } else {
5870        ret->dict = dict;
5871	xmlDictReference(ret->dict);
5872    }
5873    return(ret);
5874}
5875
5876/**
5877 * xmlExpFreeCtxt:
5878 * @ctxt:  an expression context
5879 *
5880 * Free an expression context
5881 */
5882void
5883xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
5884    if (ctxt == NULL)
5885        return;
5886    xmlDictFree(ctxt->dict);
5887    if (ctxt->table != NULL)
5888	xmlFree(ctxt->table);
5889    xmlFree(ctxt);
5890}
5891
5892/************************************************************************
5893 *									*
5894 *		Structure associated to an expression node		*
5895 *									*
5896 ************************************************************************/
5897#define MAX_NODES 10000
5898
5899/* #define DEBUG_DERIV */
5900
5901/*
5902 * TODO:
5903 * - Wildcards
5904 * - public API for creation
5905 *
5906 * Started
5907 * - regression testing
5908 *
5909 * Done
5910 * - split into module and test tool
5911 * - memleaks
5912 */
5913
5914typedef enum {
5915    XML_EXP_NILABLE = (1 << 0)
5916} xmlExpNodeInfo;
5917
5918#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
5919
5920struct _xmlExpNode {
5921    unsigned char type;/* xmlExpNodeType */
5922    unsigned char info;/* OR of xmlExpNodeInfo */
5923    unsigned short key;	/* the hash key */
5924    unsigned int ref;	/* The number of references */
5925    int c_max;		/* the maximum length it can consume */
5926    xmlExpNodePtr exp_left;
5927    xmlExpNodePtr next;/* the next node in the hash table or free list */
5928    union {
5929	struct {
5930	    int f_min;
5931	    int f_max;
5932	} count;
5933	struct {
5934	    xmlExpNodePtr f_right;
5935	} children;
5936        const xmlChar *f_str;
5937    } field;
5938};
5939
5940#define exp_min field.count.f_min
5941#define exp_max field.count.f_max
5942/* #define exp_left field.children.f_left */
5943#define exp_right field.children.f_right
5944#define exp_str field.f_str
5945
5946static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
5947static xmlExpNode forbiddenExpNode = {
5948    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
5949};
5950xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
5951static xmlExpNode emptyExpNode = {
5952    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
5953};
5954xmlExpNodePtr emptyExp = &emptyExpNode;
5955
5956/************************************************************************
5957 *									*
5958 *  The custom hash table for unicity and canonicalization		*
5959 *  of sub-expressions pointers						*
5960 *									*
5961 ************************************************************************/
5962/*
5963 * xmlExpHashNameComputeKey:
5964 * Calculate the hash key for a token
5965 */
5966static unsigned short
5967xmlExpHashNameComputeKey(const xmlChar *name) {
5968    unsigned short value = 0L;
5969    char ch;
5970
5971    if (name != NULL) {
5972	value += 30 * (*name);
5973	while ((ch = *name++) != 0) {
5974	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
5975	}
5976    }
5977    return (value);
5978}
5979
5980/*
5981 * xmlExpHashComputeKey:
5982 * Calculate the hash key for a compound expression
5983 */
5984static unsigned short
5985xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
5986                     xmlExpNodePtr right) {
5987    unsigned long value;
5988    unsigned short ret;
5989
5990    switch (type) {
5991        case XML_EXP_SEQ:
5992	    value = left->key;
5993	    value += right->key;
5994	    value *= 3;
5995	    ret = (unsigned short) value;
5996	    break;
5997        case XML_EXP_OR:
5998	    value = left->key;
5999	    value += right->key;
6000	    value *= 7;
6001	    ret = (unsigned short) value;
6002	    break;
6003        case XML_EXP_COUNT:
6004	    value = left->key;
6005	    value += right->key;
6006	    ret = (unsigned short) value;
6007	    break;
6008	default:
6009	    ret = 0;
6010    }
6011    return(ret);
6012}
6013
6014
6015static xmlExpNodePtr
6016xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
6017    xmlExpNodePtr ret;
6018
6019    if (ctxt->nb_nodes >= MAX_NODES)
6020        return(NULL);
6021    ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
6022    if (ret == NULL)
6023        return(NULL);
6024    memset(ret, 0, sizeof(xmlExpNode));
6025    ret->type = type;
6026    ret->next = NULL;
6027    ctxt->nb_nodes++;
6028    ctxt->nb_cons++;
6029    return(ret);
6030}
6031
6032/**
6033 * xmlExpHashGetEntry:
6034 * @table: the hash table
6035 *
6036 * Get the unique entry from the hash table. The entry is created if
6037 * needed. @left and @right are consumed, i.e. their ref count will
6038 * be decremented by the operation.
6039 *
6040 * Returns the pointer or NULL in case of error
6041 */
6042static xmlExpNodePtr
6043xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
6044                   xmlExpNodePtr left, xmlExpNodePtr right,
6045		   const xmlChar *name, int min, int max) {
6046    unsigned short kbase, key;
6047    xmlExpNodePtr entry;
6048    xmlExpNodePtr insert;
6049
6050    if (ctxt == NULL)
6051	return(NULL);
6052
6053    /*
6054     * Check for duplicate and insertion location.
6055     */
6056    if (type == XML_EXP_ATOM) {
6057	kbase = xmlExpHashNameComputeKey(name);
6058    } else if (type == XML_EXP_COUNT) {
6059        /* COUNT reduction rule 1 */
6060	/* a{1} -> a */
6061	if (min == max) {
6062	    if (min == 1) {
6063		return(left);
6064	    }
6065	    if (min == 0) {
6066		xmlExpFree(ctxt, left);
6067	        return(emptyExp);
6068	    }
6069	}
6070	if (min < 0) {
6071	    xmlExpFree(ctxt, left);
6072	    return(forbiddenExp);
6073	}
6074        if (max == -1)
6075	    kbase = min + 79;
6076	else
6077	    kbase = max - min;
6078	kbase += left->key;
6079    } else if (type == XML_EXP_OR) {
6080        /* Forbid reduction rules */
6081        if (left->type == XML_EXP_FORBID) {
6082	    xmlExpFree(ctxt, left);
6083	    return(right);
6084	}
6085        if (right->type == XML_EXP_FORBID) {
6086	    xmlExpFree(ctxt, right);
6087	    return(left);
6088	}
6089
6090        /* OR reduction rule 1 */
6091	/* a | a reduced to a */
6092        if (left == right) {
6093	    left->ref--;
6094	    return(left);
6095	}
6096        /* OR canonicalization rule 1 */
6097	/* linearize (a | b) | c into a | (b | c) */
6098        if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
6099	    xmlExpNodePtr tmp = left;
6100            left = right;
6101	    right = tmp;
6102	}
6103        /* OR reduction rule 2 */
6104	/* a | (a | b) and b | (a | b) are reduced to a | b */
6105        if (right->type == XML_EXP_OR) {
6106	    if ((left == right->exp_left) ||
6107	        (left == right->exp_right)) {
6108		xmlExpFree(ctxt, left);
6109		return(right);
6110	    }
6111	}
6112        /* OR canonicalization rule 2 */
6113	/* linearize (a | b) | c into a | (b | c) */
6114        if (left->type == XML_EXP_OR) {
6115	    xmlExpNodePtr tmp;
6116
6117	    /* OR canonicalization rule 2 */
6118	    if ((left->exp_right->type != XML_EXP_OR) &&
6119	        (left->exp_right->key < left->exp_left->key)) {
6120	        tmp = left->exp_right;
6121		left->exp_right = left->exp_left;
6122		left->exp_left = tmp;
6123	    }
6124	    left->exp_right->ref++;
6125	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
6126	                             NULL, 0, 0);
6127	    left->exp_left->ref++;
6128	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
6129	                             NULL, 0, 0);
6130
6131	    xmlExpFree(ctxt, left);
6132	    return(tmp);
6133	}
6134	if (right->type == XML_EXP_OR) {
6135	    /* Ordering in the tree */
6136	    /* C | (A | B) -> A | (B | C) */
6137	    if (left->key > right->exp_right->key) {
6138		xmlExpNodePtr tmp;
6139		right->exp_right->ref++;
6140		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
6141		                         left, NULL, 0, 0);
6142		right->exp_left->ref++;
6143		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6144		                         tmp, NULL, 0, 0);
6145		xmlExpFree(ctxt, right);
6146		return(tmp);
6147	    }
6148	    /* Ordering in the tree */
6149	    /* B | (A | C) -> A | (B | C) */
6150	    if (left->key > right->exp_left->key) {
6151		xmlExpNodePtr tmp;
6152		right->exp_right->ref++;
6153		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
6154		                         right->exp_right, NULL, 0, 0);
6155		right->exp_left->ref++;
6156		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
6157		                         tmp, NULL, 0, 0);
6158		xmlExpFree(ctxt, right);
6159		return(tmp);
6160	    }
6161	}
6162	/* we know both types are != XML_EXP_OR here */
6163        else if (left->key > right->key) {
6164	    xmlExpNodePtr tmp = left;
6165            left = right;
6166	    right = tmp;
6167	}
6168	kbase = xmlExpHashComputeKey(type, left, right);
6169    } else if (type == XML_EXP_SEQ) {
6170        /* Forbid reduction rules */
6171        if (left->type == XML_EXP_FORBID) {
6172	    xmlExpFree(ctxt, right);
6173	    return(left);
6174	}
6175        if (right->type == XML_EXP_FORBID) {
6176	    xmlExpFree(ctxt, left);
6177	    return(right);
6178	}
6179        /* Empty reduction rules */
6180        if (right->type == XML_EXP_EMPTY) {
6181	    return(left);
6182	}
6183        if (left->type == XML_EXP_EMPTY) {
6184	    return(right);
6185	}
6186	kbase = xmlExpHashComputeKey(type, left, right);
6187    } else
6188        return(NULL);
6189
6190    key = kbase % ctxt->size;
6191    if (ctxt->table[key] != NULL) {
6192	for (insert = ctxt->table[key]; insert != NULL;
6193	     insert = insert->next) {
6194	    if ((insert->key == kbase) &&
6195	        (insert->type == type)) {
6196		if (type == XML_EXP_ATOM) {
6197		    if (name == insert->exp_str) {
6198			insert->ref++;
6199			return(insert);
6200		    }
6201		} else if (type == XML_EXP_COUNT) {
6202		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
6203		        (insert->exp_left == left)) {
6204			insert->ref++;
6205			left->ref--;
6206			return(insert);
6207		    }
6208		} else if ((insert->exp_left == left) &&
6209			   (insert->exp_right == right)) {
6210		    insert->ref++;
6211		    left->ref--;
6212		    right->ref--;
6213		    return(insert);
6214		}
6215	    }
6216	}
6217    }
6218
6219    entry = xmlExpNewNode(ctxt, type);
6220    if (entry == NULL)
6221        return(NULL);
6222    entry->key = kbase;
6223    if (type == XML_EXP_ATOM) {
6224	entry->exp_str = name;
6225	entry->c_max = 1;
6226    } else if (type == XML_EXP_COUNT) {
6227        entry->exp_min = min;
6228        entry->exp_max = max;
6229	entry->exp_left = left;
6230	if ((min == 0) || (IS_NILLABLE(left)))
6231	    entry->info |= XML_EXP_NILABLE;
6232	if (max < 0)
6233	    entry->c_max = -1;
6234	else
6235	    entry->c_max = max * entry->exp_left->c_max;
6236    } else {
6237	entry->exp_left = left;
6238	entry->exp_right = right;
6239	if (type == XML_EXP_OR) {
6240	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
6241		entry->info |= XML_EXP_NILABLE;
6242	    if ((entry->exp_left->c_max == -1) ||
6243	        (entry->exp_right->c_max == -1))
6244		entry->c_max = -1;
6245	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
6246	        entry->c_max = entry->exp_left->c_max;
6247	    else
6248	        entry->c_max = entry->exp_right->c_max;
6249	} else {
6250	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
6251		entry->info |= XML_EXP_NILABLE;
6252	    if ((entry->exp_left->c_max == -1) ||
6253	        (entry->exp_right->c_max == -1))
6254		entry->c_max = -1;
6255	    else
6256	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
6257	}
6258    }
6259    entry->ref = 1;
6260    if (ctxt->table[key] != NULL)
6261        entry->next = ctxt->table[key];
6262
6263    ctxt->table[key] = entry;
6264    ctxt->nbElems++;
6265
6266    return(entry);
6267}
6268
6269/**
6270 * xmlExpFree:
6271 * @ctxt: the expression context
6272 * @exp: the expression
6273 *
6274 * Dereference the expression
6275 */
6276void
6277xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
6278    if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
6279        return;
6280    exp->ref--;
6281    if (exp->ref == 0) {
6282        unsigned short key;
6283
6284        /* Unlink it first from the hash table */
6285	key = exp->key % ctxt->size;
6286	if (ctxt->table[key] == exp) {
6287	    ctxt->table[key] = exp->next;
6288	} else {
6289	    xmlExpNodePtr tmp;
6290
6291	    tmp = ctxt->table[key];
6292	    while (tmp != NULL) {
6293	        if (tmp->next == exp) {
6294		    tmp->next = exp->next;
6295		    break;
6296		}
6297	        tmp = tmp->next;
6298	    }
6299	}
6300
6301        if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
6302	    xmlExpFree(ctxt, exp->exp_left);
6303	    xmlExpFree(ctxt, exp->exp_right);
6304	} else if (exp->type == XML_EXP_COUNT) {
6305	    xmlExpFree(ctxt, exp->exp_left);
6306	}
6307        xmlFree(exp);
6308	ctxt->nb_nodes--;
6309    }
6310}
6311
6312/**
6313 * xmlExpRef:
6314 * @exp: the expression
6315 *
6316 * Increase the reference count of the expression
6317 */
6318void
6319xmlExpRef(xmlExpNodePtr exp) {
6320    if (exp != NULL)
6321        exp->ref++;
6322}
6323
6324/**
6325 * xmlExpNewAtom:
6326 * @ctxt: the expression context
6327 * @name: the atom name
6328 * @len: the atom name lenght in byte (or -1);
6329 *
6330 * Get the atom associated to this name from that context
6331 *
6332 * Returns the node or NULL in case of error
6333 */
6334xmlExpNodePtr
6335xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
6336    if ((ctxt == NULL) || (name == NULL))
6337        return(NULL);
6338    name = xmlDictLookup(ctxt->dict, name, len);
6339    if (name == NULL)
6340        return(NULL);
6341    return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
6342}
6343
6344/**
6345 * xmlExpNewOr:
6346 * @ctxt: the expression context
6347 * @left: left expression
6348 * @right: right expression
6349 *
6350 * Get the atom associated to the choice @left | @right
6351 * Note that @left and @right are consumed in the operation, to keep
6352 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6353 * this is true even in case of failure (unless ctxt == NULL).
6354 *
6355 * Returns the node or NULL in case of error
6356 */
6357xmlExpNodePtr
6358xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6359    if ((ctxt == NULL) || (left == NULL) || (right == NULL)) {
6360        xmlExpFree(ctxt, left);
6361        xmlExpFree(ctxt, right);
6362        return(NULL);
6363    }
6364    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
6365}
6366
6367/**
6368 * xmlExpNewSeq:
6369 * @ctxt: the expression context
6370 * @left: left expression
6371 * @right: right expression
6372 *
6373 * Get the atom associated to the sequence @left , @right
6374 * Note that @left and @right are consumed in the operation, to keep
6375 * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
6376 * this is true even in case of failure (unless ctxt == NULL).
6377 *
6378 * Returns the node or NULL in case of error
6379 */
6380xmlExpNodePtr
6381xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
6382    if ((ctxt == NULL) || (left == NULL) || (right == NULL)) {
6383        xmlExpFree(ctxt, left);
6384        xmlExpFree(ctxt, right);
6385        return(NULL);
6386    }
6387    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
6388}
6389
6390/**
6391 * xmlExpNewRange:
6392 * @ctxt: the expression context
6393 * @subset: the expression to be repeated
6394 * @min: the lower bound for the repetition
6395 * @max: the upper bound for the repetition, -1 means infinite
6396 *
6397 * Get the atom associated to the range (@subset){@min, @max}
6398 * Note that @subset is consumed in the operation, to keep
6399 * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
6400 * this is true even in case of failure (unless ctxt == NULL).
6401 *
6402 * Returns the node or NULL in case of error
6403 */
6404xmlExpNodePtr
6405xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
6406    if ((ctxt == NULL) || (subset == NULL) || (min < 0) || (max < -1) ||
6407        ((max >= 0) && (min > max))) {
6408	xmlExpFree(ctxt, subset);
6409        return(NULL);
6410    }
6411    return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
6412                              NULL, NULL, min, max));
6413}
6414
6415/************************************************************************
6416 *									*
6417 *		Public API for operations on expressions		*
6418 *									*
6419 ************************************************************************/
6420
6421static int
6422xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6423                     const xmlChar**list, int len, int nb) {
6424    int tmp, tmp2;
6425tail:
6426    switch (exp->type) {
6427        case XML_EXP_EMPTY:
6428	    return(0);
6429        case XML_EXP_ATOM:
6430	    for (tmp = 0;tmp < nb;tmp++)
6431	        if (list[tmp] == exp->exp_str)
6432		    return(0);
6433            if (nb >= len)
6434	        return(-2);
6435	    list[nb++] = exp->exp_str;
6436	    return(1);
6437        case XML_EXP_COUNT:
6438	    exp = exp->exp_left;
6439	    goto tail;
6440        case XML_EXP_SEQ:
6441        case XML_EXP_OR:
6442	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
6443	    if (tmp < 0)
6444	        return(tmp);
6445	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
6446	                                nb + tmp);
6447	    if (tmp2 < 0)
6448	        return(tmp2);
6449            return(tmp + tmp2);
6450    }
6451    return(-1);
6452}
6453
6454/**
6455 * xmlExpGetLanguage:
6456 * @ctxt: the expression context
6457 * @exp: the expression
6458 * @list: where to store the tokens
6459 * @len: the allocated lenght of @list
6460 *
6461 * Find all the strings used in @exp and store them in @list
6462 *
6463 * Returns the number of unique strings found, -1 in case of errors and
6464 *         -2 if there is more than @len strings
6465 */
6466int
6467xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6468                  const xmlChar**list, int len) {
6469    if ((ctxt == NULL) || (exp == NULL) || (list == NULL) || (len <= 0))
6470        return(-1);
6471    return(xmlExpGetLanguageInt(ctxt, exp, list, len, 0));
6472}
6473
6474static int
6475xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6476                  const xmlChar**list, int len, int nb) {
6477    int tmp, tmp2;
6478tail:
6479    switch (exp->type) {
6480        case XML_EXP_FORBID:
6481	    return(0);
6482        case XML_EXP_EMPTY:
6483	    return(0);
6484        case XML_EXP_ATOM:
6485	    for (tmp = 0;tmp < nb;tmp++)
6486	        if (list[tmp] == exp->exp_str)
6487		    return(0);
6488            if (nb >= len)
6489	        return(-2);
6490	    list[nb++] = exp->exp_str;
6491	    return(1);
6492        case XML_EXP_COUNT:
6493	    exp = exp->exp_left;
6494	    goto tail;
6495        case XML_EXP_SEQ:
6496	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
6497	    if (tmp < 0)
6498	        return(tmp);
6499	    if (IS_NILLABLE(exp->exp_left)) {
6500		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
6501					    nb + tmp);
6502		if (tmp2 < 0)
6503		    return(tmp2);
6504		tmp += tmp2;
6505	    }
6506            return(tmp);
6507        case XML_EXP_OR:
6508	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
6509	    if (tmp < 0)
6510	        return(tmp);
6511	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
6512	                                nb + tmp);
6513	    if (tmp2 < 0)
6514	        return(tmp2);
6515            return(tmp + tmp2);
6516    }
6517    return(-1);
6518}
6519
6520/**
6521 * xmlExpGetStart:
6522 * @ctxt: the expression context
6523 * @exp: the expression
6524 * @list: where to store the tokens
6525 * @len: the allocated lenght of @list
6526 *
6527 * Find all the strings that appears at the start of the languages
6528 * accepted by @exp and store them in @list. E.g. for (a, b) | c
6529 * it will return the list [a, c]
6530 *
6531 * Returns the number of unique strings found, -1 in case of errors and
6532 *         -2 if there is more than @len strings
6533 */
6534int
6535xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6536               const xmlChar**list, int len) {
6537    if ((ctxt == NULL) || (exp == NULL) || (list == NULL) || (len <= 0))
6538        return(-1);
6539    return(xmlExpGetStartInt(ctxt, exp, list, len, 0));
6540}
6541
6542/**
6543 * xmlExpIsNillable:
6544 * @exp: the expression
6545 *
6546 * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
6547 *
6548 * Returns 1 if nillable, 0 if not and -1 in case of error
6549 */
6550int
6551xmlExpIsNillable(xmlExpNodePtr exp) {
6552    if (exp == NULL)
6553        return(-1);
6554    return(IS_NILLABLE(exp) != 0);
6555}
6556
6557static xmlExpNodePtr
6558xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
6559{
6560    xmlExpNodePtr ret;
6561
6562    switch (exp->type) {
6563	case XML_EXP_EMPTY:
6564	    return(forbiddenExp);
6565	case XML_EXP_FORBID:
6566	    return(forbiddenExp);
6567	case XML_EXP_ATOM:
6568	    if (exp->exp_str == str) {
6569#ifdef DEBUG_DERIV
6570		printf("deriv atom: equal => Empty\n");
6571#endif
6572	        ret = emptyExp;
6573	    } else {
6574#ifdef DEBUG_DERIV
6575		printf("deriv atom: mismatch => forbid\n");
6576#endif
6577	        /* TODO wildcards here */
6578		ret = forbiddenExp;
6579	    }
6580	    return(ret);
6581	case XML_EXP_OR: {
6582	    xmlExpNodePtr tmp;
6583
6584#ifdef DEBUG_DERIV
6585	    printf("deriv or: => or(derivs)\n");
6586#endif
6587	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
6588	    if (tmp == NULL) {
6589		return(NULL);
6590	    }
6591	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
6592	    if (ret == NULL) {
6593	        xmlExpFree(ctxt, tmp);
6594		return(NULL);
6595	    }
6596            ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
6597			     NULL, 0, 0);
6598	    return(ret);
6599	}
6600	case XML_EXP_SEQ:
6601#ifdef DEBUG_DERIV
6602	    printf("deriv seq: starting with left\n");
6603#endif
6604	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
6605	    if (ret == NULL) {
6606	        return(NULL);
6607	    } else if (ret == forbiddenExp) {
6608	        if (IS_NILLABLE(exp->exp_left)) {
6609#ifdef DEBUG_DERIV
6610		    printf("deriv seq: left failed but nillable\n");
6611#endif
6612		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
6613		}
6614	    } else {
6615#ifdef DEBUG_DERIV
6616		printf("deriv seq: left match => sequence\n");
6617#endif
6618	        exp->exp_right->ref++;
6619	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
6620		                         NULL, 0, 0);
6621	    }
6622	    return(ret);
6623	case XML_EXP_COUNT: {
6624	    int min, max;
6625	    xmlExpNodePtr tmp;
6626
6627	    if (exp->exp_max == 0)
6628		return(forbiddenExp);
6629	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
6630	    if (ret == NULL)
6631	        return(NULL);
6632	    if (ret == forbiddenExp) {
6633#ifdef DEBUG_DERIV
6634		printf("deriv count: pattern mismatch => forbid\n");
6635#endif
6636	        return(ret);
6637	    }
6638	    if (exp->exp_max == 1)
6639		return(ret);
6640	    if (exp->exp_max < 0) /* unbounded */
6641		max = -1;
6642	    else
6643		max = exp->exp_max - 1;
6644	    if (exp->exp_min > 0)
6645		min = exp->exp_min - 1;
6646	    else
6647		min = 0;
6648	    exp->exp_left->ref++;
6649	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
6650				     NULL, min, max);
6651	    if (ret == emptyExp) {
6652#ifdef DEBUG_DERIV
6653		printf("deriv count: match to empty => new count\n");
6654#endif
6655	        return(tmp);
6656	    }
6657#ifdef DEBUG_DERIV
6658	    printf("deriv count: match => sequence with new count\n");
6659#endif
6660	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
6661	                              NULL, 0, 0));
6662	}
6663    }
6664    return(NULL);
6665}
6666
6667/**
6668 * xmlExpStringDerive:
6669 * @ctxt: the expression context
6670 * @exp: the expression
6671 * @str: the string
6672 * @len: the string len in bytes if available
6673 *
6674 * Do one step of Brzozowski derivation of the expression @exp with
6675 * respect to the input string
6676 *
6677 * Returns the resulting expression or NULL in case of internal error
6678 */
6679xmlExpNodePtr
6680xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6681                   const xmlChar *str, int len) {
6682    const xmlChar *input;
6683
6684    if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
6685        return(NULL);
6686    }
6687    /*
6688     * check the string is in the dictionnary, if yes use an interned
6689     * copy, otherwise we know it's not an acceptable input
6690     */
6691    input = xmlDictExists(ctxt->dict, str, len);
6692    if (input == NULL) {
6693        return(forbiddenExp);
6694    }
6695    return(xmlExpStringDeriveInt(ctxt, exp, input));
6696}
6697
6698static int
6699xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
6700    int ret = 1;
6701
6702    if (sub->c_max == -1) {
6703        if (exp->c_max != -1)
6704	    ret = 0;
6705    } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
6706        ret = 0;
6707    }
6708#if 0
6709    if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
6710        ret = 0;
6711#endif
6712    return(ret);
6713}
6714
6715static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
6716                                        xmlExpNodePtr sub);
6717/**
6718 * xmlExpDivide:
6719 * @ctxt: the expressions context
6720 * @exp: the englobing expression
6721 * @sub: the subexpression
6722 * @mult: the multiple expression
6723 * @remain: the remain from the derivation of the multiple
6724 *
6725 * Check if exp is a multiple of sub, i.e. if there is a finite number n
6726 * so that sub{n} subsume exp
6727 *
6728 * Returns the multiple value if successful, 0 if it is not a multiple
6729 *         and -1 in case of internel error.
6730 */
6731
6732static int
6733xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
6734             xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
6735    int i;
6736    xmlExpNodePtr tmp, tmp2;
6737
6738    if (mult != NULL) *mult = NULL;
6739    if (remain != NULL) *remain = NULL;
6740    if (exp->c_max == -1) return(0);
6741    if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
6742
6743    for (i = 1;i <= exp->c_max;i++) {
6744        sub->ref++;
6745        tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
6746				 sub, NULL, NULL, i, i);
6747	if (tmp == NULL) {
6748	    return(-1);
6749	}
6750	if (!xmlExpCheckCard(tmp, exp)) {
6751	    xmlExpFree(ctxt, tmp);
6752	    continue;
6753	}
6754	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
6755	if (tmp2 == NULL) {
6756	    xmlExpFree(ctxt, tmp);
6757	    return(-1);
6758	}
6759	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
6760	    if (remain != NULL)
6761	        *remain = tmp2;
6762	    else
6763	        xmlExpFree(ctxt, tmp2);
6764	    if (mult != NULL)
6765	        *mult = tmp;
6766	    else
6767	        xmlExpFree(ctxt, tmp);
6768#ifdef DEBUG_DERIV
6769	    printf("Divide succeeded %d\n", i);
6770#endif
6771	    return(i);
6772	}
6773	xmlExpFree(ctxt, tmp);
6774	xmlExpFree(ctxt, tmp2);
6775    }
6776#ifdef DEBUG_DERIV
6777    printf("Divide failed\n");
6778#endif
6779    return(0);
6780}
6781
6782/**
6783 * xmlExpExpDeriveInt:
6784 * @ctxt: the expressions context
6785 * @exp: the englobing expression
6786 * @sub: the subexpression
6787 *
6788 * Try to do a step of Brzozowski derivation but at a higher level
6789 * the input being a subexpression.
6790 *
6791 * Returns the resulting expression or NULL in case of internal error
6792 */
6793static xmlExpNodePtr
6794xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
6795    xmlExpNodePtr ret, tmp, tmp2, tmp3;
6796    const xmlChar **tab;
6797    int len, i;
6798
6799    /*
6800     * In case of equality and if the expression can only consume a finite
6801     * amount, then the derivation is empty
6802     */
6803    if ((exp == sub) && (exp->c_max >= 0)) {
6804#ifdef DEBUG_DERIV
6805        printf("Equal(exp, sub) and finite -> Empty\n");
6806#endif
6807        return(emptyExp);
6808    }
6809    /*
6810     * decompose sub sequence first
6811     */
6812    if (sub->type == XML_EXP_EMPTY) {
6813#ifdef DEBUG_DERIV
6814        printf("Empty(sub) -> Empty\n");
6815#endif
6816	exp->ref++;
6817        return(exp);
6818    }
6819    if (sub->type == XML_EXP_SEQ) {
6820#ifdef DEBUG_DERIV
6821        printf("Seq(sub) -> decompose\n");
6822#endif
6823        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
6824	if (tmp == NULL)
6825	    return(NULL);
6826	if (tmp == forbiddenExp)
6827	    return(tmp);
6828	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
6829	xmlExpFree(ctxt, tmp);
6830	return(ret);
6831    }
6832    if (sub->type == XML_EXP_OR) {
6833#ifdef DEBUG_DERIV
6834        printf("Or(sub) -> decompose\n");
6835#endif
6836        tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
6837	if (tmp == forbiddenExp)
6838	    return(tmp);
6839	if (tmp == NULL)
6840	    return(NULL);
6841	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
6842	if ((ret == NULL) || (ret == forbiddenExp)) {
6843	    xmlExpFree(ctxt, tmp);
6844	    return(ret);
6845	}
6846	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
6847    }
6848    if (!xmlExpCheckCard(exp, sub)) {
6849#ifdef DEBUG_DERIV
6850        printf("CheckCard(exp, sub) failed -> Forbid\n");
6851#endif
6852        return(forbiddenExp);
6853    }
6854    switch (exp->type) {
6855        case XML_EXP_EMPTY:
6856	    if (sub == emptyExp)
6857	        return(emptyExp);
6858#ifdef DEBUG_DERIV
6859	    printf("Empty(exp) -> Forbid\n");
6860#endif
6861	    return(forbiddenExp);
6862        case XML_EXP_FORBID:
6863#ifdef DEBUG_DERIV
6864	    printf("Forbid(exp) -> Forbid\n");
6865#endif
6866	    return(forbiddenExp);
6867        case XML_EXP_ATOM:
6868	    if (sub->type == XML_EXP_ATOM) {
6869	        /* TODO: handle wildcards */
6870	        if (exp->exp_str == sub->exp_str) {
6871#ifdef DEBUG_DERIV
6872		    printf("Atom match -> Empty\n");
6873#endif
6874		    return(emptyExp);
6875                }
6876#ifdef DEBUG_DERIV
6877		printf("Atom mismatch -> Forbid\n");
6878#endif
6879	        return(forbiddenExp);
6880	    }
6881	    if ((sub->type == XML_EXP_COUNT) &&
6882	        (sub->exp_max == 1) &&
6883	        (sub->exp_left->type == XML_EXP_ATOM)) {
6884	        /* TODO: handle wildcards */
6885	        if (exp->exp_str == sub->exp_left->exp_str) {
6886#ifdef DEBUG_DERIV
6887		    printf("Atom match -> Empty\n");
6888#endif
6889		    return(emptyExp);
6890		}
6891#ifdef DEBUG_DERIV
6892		printf("Atom mismatch -> Forbid\n");
6893#endif
6894	        return(forbiddenExp);
6895	    }
6896#ifdef DEBUG_DERIV
6897	    printf("Compex exp vs Atom -> Forbid\n");
6898#endif
6899	    return(forbiddenExp);
6900        case XML_EXP_SEQ:
6901	    /* try to get the sequence consumed only if possible */
6902	    if (xmlExpCheckCard(exp->exp_left, sub)) {
6903		/* See if the sequence can be consumed directly */
6904#ifdef DEBUG_DERIV
6905		printf("Seq trying left only\n");
6906#endif
6907		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
6908		if ((ret != forbiddenExp) && (ret != NULL)) {
6909#ifdef DEBUG_DERIV
6910		    printf("Seq trying left only worked\n");
6911#endif
6912		    /*
6913		     * TODO: assumption here that we are determinist
6914		     *       i.e. we won't get to a nillable exp left
6915		     *       subset which could be matched by the right
6916		     *       part too.
6917		     * e.g.: (a | b)+,(a | c) and 'a+,a'
6918		     */
6919		    exp->exp_right->ref++;
6920		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
6921					      exp->exp_right, NULL, 0, 0));
6922		}
6923#ifdef DEBUG_DERIV
6924	    } else {
6925		printf("Seq: left too short\n");
6926#endif
6927	    }
6928	    /* Try instead to decompose */
6929	    if (sub->type == XML_EXP_COUNT) {
6930		int min, max;
6931
6932#ifdef DEBUG_DERIV
6933		printf("Seq: sub is a count\n");
6934#endif
6935	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
6936		if (ret == NULL)
6937		    return(NULL);
6938		if (ret != forbiddenExp) {
6939#ifdef DEBUG_DERIV
6940		    printf("Seq , Count match on left\n");
6941#endif
6942		    if (sub->exp_max < 0)
6943		        max = -1;
6944	            else
6945		        max = sub->exp_max -1;
6946		    if (sub->exp_min > 0)
6947		        min = sub->exp_min -1;
6948		    else
6949		        min = 0;
6950		    exp->exp_right->ref++;
6951		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
6952		                             exp->exp_right, NULL, 0, 0);
6953		    if (tmp == NULL)
6954		        return(NULL);
6955
6956		    sub->exp_left->ref++;
6957		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
6958				      sub->exp_left, NULL, NULL, min, max);
6959		    if (tmp2 == NULL) {
6960		        xmlExpFree(ctxt, tmp);
6961			return(NULL);
6962		    }
6963		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
6964		    xmlExpFree(ctxt, tmp);
6965		    xmlExpFree(ctxt, tmp2);
6966		    return(ret);
6967		}
6968	    }
6969	    /* we made no progress on structured operations */
6970	    break;
6971        case XML_EXP_OR:
6972#ifdef DEBUG_DERIV
6973	    printf("Or , trying both side\n");
6974#endif
6975	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
6976	    if (ret == NULL)
6977	        return(NULL);
6978	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
6979	    if (tmp == NULL) {
6980		xmlExpFree(ctxt, ret);
6981	        return(NULL);
6982	    }
6983	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
6984        case XML_EXP_COUNT: {
6985	    int min, max;
6986
6987	    if (sub->type == XML_EXP_COUNT) {
6988	        /*
6989		 * Try to see if the loop is completely subsumed
6990		 */
6991	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
6992		if (tmp == NULL)
6993		    return(NULL);
6994		if (tmp == forbiddenExp) {
6995		    int mult;
6996
6997#ifdef DEBUG_DERIV
6998		    printf("Count, Count inner don't subsume\n");
6999#endif
7000		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
7001		                        NULL, &tmp);
7002		    if (mult <= 0) {
7003#ifdef DEBUG_DERIV
7004			printf("Count, Count not multiple => forbidden\n");
7005#endif
7006                        return(forbiddenExp);
7007		    }
7008		    if (sub->exp_max == -1) {
7009		        max = -1;
7010			if (exp->exp_max == -1) {
7011			    if (exp->exp_min <= sub->exp_min * mult)
7012			        min = 0;
7013			    else
7014			        min = exp->exp_min - sub->exp_min * mult;
7015			} else {
7016#ifdef DEBUG_DERIV
7017			    printf("Count, Count finite can't subsume infinite\n");
7018#endif
7019                            xmlExpFree(ctxt, tmp);
7020			    return(forbiddenExp);
7021			}
7022		    } else {
7023			if (exp->exp_max == -1) {
7024#ifdef DEBUG_DERIV
7025			    printf("Infinite loop consume mult finite loop\n");
7026#endif
7027			    if (exp->exp_min > sub->exp_min * mult) {
7028				max = -1;
7029				min = exp->exp_min - sub->exp_min * mult;
7030			    } else {
7031				max = -1;
7032				min = 0;
7033			    }
7034			} else {
7035			    if (exp->exp_max < sub->exp_max * mult) {
7036#ifdef DEBUG_DERIV
7037				printf("loops max mult mismatch => forbidden\n");
7038#endif
7039				xmlExpFree(ctxt, tmp);
7040				return(forbiddenExp);
7041			    }
7042			    if (sub->exp_max * mult > exp->exp_min)
7043				min = 0;
7044			    else
7045				min = exp->exp_min - sub->exp_max * mult;
7046			    max = exp->exp_max - sub->exp_max * mult;
7047			}
7048		    }
7049		} else if (!IS_NILLABLE(tmp)) {
7050		    /*
7051		     * TODO: loop here to try to grow if working on finite
7052		     *       blocks.
7053		     */
7054#ifdef DEBUG_DERIV
7055		    printf("Count, Count remain not nillable => forbidden\n");
7056#endif
7057		    xmlExpFree(ctxt, tmp);
7058		    return(forbiddenExp);
7059		} else if (sub->exp_max == -1) {
7060		    if (exp->exp_max == -1) {
7061		        if (exp->exp_min <= sub->exp_min) {
7062#ifdef DEBUG_DERIV
7063			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
7064#endif
7065                            max = -1;
7066			    min = 0;
7067			} else {
7068#ifdef DEBUG_DERIV
7069			    printf("Infinite loops min => Count(X,Inf)\n");
7070#endif
7071                            max = -1;
7072			    min = exp->exp_min - sub->exp_min;
7073			}
7074		    } else if (exp->exp_min > sub->exp_min) {
7075#ifdef DEBUG_DERIV
7076			printf("loops min mismatch 1 => forbidden ???\n");
7077#endif
7078		        xmlExpFree(ctxt, tmp);
7079		        return(forbiddenExp);
7080		    } else {
7081			max = -1;
7082			min = 0;
7083		    }
7084		} else {
7085		    if (exp->exp_max == -1) {
7086#ifdef DEBUG_DERIV
7087			printf("Infinite loop consume finite loop\n");
7088#endif
7089		        if (exp->exp_min > sub->exp_min) {
7090			    max = -1;
7091			    min = exp->exp_min - sub->exp_min;
7092			} else {
7093			    max = -1;
7094			    min = 0;
7095			}
7096		    } else {
7097		        if (exp->exp_max < sub->exp_max) {
7098#ifdef DEBUG_DERIV
7099			    printf("loops max mismatch => forbidden\n");
7100#endif
7101			    xmlExpFree(ctxt, tmp);
7102			    return(forbiddenExp);
7103			}
7104			if (sub->exp_max > exp->exp_min)
7105			    min = 0;
7106			else
7107			    min = exp->exp_min - sub->exp_max;
7108			max = exp->exp_max - sub->exp_max;
7109		    }
7110		}
7111#ifdef DEBUG_DERIV
7112		printf("loops match => SEQ(COUNT())\n");
7113#endif
7114		exp->exp_left->ref++;
7115		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7116		                          NULL, NULL, min, max);
7117		if (tmp2 == NULL) {
7118		    return(NULL);
7119		}
7120                ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7121		                         NULL, 0, 0);
7122		return(ret);
7123	    }
7124	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
7125	    if (tmp == NULL)
7126		return(NULL);
7127	    if (tmp == forbiddenExp) {
7128#ifdef DEBUG_DERIV
7129		printf("loop mismatch => forbidden\n");
7130#endif
7131		return(forbiddenExp);
7132	    }
7133	    if (exp->exp_min > 0)
7134		min = exp->exp_min - 1;
7135	    else
7136		min = 0;
7137	    if (exp->exp_max < 0)
7138		max = -1;
7139	    else
7140		max = exp->exp_max - 1;
7141
7142#ifdef DEBUG_DERIV
7143	    printf("loop match => SEQ(COUNT())\n");
7144#endif
7145	    exp->exp_left->ref++;
7146	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
7147				      NULL, NULL, min, max);
7148	    if (tmp2 == NULL)
7149		return(NULL);
7150	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
7151				     NULL, 0, 0);
7152	    return(ret);
7153	}
7154    }
7155
7156#ifdef DEBUG_DERIV
7157    printf("Fallback to derivative\n");
7158#endif
7159    if (IS_NILLABLE(sub)) {
7160        if (!(IS_NILLABLE(exp)))
7161	    return(forbiddenExp);
7162	else
7163	    ret = emptyExp;
7164    } else
7165	ret = NULL;
7166    /*
7167     * here the structured derivation made no progress so
7168     * we use the default token based derivation to force one more step
7169     */
7170    if (ctxt->tabSize == 0)
7171        ctxt->tabSize = 40;
7172
7173    tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
7174	                               sizeof(const xmlChar *));
7175    if (tab == NULL) {
7176	return(NULL);
7177    }
7178
7179    /*
7180     * collect all the strings accepted by the subexpression on input
7181     */
7182    len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7183    while (len < 0) {
7184        const xmlChar **temp;
7185	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
7186	                                     sizeof(const xmlChar *));
7187	if (temp == NULL) {
7188	    xmlFree((xmlChar **) tab);
7189	    return(NULL);
7190	}
7191	tab = temp;
7192	ctxt->tabSize *= 2;
7193	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
7194    }
7195    for (i = 0;i < len;i++) {
7196        tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
7197	if ((tmp == NULL) || (tmp == forbiddenExp)) {
7198	    xmlExpFree(ctxt, ret);
7199	    xmlFree((xmlChar **) tab);
7200	    return(tmp);
7201	}
7202	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
7203	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
7204	    xmlExpFree(ctxt, tmp);
7205	    xmlExpFree(ctxt, ret);
7206	    xmlFree((xmlChar **) tab);
7207	    return(tmp);
7208	}
7209	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
7210	xmlExpFree(ctxt, tmp);
7211	xmlExpFree(ctxt, tmp2);
7212
7213	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
7214	    xmlExpFree(ctxt, ret);
7215	    xmlFree((xmlChar **) tab);
7216	    return(tmp3);
7217	}
7218
7219	if (ret == NULL)
7220	    ret = tmp3;
7221	else {
7222	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
7223	    if (ret == NULL) {
7224		xmlFree((xmlChar **) tab);
7225	        return(NULL);
7226	    }
7227	}
7228    }
7229    xmlFree((xmlChar **) tab);
7230    return(ret);
7231}
7232
7233/**
7234 * xmlExpExpDerive:
7235 * @ctxt: the expressions context
7236 * @exp: the englobing expression
7237 * @sub: the subexpression
7238 *
7239 * Evaluates the expression resulting from @exp consuming a sub expression @sub
7240 * Based on algebraic derivation and sometimes direct Brzozowski derivation
7241 * it usually tatkes less than linear time and can handle expressions generating
7242 * infinite languages.
7243 *
7244 * Returns the resulting expression or NULL in case of internal error, the
7245 *         result must be freed
7246 */
7247xmlExpNodePtr
7248xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7249    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7250        return(NULL);
7251
7252    /*
7253     * O(1) speedups
7254     */
7255    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7256#ifdef DEBUG_DERIV
7257	printf("Sub nillable and not exp : can't subsume\n");
7258#endif
7259        return(forbiddenExp);
7260    }
7261    if (xmlExpCheckCard(exp, sub) == 0) {
7262#ifdef DEBUG_DERIV
7263	printf("sub generate longuer sequances than exp : can't subsume\n");
7264#endif
7265        return(forbiddenExp);
7266    }
7267    return(xmlExpExpDeriveInt(ctxt, exp, sub));
7268}
7269
7270/**
7271 * xmlExpSubsume:
7272 * @ctxt: the expressions context
7273 * @exp: the englobing expression
7274 * @sub: the subexpression
7275 *
7276 * Check whether @exp accepts all the languages accexpted by @sub
7277 * the input being a subexpression.
7278 *
7279 * Returns 1 if true 0 if false and -1 in case of failure.
7280 */
7281int
7282xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
7283    xmlExpNodePtr tmp;
7284
7285    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
7286        return(-1);
7287
7288    /*
7289     * TODO: speedup by checking the language of sub is a subset of the
7290     *       language of exp
7291     */
7292    /*
7293     * O(1) speedups
7294     */
7295    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
7296#ifdef DEBUG_DERIV
7297	printf("Sub nillable and not exp : can't subsume\n");
7298#endif
7299        return(0);
7300    }
7301    if (xmlExpCheckCard(exp, sub) == 0) {
7302#ifdef DEBUG_DERIV
7303	printf("sub generate longuer sequances than exp : can't subsume\n");
7304#endif
7305        return(0);
7306    }
7307    tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
7308#ifdef DEBUG_DERIV
7309    printf("Result derivation :\n");
7310    PRINT_EXP(tmp);
7311#endif
7312    if (tmp == NULL)
7313        return(-1);
7314    if (tmp == forbiddenExp)
7315	return(0);
7316    if (tmp == emptyExp)
7317	return(1);
7318    if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
7319        xmlExpFree(ctxt, tmp);
7320        return(1);
7321    }
7322    xmlExpFree(ctxt, tmp);
7323    return(0);
7324}
7325
7326/************************************************************************
7327 *									*
7328 *			Parsing expression 				*
7329 *									*
7330 ************************************************************************/
7331
7332static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
7333
7334#undef CUR
7335#define CUR (*ctxt->cur)
7336#undef NEXT
7337#define NEXT ctxt->cur++;
7338#undef IS_BLANK
7339#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
7340#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
7341
7342static int
7343xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
7344    int ret = 0;
7345
7346    SKIP_BLANKS
7347    if (CUR == '*') {
7348	NEXT
7349	return(-1);
7350    }
7351    if ((CUR < '0') || (CUR > '9'))
7352        return(-1);
7353    while ((CUR >= '0') && (CUR <= '9')) {
7354        ret = ret * 10 + (CUR - '0');
7355	NEXT
7356    }
7357    return(ret);
7358}
7359
7360static xmlExpNodePtr
7361xmlExpParseOr(xmlExpCtxtPtr ctxt) {
7362    const char *base;
7363    xmlExpNodePtr ret;
7364    const xmlChar *val;
7365
7366    SKIP_BLANKS
7367    base = ctxt->cur;
7368    if (*ctxt->cur == '(') {
7369        NEXT
7370	ret = xmlExpParseExpr(ctxt);
7371	SKIP_BLANKS
7372	if (*ctxt->cur != ')') {
7373	    fprintf(stderr, "unbalanced '(' : %s\n", base);
7374	    xmlExpFree(ctxt, ret);
7375	    return(NULL);
7376	}
7377	NEXT;
7378	SKIP_BLANKS
7379	goto parse_quantifier;
7380    }
7381    while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
7382           (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
7383	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
7384	NEXT;
7385    val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
7386    if (val == NULL)
7387        return(NULL);
7388    ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
7389    if (ret == NULL)
7390        return(NULL);
7391    SKIP_BLANKS
7392parse_quantifier:
7393    if (CUR == '{') {
7394        int min, max;
7395
7396        NEXT
7397	min = xmlExpParseNumber(ctxt);
7398	if (min < 0) {
7399	    xmlExpFree(ctxt, ret);
7400	    return(NULL);
7401	}
7402	SKIP_BLANKS
7403	if (CUR == ',') {
7404	    NEXT
7405	    max = xmlExpParseNumber(ctxt);
7406	    SKIP_BLANKS
7407	} else
7408	    max = min;
7409	if (CUR != '}') {
7410	    xmlExpFree(ctxt, ret);
7411	    return(NULL);
7412	}
7413        NEXT
7414	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7415	                         min, max);
7416	SKIP_BLANKS
7417    } else if (CUR == '?') {
7418        NEXT
7419	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7420	                         0, 1);
7421	SKIP_BLANKS
7422    } else if (CUR == '+') {
7423        NEXT
7424	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7425	                         1, -1);
7426	SKIP_BLANKS
7427    } else if (CUR == '*') {
7428        NEXT
7429	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
7430	                         0, -1);
7431	SKIP_BLANKS
7432    }
7433    return(ret);
7434}
7435
7436
7437static xmlExpNodePtr
7438xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
7439    xmlExpNodePtr ret, right;
7440
7441    ret = xmlExpParseOr(ctxt);
7442    SKIP_BLANKS
7443    while (CUR == '|') {
7444        NEXT
7445	right = xmlExpParseOr(ctxt);
7446	if (right == NULL) {
7447	    xmlExpFree(ctxt, ret);
7448	    return(NULL);
7449	}
7450	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
7451	if (ret == NULL)
7452	    return(NULL);
7453    }
7454    return(ret);
7455}
7456
7457static xmlExpNodePtr
7458xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
7459    xmlExpNodePtr ret, right;
7460
7461    ret = xmlExpParseSeq(ctxt);
7462    SKIP_BLANKS
7463    while (CUR == ',') {
7464        NEXT
7465	right = xmlExpParseSeq(ctxt);
7466	if (right == NULL) {
7467	    xmlExpFree(ctxt, ret);
7468	    return(NULL);
7469	}
7470	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
7471	if (ret == NULL)
7472	    return(NULL);
7473    }
7474    return(ret);
7475}
7476
7477/**
7478 * xmlExpParse:
7479 * @ctxt: the expressions context
7480 * @expr: the 0 terminated string
7481 *
7482 * Minimal parser for regexps, it understand the following constructs
7483 *  - string terminals
7484 *  - choice operator |
7485 *  - sequence operator ,
7486 *  - subexpressions (...)
7487 *  - usual cardinality operators + * and ?
7488 *  - finite sequences  { min, max }
7489 *  - infinite sequences { min, * }
7490 * There is minimal checkings made especially no checking on strings values
7491 *
7492 * Returns a new expression or NULL in case of failure
7493 */
7494xmlExpNodePtr
7495xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
7496    xmlExpNodePtr ret;
7497
7498    ctxt->expr = expr;
7499    ctxt->cur = expr;
7500
7501    ret = xmlExpParseExpr(ctxt);
7502    SKIP_BLANKS
7503    if (*ctxt->cur != 0) {
7504        xmlExpFree(ctxt, ret);
7505        return(NULL);
7506    }
7507    return(ret);
7508}
7509
7510static void
7511xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
7512    xmlExpNodePtr c;
7513
7514    if (expr == NULL) return;
7515    if (glob) xmlBufferWriteChar(buf, "(");
7516    switch (expr->type) {
7517        case XML_EXP_EMPTY:
7518	    xmlBufferWriteChar(buf, "empty");
7519	    break;
7520        case XML_EXP_FORBID:
7521	    xmlBufferWriteChar(buf, "forbidden");
7522	    break;
7523        case XML_EXP_ATOM:
7524	    xmlBufferWriteCHAR(buf, expr->exp_str);
7525	    break;
7526        case XML_EXP_SEQ:
7527	    c = expr->exp_left;
7528	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7529	        xmlExpDumpInt(buf, c, 1);
7530	    else
7531	        xmlExpDumpInt(buf, c, 0);
7532	    xmlBufferWriteChar(buf, " , ");
7533	    c = expr->exp_right;
7534	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7535	        xmlExpDumpInt(buf, c, 1);
7536	    else
7537	        xmlExpDumpInt(buf, c, 0);
7538            break;
7539        case XML_EXP_OR:
7540	    c = expr->exp_left;
7541	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7542	        xmlExpDumpInt(buf, c, 1);
7543	    else
7544	        xmlExpDumpInt(buf, c, 0);
7545	    xmlBufferWriteChar(buf, " | ");
7546	    c = expr->exp_right;
7547	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7548	        xmlExpDumpInt(buf, c, 1);
7549	    else
7550	        xmlExpDumpInt(buf, c, 0);
7551            break;
7552        case XML_EXP_COUNT: {
7553	    char rep[40];
7554
7555	    c = expr->exp_left;
7556	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
7557	        xmlExpDumpInt(buf, c, 1);
7558	    else
7559	        xmlExpDumpInt(buf, c, 0);
7560	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
7561		rep[0] = '?';
7562		rep[1] = 0;
7563	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
7564		rep[0] = '*';
7565		rep[1] = 0;
7566	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
7567		rep[0] = '+';
7568		rep[1] = 0;
7569	    } else if (expr->exp_max == expr->exp_min) {
7570	        snprintf(rep, 39, "{%d}", expr->exp_min);
7571	    } else if (expr->exp_max < 0) {
7572	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
7573	    } else {
7574	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
7575	    }
7576	    rep[39] = 0;
7577	    xmlBufferWriteChar(buf, rep);
7578	    break;
7579	}
7580	default:
7581	    fprintf(stderr, "Error in tree\n");
7582    }
7583    if (glob)
7584        xmlBufferWriteChar(buf, ")");
7585}
7586/**
7587 * xmlExpDump:
7588 * @buf:  a buffer to receive the output
7589 * @expr:  the compiled expression
7590 *
7591 * Serialize the expression as compiled to the buffer
7592 */
7593void
7594xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
7595    if ((buf == NULL) || (expr == NULL))
7596        return;
7597    xmlExpDumpInt(buf, expr, 0);
7598}
7599
7600/**
7601 * xmlExpMaxToken:
7602 * @expr: a compiled expression
7603 *
7604 * Indicate the maximum number of input a expression can accept
7605 *
7606 * Returns the maximum length or -1 in case of error
7607 */
7608int
7609xmlExpMaxToken(xmlExpNodePtr expr) {
7610    if (expr == NULL)
7611        return(-1);
7612    return(expr->c_max);
7613}
7614
7615/**
7616 * xmlExpCtxtNbNodes:
7617 * @ctxt: an expression context
7618 *
7619 * Debugging facility provides the number of allocated nodes at a that point
7620 *
7621 * Returns the number of nodes in use or -1 in case of error
7622 */
7623int
7624xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
7625    if (ctxt == NULL)
7626        return(-1);
7627    return(ctxt->nb_nodes);
7628}
7629
7630/**
7631 * xmlExpCtxtNbCons:
7632 * @ctxt: an expression context
7633 *
7634 * Debugging facility provides the number of allocated nodes over lifetime
7635 *
7636 * Returns the number of nodes ever allocated or -1 in case of error
7637 */
7638int
7639xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
7640    if (ctxt == NULL)
7641        return(-1);
7642    return(ctxt->nb_cons);
7643}
7644
7645#endif /* LIBXML_EXPR_ENABLED */
7646#define bottom_xmlregexp
7647#include "elfgcchack.h"
7648#endif /* LIBXML_REGEXP_ENABLED */
7649