xmlregexp.c revision 4f82c8a161404d1fe6ca9fe6edb404df00e14f11
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications these include:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22/* #define DEBUG_ERR */
23
24#include <stdio.h>
25#include <string.h>
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <libxml/tree.h>
31#include <libxml/parserInternals.h>
32#include <libxml/xmlregexp.h>
33#include <libxml/xmlautomata.h>
34#include <libxml/xmlunicode.h>
35
36#ifndef INT_MAX
37#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38#endif
39
40/* #define DEBUG_REGEXP_GRAPH */
41/* #define DEBUG_REGEXP_EXEC */
42/* #define DEBUG_PUSH */
43/* #define DEBUG_COMPACTION */
44
45#define ERROR(str)							\
46    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
47    xmlRegexpErrCompile(ctxt, str);
48#define NEXT ctxt->cur++
49#define CUR (*(ctxt->cur))
50#define NXT(index) (ctxt->cur[index])
51
52#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
53#define NEXTL(l) ctxt->cur += l;
54#define XML_REG_STRING_SEPARATOR '|'
55
56/**
57 * TODO:
58 *
59 * macro to flag unimplemented blocks
60 */
61#define TODO 								\
62    xmlGenericError(xmlGenericErrorContext,				\
63	    "Unimplemented block at %s:%d\n",				\
64            __FILE__, __LINE__);
65
66/************************************************************************
67 * 									*
68 * 			Datatypes and structures			*
69 * 									*
70 ************************************************************************/
71
72typedef enum {
73    XML_REGEXP_EPSILON = 1,
74    XML_REGEXP_CHARVAL,
75    XML_REGEXP_RANGES,
76    XML_REGEXP_SUBREG,
77    XML_REGEXP_STRING,
78    XML_REGEXP_ANYCHAR, /* . */
79    XML_REGEXP_ANYSPACE, /* \s */
80    XML_REGEXP_NOTSPACE, /* \S */
81    XML_REGEXP_INITNAME, /* \l */
82    XML_REGEXP_NOTINITNAME, /* \l */
83    XML_REGEXP_NAMECHAR, /* \c */
84    XML_REGEXP_NOTNAMECHAR, /* \C */
85    XML_REGEXP_DECIMAL, /* \d */
86    XML_REGEXP_NOTDECIMAL, /* \d */
87    XML_REGEXP_REALCHAR, /* \w */
88    XML_REGEXP_NOTREALCHAR, /* \w */
89    XML_REGEXP_LETTER,
90    XML_REGEXP_LETTER_UPPERCASE,
91    XML_REGEXP_LETTER_LOWERCASE,
92    XML_REGEXP_LETTER_TITLECASE,
93    XML_REGEXP_LETTER_MODIFIER,
94    XML_REGEXP_LETTER_OTHERS,
95    XML_REGEXP_MARK,
96    XML_REGEXP_MARK_NONSPACING,
97    XML_REGEXP_MARK_SPACECOMBINING,
98    XML_REGEXP_MARK_ENCLOSING,
99    XML_REGEXP_NUMBER,
100    XML_REGEXP_NUMBER_DECIMAL,
101    XML_REGEXP_NUMBER_LETTER,
102    XML_REGEXP_NUMBER_OTHERS,
103    XML_REGEXP_PUNCT,
104    XML_REGEXP_PUNCT_CONNECTOR,
105    XML_REGEXP_PUNCT_DASH,
106    XML_REGEXP_PUNCT_OPEN,
107    XML_REGEXP_PUNCT_CLOSE,
108    XML_REGEXP_PUNCT_INITQUOTE,
109    XML_REGEXP_PUNCT_FINQUOTE,
110    XML_REGEXP_PUNCT_OTHERS,
111    XML_REGEXP_SEPAR,
112    XML_REGEXP_SEPAR_SPACE,
113    XML_REGEXP_SEPAR_LINE,
114    XML_REGEXP_SEPAR_PARA,
115    XML_REGEXP_SYMBOL,
116    XML_REGEXP_SYMBOL_MATH,
117    XML_REGEXP_SYMBOL_CURRENCY,
118    XML_REGEXP_SYMBOL_MODIFIER,
119    XML_REGEXP_SYMBOL_OTHERS,
120    XML_REGEXP_OTHER,
121    XML_REGEXP_OTHER_CONTROL,
122    XML_REGEXP_OTHER_FORMAT,
123    XML_REGEXP_OTHER_PRIVATE,
124    XML_REGEXP_OTHER_NA,
125    XML_REGEXP_BLOCK_NAME
126} xmlRegAtomType;
127
128typedef enum {
129    XML_REGEXP_QUANT_EPSILON = 1,
130    XML_REGEXP_QUANT_ONCE,
131    XML_REGEXP_QUANT_OPT,
132    XML_REGEXP_QUANT_MULT,
133    XML_REGEXP_QUANT_PLUS,
134    XML_REGEXP_QUANT_ONCEONLY,
135    XML_REGEXP_QUANT_ALL,
136    XML_REGEXP_QUANT_RANGE
137} xmlRegQuantType;
138
139typedef enum {
140    XML_REGEXP_START_STATE = 1,
141    XML_REGEXP_FINAL_STATE,
142    XML_REGEXP_TRANS_STATE,
143    XML_REGEXP_SINK_STATE
144} xmlRegStateType;
145
146typedef enum {
147    XML_REGEXP_MARK_NORMAL = 0,
148    XML_REGEXP_MARK_START,
149    XML_REGEXP_MARK_VISITED
150} xmlRegMarkedType;
151
152typedef struct _xmlRegRange xmlRegRange;
153typedef xmlRegRange *xmlRegRangePtr;
154
155struct _xmlRegRange {
156    int neg;		/* 0 normal, 1 not, 2 exclude */
157    xmlRegAtomType type;
158    int start;
159    int end;
160    xmlChar *blockName;
161};
162
163typedef struct _xmlRegAtom xmlRegAtom;
164typedef xmlRegAtom *xmlRegAtomPtr;
165
166typedef struct _xmlAutomataState xmlRegState;
167typedef xmlRegState *xmlRegStatePtr;
168
169struct _xmlRegAtom {
170    int no;
171    xmlRegAtomType type;
172    xmlRegQuantType quant;
173    int min;
174    int max;
175
176    void *valuep;
177    void *valuep2;
178    int neg;
179    int codepoint;
180    xmlRegStatePtr start;
181    xmlRegStatePtr stop;
182    int maxRanges;
183    int nbRanges;
184    xmlRegRangePtr *ranges;
185    void *data;
186};
187
188typedef struct _xmlRegCounter xmlRegCounter;
189typedef xmlRegCounter *xmlRegCounterPtr;
190
191struct _xmlRegCounter {
192    int min;
193    int max;
194};
195
196typedef struct _xmlRegTrans xmlRegTrans;
197typedef xmlRegTrans *xmlRegTransPtr;
198
199struct _xmlRegTrans {
200    xmlRegAtomPtr atom;
201    int to;
202    int counter;
203    int count;
204};
205
206struct _xmlAutomataState {
207    xmlRegStateType type;
208    xmlRegMarkedType mark;
209    xmlRegMarkedType reached;
210    int no;
211    int maxTrans;
212    int nbTrans;
213    xmlRegTrans *trans;
214    /*  knowing states ponting to us can speed things up */
215    int maxTransTo;
216    int nbTransTo;
217    int *transTo;
218};
219
220typedef struct _xmlAutomata xmlRegParserCtxt;
221typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
222
223struct _xmlAutomata {
224    xmlChar *string;
225    xmlChar *cur;
226
227    int error;
228    int neg;
229
230    xmlRegStatePtr start;
231    xmlRegStatePtr end;
232    xmlRegStatePtr state;
233
234    xmlRegAtomPtr atom;
235
236    int maxAtoms;
237    int nbAtoms;
238    xmlRegAtomPtr *atoms;
239
240    int maxStates;
241    int nbStates;
242    xmlRegStatePtr *states;
243
244    int maxCounters;
245    int nbCounters;
246    xmlRegCounter *counters;
247
248    int determinist;
249    int negs;
250};
251
252struct _xmlRegexp {
253    xmlChar *string;
254    int nbStates;
255    xmlRegStatePtr *states;
256    int nbAtoms;
257    xmlRegAtomPtr *atoms;
258    int nbCounters;
259    xmlRegCounter *counters;
260    int determinist;
261    /*
262     * That's the compact form for determinists automatas
263     */
264    int nbstates;
265    int *compact;
266    void **transdata;
267    int nbstrings;
268    xmlChar **stringMap;
269};
270
271typedef struct _xmlRegExecRollback xmlRegExecRollback;
272typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
273
274struct _xmlRegExecRollback {
275    xmlRegStatePtr state;/* the current state */
276    int index;		/* the index in the input stack */
277    int nextbranch;	/* the next transition to explore in that state */
278    int *counts;	/* save the automata state if it has some */
279};
280
281typedef struct _xmlRegInputToken xmlRegInputToken;
282typedef xmlRegInputToken *xmlRegInputTokenPtr;
283
284struct _xmlRegInputToken {
285    xmlChar *value;
286    void *data;
287};
288
289struct _xmlRegExecCtxt {
290    int status;		/* execution status != 0 indicate an error */
291    int determinist;	/* did we find an indeterministic behaviour */
292    xmlRegexpPtr comp;	/* the compiled regexp */
293    xmlRegExecCallbacks callback;
294    void *data;
295
296    xmlRegStatePtr state;/* the current state */
297    int transno;	/* the current transition on that state */
298    int transcount;	/* the number of chars in char counted transitions */
299
300    /*
301     * A stack of rollback states
302     */
303    int maxRollbacks;
304    int nbRollbacks;
305    xmlRegExecRollback *rollbacks;
306
307    /*
308     * The state of the automata if any
309     */
310    int *counts;
311
312    /*
313     * The input stack
314     */
315    int inputStackMax;
316    int inputStackNr;
317    int index;
318    int *charStack;
319    const xmlChar *inputString; /* when operating on characters */
320    xmlRegInputTokenPtr inputStack;/* when operating on strings */
321
322    /*
323     * error handling
324     */
325    int errStateNo;		/* the error state number */
326    xmlRegStatePtr errState;    /* the error state */
327    xmlChar *errString;		/* the string raising the error */
328    int *errCounts;		/* counters at the error state */
329};
330
331#define REGEXP_ALL_COUNTER	0x123456
332#define REGEXP_ALL_LAX_COUNTER	0x123457
333
334static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
335static void xmlRegFreeState(xmlRegStatePtr state);
336static void xmlRegFreeAtom(xmlRegAtomPtr atom);
337static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
338
339/************************************************************************
340 *									*
341 * 		Regexp memory error handler				*
342 *									*
343 ************************************************************************/
344/**
345 * xmlRegexpErrMemory:
346 * @extra:  extra information
347 *
348 * Handle an out of memory condition
349 */
350static void
351xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
352{
353    const char *regexp = NULL;
354    if (ctxt != NULL) {
355        regexp = (const char *) ctxt->string;
356	ctxt->error = XML_ERR_NO_MEMORY;
357    }
358    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
359		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
360		    regexp, NULL, 0, 0,
361		    "Memory allocation failed : %s\n", extra);
362}
363
364/**
365 * xmlRegexpErrCompile:
366 * @extra:  extra information
367 *
368 * Handle a compilation failure
369 */
370static void
371xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
372{
373    const char *regexp = NULL;
374    int idx = 0;
375
376    if (ctxt != NULL) {
377        regexp = (const char *) ctxt->string;
378	idx = ctxt->cur - ctxt->string;
379	ctxt->error = XML_REGEXP_COMPILE_ERROR;
380    }
381    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
382		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
383		    regexp, NULL, idx, 0,
384		    "failed to compile: %s\n", extra);
385}
386
387/************************************************************************
388 * 									*
389 * 			Allocation/Deallocation				*
390 * 									*
391 ************************************************************************/
392
393static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
394/**
395 * xmlRegEpxFromParse:
396 * @ctxt:  the parser context used to build it
397 *
398 * Allocate a new regexp and fill it with the result from the parser
399 *
400 * Returns the new regexp or NULL in case of error
401 */
402static xmlRegexpPtr
403xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
404    xmlRegexpPtr ret;
405
406    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
407    if (ret == NULL) {
408	xmlRegexpErrMemory(ctxt, "compiling regexp");
409	return(NULL);
410    }
411    memset(ret, 0, sizeof(xmlRegexp));
412    ret->string = ctxt->string;
413    ret->nbStates = ctxt->nbStates;
414    ret->states = ctxt->states;
415    ret->nbAtoms = ctxt->nbAtoms;
416    ret->atoms = ctxt->atoms;
417    ret->nbCounters = ctxt->nbCounters;
418    ret->counters = ctxt->counters;
419    ret->determinist = ctxt->determinist;
420
421    if ((ret->determinist != 0) &&
422	(ret->nbCounters == 0) &&
423	(ctxt->negs == 0) &&
424	(ret->atoms != NULL) &&
425	(ret->atoms[0] != NULL) &&
426	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
427	int i, j, nbstates = 0, nbatoms = 0;
428	int *stateRemap;
429	int *stringRemap;
430	int *transitions;
431	void **transdata;
432	xmlChar **stringMap;
433        xmlChar *value;
434
435	/*
436	 * Switch to a compact representation
437	 * 1/ counting the effective number of states left
438	 * 2/ counting the unique number of atoms, and check that
439	 *    they are all of the string type
440	 * 3/ build a table state x atom for the transitions
441	 */
442
443	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
444	if (stateRemap == NULL) {
445	    xmlRegexpErrMemory(ctxt, "compiling regexp");
446	    xmlFree(ret);
447	    return(NULL);
448	}
449	for (i = 0;i < ret->nbStates;i++) {
450	    if (ret->states[i] != NULL) {
451		stateRemap[i] = nbstates;
452		nbstates++;
453	    } else {
454		stateRemap[i] = -1;
455	    }
456	}
457#ifdef DEBUG_COMPACTION
458	printf("Final: %d states\n", nbstates);
459#endif
460	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
461	if (stringMap == NULL) {
462	    xmlRegexpErrMemory(ctxt, "compiling regexp");
463	    xmlFree(stateRemap);
464	    xmlFree(ret);
465	    return(NULL);
466	}
467	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
468	if (stringRemap == NULL) {
469	    xmlRegexpErrMemory(ctxt, "compiling regexp");
470	    xmlFree(stringMap);
471	    xmlFree(stateRemap);
472	    xmlFree(ret);
473	    return(NULL);
474	}
475	for (i = 0;i < ret->nbAtoms;i++) {
476	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
477		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
478		value = ret->atoms[i]->valuep;
479                for (j = 0;j < nbatoms;j++) {
480		    if (xmlStrEqual(stringMap[j], value)) {
481			stringRemap[i] = j;
482			break;
483		    }
484		}
485		if (j >= nbatoms) {
486		    stringRemap[i] = nbatoms;
487		    stringMap[nbatoms] = xmlStrdup(value);
488		    if (stringMap[nbatoms] == NULL) {
489			for (i = 0;i < nbatoms;i++)
490			    xmlFree(stringMap[i]);
491			xmlFree(stringRemap);
492			xmlFree(stringMap);
493			xmlFree(stateRemap);
494			xmlFree(ret);
495			return(NULL);
496		    }
497		    nbatoms++;
498		}
499	    } else {
500		xmlFree(stateRemap);
501		xmlFree(stringRemap);
502		for (i = 0;i < nbatoms;i++)
503		    xmlFree(stringMap[i]);
504		xmlFree(stringMap);
505		xmlFree(ret);
506		return(NULL);
507	    }
508	}
509#ifdef DEBUG_COMPACTION
510	printf("Final: %d atoms\n", nbatoms);
511#endif
512	transitions = (int *) xmlMalloc((nbstates + 1) *
513	                                (nbatoms + 1) * sizeof(int));
514	if (transitions == NULL) {
515	    xmlFree(stateRemap);
516	    xmlFree(stringRemap);
517	    xmlFree(stringMap);
518	    xmlFree(ret);
519	    return(NULL);
520	}
521	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
522
523	/*
524	 * Allocate the transition table. The first entry for each
525	 * state corresponds to the state type.
526	 */
527	transdata = NULL;
528
529	for (i = 0;i < ret->nbStates;i++) {
530	    int stateno, atomno, targetno, prev;
531	    xmlRegStatePtr state;
532	    xmlRegTransPtr trans;
533
534	    stateno = stateRemap[i];
535	    if (stateno == -1)
536		continue;
537	    state = ret->states[i];
538
539	    transitions[stateno * (nbatoms + 1)] = state->type;
540
541	    for (j = 0;j < state->nbTrans;j++) {
542		trans = &(state->trans[j]);
543		if ((trans->to == -1) || (trans->atom == NULL))
544		    continue;
545                atomno = stringRemap[trans->atom->no];
546		if ((trans->atom->data != NULL) && (transdata == NULL)) {
547		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
548			                            sizeof(void *));
549		    if (transdata != NULL)
550			memset(transdata, 0,
551			       nbstates * nbatoms * sizeof(void *));
552		    else {
553			xmlRegexpErrMemory(ctxt, "compiling regexp");
554			break;
555		    }
556		}
557		targetno = stateRemap[trans->to];
558		/*
559		 * if the same atom can generate transitions to 2 different
560		 * states then it means the automata is not determinist and
561		 * the compact form can't be used !
562		 */
563		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
564		if (prev != 0) {
565		    if (prev != targetno + 1) {
566			ret->determinist = 0;
567#ifdef DEBUG_COMPACTION
568			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
569			       i, j, trans->atom->no, trans->to, atomno, targetno);
570			printf("       previous to is %d\n", prev);
571#endif
572			ret->determinist = 0;
573			if (transdata != NULL)
574			    xmlFree(transdata);
575			xmlFree(transitions);
576			xmlFree(stateRemap);
577			xmlFree(stringRemap);
578			for (i = 0;i < nbatoms;i++)
579			    xmlFree(stringMap[i]);
580			xmlFree(stringMap);
581			goto not_determ;
582		    }
583		} else {
584#if 0
585		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
586			   i, j, trans->atom->no, trans->to, atomno, targetno);
587#endif
588		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
589			targetno + 1; /* to avoid 0 */
590		    if (transdata != NULL)
591			transdata[stateno * nbatoms + atomno] =
592			    trans->atom->data;
593		}
594	    }
595	}
596	ret->determinist = 1;
597#ifdef DEBUG_COMPACTION
598	/*
599	 * Debug
600	 */
601	for (i = 0;i < nbstates;i++) {
602	    for (j = 0;j < nbatoms + 1;j++) {
603                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
604	    }
605	    printf("\n");
606	}
607	printf("\n");
608#endif
609	/*
610	 * Cleanup of the old data
611	 */
612	if (ret->states != NULL) {
613	    for (i = 0;i < ret->nbStates;i++)
614		xmlRegFreeState(ret->states[i]);
615	    xmlFree(ret->states);
616	}
617	ret->states = NULL;
618	ret->nbStates = 0;
619	if (ret->atoms != NULL) {
620	    for (i = 0;i < ret->nbAtoms;i++)
621		xmlRegFreeAtom(ret->atoms[i]);
622	    xmlFree(ret->atoms);
623	}
624	ret->atoms = NULL;
625	ret->nbAtoms = 0;
626
627	ret->compact = transitions;
628	ret->transdata = transdata;
629	ret->stringMap = stringMap;
630	ret->nbstrings = nbatoms;
631	ret->nbstates = nbstates;
632	xmlFree(stateRemap);
633	xmlFree(stringRemap);
634    }
635not_determ:
636    ctxt->string = NULL;
637    ctxt->nbStates = 0;
638    ctxt->states = NULL;
639    ctxt->nbAtoms = 0;
640    ctxt->atoms = NULL;
641    ctxt->nbCounters = 0;
642    ctxt->counters = NULL;
643    return(ret);
644}
645
646/**
647 * xmlRegNewParserCtxt:
648 * @string:  the string to parse
649 *
650 * Allocate a new regexp parser context
651 *
652 * Returns the new context or NULL in case of error
653 */
654static xmlRegParserCtxtPtr
655xmlRegNewParserCtxt(const xmlChar *string) {
656    xmlRegParserCtxtPtr ret;
657
658    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
659    if (ret == NULL)
660	return(NULL);
661    memset(ret, 0, sizeof(xmlRegParserCtxt));
662    if (string != NULL)
663	ret->string = xmlStrdup(string);
664    ret->cur = ret->string;
665    ret->neg = 0;
666    ret->negs = 0;
667    ret->error = 0;
668    ret->determinist = -1;
669    return(ret);
670}
671
672/**
673 * xmlRegNewRange:
674 * @ctxt:  the regexp parser context
675 * @neg:  is that negative
676 * @type:  the type of range
677 * @start:  the start codepoint
678 * @end:  the end codepoint
679 *
680 * Allocate a new regexp range
681 *
682 * Returns the new range or NULL in case of error
683 */
684static xmlRegRangePtr
685xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
686	       int neg, xmlRegAtomType type, int start, int end) {
687    xmlRegRangePtr ret;
688
689    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
690    if (ret == NULL) {
691	xmlRegexpErrMemory(ctxt, "allocating range");
692	return(NULL);
693    }
694    ret->neg = neg;
695    ret->type = type;
696    ret->start = start;
697    ret->end = end;
698    return(ret);
699}
700
701/**
702 * xmlRegFreeRange:
703 * @range:  the regexp range
704 *
705 * Free a regexp range
706 */
707static void
708xmlRegFreeRange(xmlRegRangePtr range) {
709    if (range == NULL)
710	return;
711
712    if (range->blockName != NULL)
713	xmlFree(range->blockName);
714    xmlFree(range);
715}
716
717/**
718 * xmlRegNewAtom:
719 * @ctxt:  the regexp parser context
720 * @type:  the type of atom
721 *
722 * Allocate a new regexp range
723 *
724 * Returns the new atom or NULL in case of error
725 */
726static xmlRegAtomPtr
727xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
728    xmlRegAtomPtr ret;
729
730    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
731    if (ret == NULL) {
732	xmlRegexpErrMemory(ctxt, "allocating atom");
733	return(NULL);
734    }
735    memset(ret, 0, sizeof(xmlRegAtom));
736    ret->type = type;
737    ret->quant = XML_REGEXP_QUANT_ONCE;
738    ret->min = 0;
739    ret->max = 0;
740    return(ret);
741}
742
743/**
744 * xmlRegFreeAtom:
745 * @atom:  the regexp atom
746 *
747 * Free a regexp atom
748 */
749static void
750xmlRegFreeAtom(xmlRegAtomPtr atom) {
751    int i;
752
753    if (atom == NULL)
754	return;
755
756    for (i = 0;i < atom->nbRanges;i++)
757	xmlRegFreeRange(atom->ranges[i]);
758    if (atom->ranges != NULL)
759	xmlFree(atom->ranges);
760    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
761	xmlFree(atom->valuep);
762    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
763	xmlFree(atom->valuep2);
764    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
765	xmlFree(atom->valuep);
766    xmlFree(atom);
767}
768
769static xmlRegStatePtr
770xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
771    xmlRegStatePtr ret;
772
773    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
774    if (ret == NULL) {
775	xmlRegexpErrMemory(ctxt, "allocating state");
776	return(NULL);
777    }
778    memset(ret, 0, sizeof(xmlRegState));
779    ret->type = XML_REGEXP_TRANS_STATE;
780    ret->mark = XML_REGEXP_MARK_NORMAL;
781    return(ret);
782}
783
784/**
785 * xmlRegFreeState:
786 * @state:  the regexp state
787 *
788 * Free a regexp state
789 */
790static void
791xmlRegFreeState(xmlRegStatePtr state) {
792    if (state == NULL)
793	return;
794
795    if (state->trans != NULL)
796	xmlFree(state->trans);
797    if (state->transTo != NULL)
798	xmlFree(state->transTo);
799    xmlFree(state);
800}
801
802/**
803 * xmlRegFreeParserCtxt:
804 * @ctxt:  the regexp parser context
805 *
806 * Free a regexp parser context
807 */
808static void
809xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
810    int i;
811    if (ctxt == NULL)
812	return;
813
814    if (ctxt->string != NULL)
815	xmlFree(ctxt->string);
816    if (ctxt->states != NULL) {
817	for (i = 0;i < ctxt->nbStates;i++)
818	    xmlRegFreeState(ctxt->states[i]);
819	xmlFree(ctxt->states);
820    }
821    if (ctxt->atoms != NULL) {
822	for (i = 0;i < ctxt->nbAtoms;i++)
823	    xmlRegFreeAtom(ctxt->atoms[i]);
824	xmlFree(ctxt->atoms);
825    }
826    if (ctxt->counters != NULL)
827	xmlFree(ctxt->counters);
828    xmlFree(ctxt);
829}
830
831/************************************************************************
832 * 									*
833 * 			Display of Data structures			*
834 * 									*
835 ************************************************************************/
836
837static void
838xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
839    switch (type) {
840        case XML_REGEXP_EPSILON:
841	    fprintf(output, "epsilon "); break;
842        case XML_REGEXP_CHARVAL:
843	    fprintf(output, "charval "); break;
844        case XML_REGEXP_RANGES:
845	    fprintf(output, "ranges "); break;
846        case XML_REGEXP_SUBREG:
847	    fprintf(output, "subexpr "); break;
848        case XML_REGEXP_STRING:
849	    fprintf(output, "string "); break;
850        case XML_REGEXP_ANYCHAR:
851	    fprintf(output, "anychar "); break;
852        case XML_REGEXP_ANYSPACE:
853	    fprintf(output, "anyspace "); break;
854        case XML_REGEXP_NOTSPACE:
855	    fprintf(output, "notspace "); break;
856        case XML_REGEXP_INITNAME:
857	    fprintf(output, "initname "); break;
858        case XML_REGEXP_NOTINITNAME:
859	    fprintf(output, "notinitname "); break;
860        case XML_REGEXP_NAMECHAR:
861	    fprintf(output, "namechar "); break;
862        case XML_REGEXP_NOTNAMECHAR:
863	    fprintf(output, "notnamechar "); break;
864        case XML_REGEXP_DECIMAL:
865	    fprintf(output, "decimal "); break;
866        case XML_REGEXP_NOTDECIMAL:
867	    fprintf(output, "notdecimal "); break;
868        case XML_REGEXP_REALCHAR:
869	    fprintf(output, "realchar "); break;
870        case XML_REGEXP_NOTREALCHAR:
871	    fprintf(output, "notrealchar "); break;
872        case XML_REGEXP_LETTER:
873            fprintf(output, "LETTER "); break;
874        case XML_REGEXP_LETTER_UPPERCASE:
875            fprintf(output, "LETTER_UPPERCASE "); break;
876        case XML_REGEXP_LETTER_LOWERCASE:
877            fprintf(output, "LETTER_LOWERCASE "); break;
878        case XML_REGEXP_LETTER_TITLECASE:
879            fprintf(output, "LETTER_TITLECASE "); break;
880        case XML_REGEXP_LETTER_MODIFIER:
881            fprintf(output, "LETTER_MODIFIER "); break;
882        case XML_REGEXP_LETTER_OTHERS:
883            fprintf(output, "LETTER_OTHERS "); break;
884        case XML_REGEXP_MARK:
885            fprintf(output, "MARK "); break;
886        case XML_REGEXP_MARK_NONSPACING:
887            fprintf(output, "MARK_NONSPACING "); break;
888        case XML_REGEXP_MARK_SPACECOMBINING:
889            fprintf(output, "MARK_SPACECOMBINING "); break;
890        case XML_REGEXP_MARK_ENCLOSING:
891            fprintf(output, "MARK_ENCLOSING "); break;
892        case XML_REGEXP_NUMBER:
893            fprintf(output, "NUMBER "); break;
894        case XML_REGEXP_NUMBER_DECIMAL:
895            fprintf(output, "NUMBER_DECIMAL "); break;
896        case XML_REGEXP_NUMBER_LETTER:
897            fprintf(output, "NUMBER_LETTER "); break;
898        case XML_REGEXP_NUMBER_OTHERS:
899            fprintf(output, "NUMBER_OTHERS "); break;
900        case XML_REGEXP_PUNCT:
901            fprintf(output, "PUNCT "); break;
902        case XML_REGEXP_PUNCT_CONNECTOR:
903            fprintf(output, "PUNCT_CONNECTOR "); break;
904        case XML_REGEXP_PUNCT_DASH:
905            fprintf(output, "PUNCT_DASH "); break;
906        case XML_REGEXP_PUNCT_OPEN:
907            fprintf(output, "PUNCT_OPEN "); break;
908        case XML_REGEXP_PUNCT_CLOSE:
909            fprintf(output, "PUNCT_CLOSE "); break;
910        case XML_REGEXP_PUNCT_INITQUOTE:
911            fprintf(output, "PUNCT_INITQUOTE "); break;
912        case XML_REGEXP_PUNCT_FINQUOTE:
913            fprintf(output, "PUNCT_FINQUOTE "); break;
914        case XML_REGEXP_PUNCT_OTHERS:
915            fprintf(output, "PUNCT_OTHERS "); break;
916        case XML_REGEXP_SEPAR:
917            fprintf(output, "SEPAR "); break;
918        case XML_REGEXP_SEPAR_SPACE:
919            fprintf(output, "SEPAR_SPACE "); break;
920        case XML_REGEXP_SEPAR_LINE:
921            fprintf(output, "SEPAR_LINE "); break;
922        case XML_REGEXP_SEPAR_PARA:
923            fprintf(output, "SEPAR_PARA "); break;
924        case XML_REGEXP_SYMBOL:
925            fprintf(output, "SYMBOL "); break;
926        case XML_REGEXP_SYMBOL_MATH:
927            fprintf(output, "SYMBOL_MATH "); break;
928        case XML_REGEXP_SYMBOL_CURRENCY:
929            fprintf(output, "SYMBOL_CURRENCY "); break;
930        case XML_REGEXP_SYMBOL_MODIFIER:
931            fprintf(output, "SYMBOL_MODIFIER "); break;
932        case XML_REGEXP_SYMBOL_OTHERS:
933            fprintf(output, "SYMBOL_OTHERS "); break;
934        case XML_REGEXP_OTHER:
935            fprintf(output, "OTHER "); break;
936        case XML_REGEXP_OTHER_CONTROL:
937            fprintf(output, "OTHER_CONTROL "); break;
938        case XML_REGEXP_OTHER_FORMAT:
939            fprintf(output, "OTHER_FORMAT "); break;
940        case XML_REGEXP_OTHER_PRIVATE:
941            fprintf(output, "OTHER_PRIVATE "); break;
942        case XML_REGEXP_OTHER_NA:
943            fprintf(output, "OTHER_NA "); break;
944        case XML_REGEXP_BLOCK_NAME:
945	    fprintf(output, "BLOCK "); break;
946    }
947}
948
949static void
950xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
951    switch (type) {
952        case XML_REGEXP_QUANT_EPSILON:
953	    fprintf(output, "epsilon "); break;
954        case XML_REGEXP_QUANT_ONCE:
955	    fprintf(output, "once "); break;
956        case XML_REGEXP_QUANT_OPT:
957	    fprintf(output, "? "); break;
958        case XML_REGEXP_QUANT_MULT:
959	    fprintf(output, "* "); break;
960        case XML_REGEXP_QUANT_PLUS:
961	    fprintf(output, "+ "); break;
962	case XML_REGEXP_QUANT_RANGE:
963	    fprintf(output, "range "); break;
964	case XML_REGEXP_QUANT_ONCEONLY:
965	    fprintf(output, "onceonly "); break;
966	case XML_REGEXP_QUANT_ALL:
967	    fprintf(output, "all "); break;
968    }
969}
970static void
971xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
972    fprintf(output, "  range: ");
973    if (range->neg)
974	fprintf(output, "negative ");
975    xmlRegPrintAtomType(output, range->type);
976    fprintf(output, "%c - %c\n", range->start, range->end);
977}
978
979static void
980xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
981    fprintf(output, " atom: ");
982    if (atom == NULL) {
983	fprintf(output, "NULL\n");
984	return;
985    }
986    if (atom->neg)
987        fprintf(output, "not ");
988    xmlRegPrintAtomType(output, atom->type);
989    xmlRegPrintQuantType(output, atom->quant);
990    if (atom->quant == XML_REGEXP_QUANT_RANGE)
991	fprintf(output, "%d-%d ", atom->min, atom->max);
992    if (atom->type == XML_REGEXP_STRING)
993	fprintf(output, "'%s' ", (char *) atom->valuep);
994    if (atom->type == XML_REGEXP_CHARVAL)
995	fprintf(output, "char %c\n", atom->codepoint);
996    else if (atom->type == XML_REGEXP_RANGES) {
997	int i;
998	fprintf(output, "%d entries\n", atom->nbRanges);
999	for (i = 0; i < atom->nbRanges;i++)
1000	    xmlRegPrintRange(output, atom->ranges[i]);
1001    } else if (atom->type == XML_REGEXP_SUBREG) {
1002	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1003    } else {
1004	fprintf(output, "\n");
1005    }
1006}
1007
1008static void
1009xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1010    fprintf(output, "  trans: ");
1011    if (trans == NULL) {
1012	fprintf(output, "NULL\n");
1013	return;
1014    }
1015    if (trans->to < 0) {
1016	fprintf(output, "removed\n");
1017	return;
1018    }
1019    if (trans->counter >= 0) {
1020	fprintf(output, "counted %d, ", trans->counter);
1021    }
1022    if (trans->count == REGEXP_ALL_COUNTER) {
1023	fprintf(output, "all transition, ");
1024    } else if (trans->count >= 0) {
1025	fprintf(output, "count based %d, ", trans->count);
1026    }
1027    if (trans->atom == NULL) {
1028	fprintf(output, "epsilon to %d\n", trans->to);
1029	return;
1030    }
1031    if (trans->atom->type == XML_REGEXP_CHARVAL)
1032	fprintf(output, "char %c ", trans->atom->codepoint);
1033    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1034}
1035
1036static void
1037xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1038    int i;
1039
1040    fprintf(output, " state: ");
1041    if (state == NULL) {
1042	fprintf(output, "NULL\n");
1043	return;
1044    }
1045    if (state->type == XML_REGEXP_START_STATE)
1046	fprintf(output, "START ");
1047    if (state->type == XML_REGEXP_FINAL_STATE)
1048	fprintf(output, "FINAL ");
1049
1050    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1051    for (i = 0;i < state->nbTrans; i++) {
1052	xmlRegPrintTrans(output, &(state->trans[i]));
1053    }
1054}
1055
1056#ifdef DEBUG_REGEXP_GRAPH
1057static void
1058xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1059    int i;
1060
1061    fprintf(output, " ctxt: ");
1062    if (ctxt == NULL) {
1063	fprintf(output, "NULL\n");
1064	return;
1065    }
1066    fprintf(output, "'%s' ", ctxt->string);
1067    if (ctxt->error)
1068	fprintf(output, "error ");
1069    if (ctxt->neg)
1070	fprintf(output, "neg ");
1071    fprintf(output, "\n");
1072    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1073    for (i = 0;i < ctxt->nbAtoms; i++) {
1074	fprintf(output, " %02d ", i);
1075	xmlRegPrintAtom(output, ctxt->atoms[i]);
1076    }
1077    if (ctxt->atom != NULL) {
1078	fprintf(output, "current atom:\n");
1079	xmlRegPrintAtom(output, ctxt->atom);
1080    }
1081    fprintf(output, "%d states:", ctxt->nbStates);
1082    if (ctxt->start != NULL)
1083	fprintf(output, " start: %d", ctxt->start->no);
1084    if (ctxt->end != NULL)
1085	fprintf(output, " end: %d", ctxt->end->no);
1086    fprintf(output, "\n");
1087    for (i = 0;i < ctxt->nbStates; i++) {
1088	xmlRegPrintState(output, ctxt->states[i]);
1089    }
1090    fprintf(output, "%d counters:\n", ctxt->nbCounters);
1091    for (i = 0;i < ctxt->nbCounters; i++) {
1092	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1093		                                ctxt->counters[i].max);
1094    }
1095}
1096#endif
1097
1098/************************************************************************
1099 * 									*
1100 *		 Finite Automata structures manipulations		*
1101 * 									*
1102 ************************************************************************/
1103
1104static void
1105xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1106	           int neg, xmlRegAtomType type, int start, int end,
1107		   xmlChar *blockName) {
1108    xmlRegRangePtr range;
1109
1110    if (atom == NULL) {
1111	ERROR("add range: atom is NULL");
1112	return;
1113    }
1114    if (atom->type != XML_REGEXP_RANGES) {
1115	ERROR("add range: atom is not ranges");
1116	return;
1117    }
1118    if (atom->maxRanges == 0) {
1119	atom->maxRanges = 4;
1120	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1121		                             sizeof(xmlRegRangePtr));
1122	if (atom->ranges == NULL) {
1123	    xmlRegexpErrMemory(ctxt, "adding ranges");
1124	    atom->maxRanges = 0;
1125	    return;
1126	}
1127    } else if (atom->nbRanges >= atom->maxRanges) {
1128	xmlRegRangePtr *tmp;
1129	atom->maxRanges *= 2;
1130	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1131		                             sizeof(xmlRegRangePtr));
1132	if (tmp == NULL) {
1133	    xmlRegexpErrMemory(ctxt, "adding ranges");
1134	    atom->maxRanges /= 2;
1135	    return;
1136	}
1137	atom->ranges = tmp;
1138    }
1139    range = xmlRegNewRange(ctxt, neg, type, start, end);
1140    if (range == NULL)
1141	return;
1142    range->blockName = blockName;
1143    atom->ranges[atom->nbRanges++] = range;
1144
1145}
1146
1147static int
1148xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1149    if (ctxt->maxCounters == 0) {
1150	ctxt->maxCounters = 4;
1151	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1152		                             sizeof(xmlRegCounter));
1153	if (ctxt->counters == NULL) {
1154	    xmlRegexpErrMemory(ctxt, "allocating counter");
1155	    ctxt->maxCounters = 0;
1156	    return(-1);
1157	}
1158    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1159	xmlRegCounter *tmp;
1160	ctxt->maxCounters *= 2;
1161	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1162		                           sizeof(xmlRegCounter));
1163	if (tmp == NULL) {
1164	    xmlRegexpErrMemory(ctxt, "allocating counter");
1165	    ctxt->maxCounters /= 2;
1166	    return(-1);
1167	}
1168	ctxt->counters = tmp;
1169    }
1170    ctxt->counters[ctxt->nbCounters].min = -1;
1171    ctxt->counters[ctxt->nbCounters].max = -1;
1172    return(ctxt->nbCounters++);
1173}
1174
1175static int
1176xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1177    if (atom == NULL) {
1178	ERROR("atom push: atom is NULL");
1179	return(-1);
1180    }
1181    if (ctxt->maxAtoms == 0) {
1182	ctxt->maxAtoms = 4;
1183	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1184		                             sizeof(xmlRegAtomPtr));
1185	if (ctxt->atoms == NULL) {
1186	    xmlRegexpErrMemory(ctxt, "pushing atom");
1187	    ctxt->maxAtoms = 0;
1188	    return(-1);
1189	}
1190    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1191	xmlRegAtomPtr *tmp;
1192	ctxt->maxAtoms *= 2;
1193	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1194		                             sizeof(xmlRegAtomPtr));
1195	if (tmp == NULL) {
1196	    xmlRegexpErrMemory(ctxt, "allocating counter");
1197	    ctxt->maxAtoms /= 2;
1198	    return(-1);
1199	}
1200	ctxt->atoms = tmp;
1201    }
1202    atom->no = ctxt->nbAtoms;
1203    ctxt->atoms[ctxt->nbAtoms++] = atom;
1204    return(0);
1205}
1206
1207static void
1208xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1209                      int from) {
1210    if (target->maxTransTo == 0) {
1211	target->maxTransTo = 8;
1212	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1213		                             sizeof(int));
1214	if (target->transTo == NULL) {
1215	    xmlRegexpErrMemory(ctxt, "adding transition");
1216	    target->maxTransTo = 0;
1217	    return;
1218	}
1219    } else if (target->nbTransTo >= target->maxTransTo) {
1220	int *tmp;
1221	target->maxTransTo *= 2;
1222	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1223		                             sizeof(int));
1224	if (tmp == NULL) {
1225	    xmlRegexpErrMemory(ctxt, "adding transition");
1226	    target->maxTransTo /= 2;
1227	    return;
1228	}
1229	target->transTo = tmp;
1230    }
1231    target->transTo[target->nbTransTo] = from;
1232    target->nbTransTo++;
1233}
1234
1235static void
1236xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1237	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1238		    int counter, int count, int nchk) {
1239
1240    int nrtrans;
1241
1242    if (state == NULL) {
1243	ERROR("add state: state is NULL");
1244	return;
1245    }
1246    if (target == NULL) {
1247	ERROR("add state: target is NULL");
1248	return;
1249    }
1250    /*
1251     * Other routines follow the philosophy 'When in doubt, add a transition'
1252     * so we check here whether such a transition is already present and, if
1253     * so, silently ignore this request.
1254     */
1255
1256    if (nchk == 0) {
1257	for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1258	    xmlRegTransPtr trans = &(state->trans[nrtrans]);
1259	    if ((trans->atom == atom) &&
1260		(trans->to == target->no) &&
1261		(trans->counter == counter) &&
1262		(trans->count == count)) {
1263#ifdef DEBUG_REGEXP_GRAPH
1264		printf("Ignoring duplicate transition from %d to %d\n",
1265			state->no, target->no);
1266#endif
1267		return;
1268	    }
1269	}
1270    }
1271
1272    if (state->maxTrans == 0) {
1273	state->maxTrans = 8;
1274	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1275		                             sizeof(xmlRegTrans));
1276	if (state->trans == NULL) {
1277	    xmlRegexpErrMemory(ctxt, "adding transition");
1278	    state->maxTrans = 0;
1279	    return;
1280	}
1281    } else if (state->nbTrans >= state->maxTrans) {
1282	xmlRegTrans *tmp;
1283	state->maxTrans *= 2;
1284	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1285		                             sizeof(xmlRegTrans));
1286	if (tmp == NULL) {
1287	    xmlRegexpErrMemory(ctxt, "adding transition");
1288	    state->maxTrans /= 2;
1289	    return;
1290	}
1291	state->trans = tmp;
1292    }
1293#ifdef DEBUG_REGEXP_GRAPH
1294    printf("Add trans from %d to %d ", state->no, target->no);
1295    if (count == REGEXP_ALL_COUNTER)
1296	printf("all transition\n");
1297    else if (count >= 0)
1298	printf("count based %d\n", count);
1299    else if (counter >= 0)
1300	printf("counted %d\n", counter);
1301    else if (atom == NULL)
1302	printf("epsilon transition\n");
1303    else if (atom != NULL)
1304        xmlRegPrintAtom(stdout, atom);
1305#endif
1306
1307    state->trans[state->nbTrans].atom = atom;
1308    state->trans[state->nbTrans].to = target->no;
1309    state->trans[state->nbTrans].counter = counter;
1310    state->trans[state->nbTrans].count = count;
1311    state->nbTrans++;
1312    xmlRegStateAddTransTo(ctxt, target, state->no);
1313}
1314
1315static int
1316xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1317    if (state == NULL) return(-1);
1318    if (ctxt->maxStates == 0) {
1319	ctxt->maxStates = 4;
1320	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1321		                             sizeof(xmlRegStatePtr));
1322	if (ctxt->states == NULL) {
1323	    xmlRegexpErrMemory(ctxt, "adding state");
1324	    ctxt->maxStates = 0;
1325	    return(-1);
1326	}
1327    } else if (ctxt->nbStates >= ctxt->maxStates) {
1328	xmlRegStatePtr *tmp;
1329	ctxt->maxStates *= 2;
1330	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1331		                             sizeof(xmlRegStatePtr));
1332	if (tmp == NULL) {
1333	    xmlRegexpErrMemory(ctxt, "adding state");
1334	    ctxt->maxStates /= 2;
1335	    return(-1);
1336	}
1337	ctxt->states = tmp;
1338    }
1339    state->no = ctxt->nbStates;
1340    ctxt->states[ctxt->nbStates++] = state;
1341    return(0);
1342}
1343
1344/**
1345 * xmlFAGenerateAllTransition:
1346 * @ctxt:  a regexp parser context
1347 * @from:  the from state
1348 * @to:  the target state or NULL for building a new one
1349 * @lax:
1350 *
1351 */
1352static void
1353xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1354			   xmlRegStatePtr from, xmlRegStatePtr to,
1355			   int lax) {
1356    if (to == NULL) {
1357	to = xmlRegNewState(ctxt);
1358	xmlRegStatePush(ctxt, to);
1359	ctxt->state = to;
1360    }
1361    if (lax)
1362	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER, 0);
1363    else
1364	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER, 0);
1365}
1366
1367/**
1368 * xmlFAGenerateEpsilonTransition:
1369 * @ctxt:  a regexp parser context
1370 * @from:  the from state
1371 * @to:  the target state or NULL for building a new one
1372 *
1373 */
1374static void
1375xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1376			       xmlRegStatePtr from, xmlRegStatePtr to) {
1377    if (to == NULL) {
1378	to = xmlRegNewState(ctxt);
1379	xmlRegStatePush(ctxt, to);
1380	ctxt->state = to;
1381    }
1382    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1, 0);
1383}
1384
1385/**
1386 * xmlFAGenerateCountedEpsilonTransition:
1387 * @ctxt:  a regexp parser context
1388 * @from:  the from state
1389 * @to:  the target state or NULL for building a new one
1390 * counter:  the counter for that transition
1391 *
1392 */
1393static void
1394xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1395	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1396    if (to == NULL) {
1397	to = xmlRegNewState(ctxt);
1398	xmlRegStatePush(ctxt, to);
1399	ctxt->state = to;
1400    }
1401    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1, 0);
1402}
1403
1404/**
1405 * xmlFAGenerateCountedTransition:
1406 * @ctxt:  a regexp parser context
1407 * @from:  the from state
1408 * @to:  the target state or NULL for building a new one
1409 * counter:  the counter for that transition
1410 *
1411 */
1412static void
1413xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1414	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1415    if (to == NULL) {
1416	to = xmlRegNewState(ctxt);
1417	xmlRegStatePush(ctxt, to);
1418	ctxt->state = to;
1419    }
1420    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter, 0);
1421}
1422
1423/**
1424 * xmlFAGenerateTransitions:
1425 * @ctxt:  a regexp parser context
1426 * @from:  the from state
1427 * @to:  the target state or NULL for building a new one
1428 * @atom:  the atom generating the transition
1429 *
1430 * Returns 0 if success and -1 in case of error.
1431 */
1432static int
1433xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1434	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1435    if (atom == NULL) {
1436	ERROR("genrate transition: atom == NULL");
1437	return(-1);
1438    }
1439    if (atom->type == XML_REGEXP_SUBREG) {
1440	/*
1441	 * this is a subexpression handling one should not need to
1442	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1443	 */
1444	if (xmlRegAtomPush(ctxt, atom) < 0) {
1445	    return(-1);
1446	}
1447	if ((to != NULL) && (atom->stop != to) &&
1448	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1449	    /*
1450	     * Generate an epsilon transition to link to the target
1451	     */
1452	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1453	}
1454	switch (atom->quant) {
1455	    case XML_REGEXP_QUANT_OPT:
1456		atom->quant = XML_REGEXP_QUANT_ONCE;
1457		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1458		break;
1459	    case XML_REGEXP_QUANT_MULT:
1460		atom->quant = XML_REGEXP_QUANT_ONCE;
1461		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1462		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1463		break;
1464	    case XML_REGEXP_QUANT_PLUS:
1465		atom->quant = XML_REGEXP_QUANT_ONCE;
1466		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1467		break;
1468	    case XML_REGEXP_QUANT_RANGE: {
1469		int counter;
1470		xmlRegStatePtr newstate;
1471
1472		/*
1473		 * This one is nasty:
1474		 *   1/ if range has minOccurs == 0, create a new state
1475		 *	and create epsilon transitions from atom->start
1476		 *	to atom->stop, as well as atom->start to the new
1477		 *	state
1478		 *   2/ register a new counter
1479		 *   3/ register an epsilon transition associated to
1480		 *      this counter going from atom->stop to atom->start
1481		 *   4/ create a new state
1482		 *   5/ generate a counted transition from atom->stop to
1483		 *      that state
1484		 */
1485		if (atom->min == 0) {
1486		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1487			atom->stop);
1488		    newstate = xmlRegNewState(ctxt);
1489		    xmlRegStatePush(ctxt, newstate);
1490		    ctxt->state = newstate;
1491		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1492			newstate);
1493		}
1494		counter = xmlRegGetCounter(ctxt);
1495		ctxt->counters[counter].min = atom->min - 1;
1496		ctxt->counters[counter].max = atom->max - 1;
1497		atom->min = 0;
1498		atom->max = 0;
1499		atom->quant = XML_REGEXP_QUANT_ONCE;
1500		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1501			                              atom->start, counter);
1502		if (to != NULL) {
1503		    newstate = to;
1504		} else {
1505		    newstate = xmlRegNewState(ctxt);
1506		    xmlRegStatePush(ctxt, newstate);
1507		    ctxt->state = newstate;
1508		}
1509		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1510			                       newstate, counter);
1511	    }
1512	    default:
1513		break;
1514	}
1515	return(0);
1516    } else if ((atom->min == 0) && (atom->max == 0) &&
1517               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1518        /*
1519	 * we can discard the atom and generate an epsilon transition instead
1520	 */
1521	if (to == NULL) {
1522	    to = xmlRegNewState(ctxt);
1523	    if (to != NULL)
1524		xmlRegStatePush(ctxt, to);
1525	    else {
1526		return(-1);
1527	    }
1528	}
1529	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1530	ctxt->state = to;
1531	xmlRegFreeAtom(atom);
1532	return(0);
1533    } else {
1534	if (to == NULL) {
1535	    to = xmlRegNewState(ctxt);
1536	    if (to != NULL)
1537		xmlRegStatePush(ctxt, to);
1538	    else {
1539		return(-1);
1540	    }
1541	}
1542	if (xmlRegAtomPush(ctxt, atom) < 0) {
1543	    return(-1);
1544	}
1545	xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1, 0);
1546	ctxt->state = to;
1547    }
1548    switch (atom->quant) {
1549	case XML_REGEXP_QUANT_OPT:
1550	    atom->quant = XML_REGEXP_QUANT_ONCE;
1551	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1552	    break;
1553	case XML_REGEXP_QUANT_MULT:
1554	    atom->quant = XML_REGEXP_QUANT_ONCE;
1555	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1556	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
1557	    break;
1558	case XML_REGEXP_QUANT_PLUS:
1559	    atom->quant = XML_REGEXP_QUANT_ONCE;
1560	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
1561	    break;
1562	default:
1563	    break;
1564    }
1565    return(0);
1566}
1567
1568/**
1569 * xmlFAReduceEpsilonTransitions:
1570 * @ctxt:  a regexp parser context
1571 * @fromnr:  the from state
1572 * @tonr:  the to state
1573 * @counter:  should that transition be associated to a counted
1574 *
1575 */
1576static void
1577xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1578	                      int tonr, int counter) {
1579    int transnr;
1580    xmlRegStatePtr from;
1581    xmlRegStatePtr to;
1582
1583#ifdef DEBUG_REGEXP_GRAPH
1584    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1585#endif
1586    from = ctxt->states[fromnr];
1587    if (from == NULL)
1588	return;
1589    to = ctxt->states[tonr];
1590    if (to == NULL)
1591	return;
1592    if ((to->mark == XML_REGEXP_MARK_START) ||
1593	(to->mark == XML_REGEXP_MARK_VISITED))
1594	return;
1595
1596    to->mark = XML_REGEXP_MARK_VISITED;
1597    if (to->type == XML_REGEXP_FINAL_STATE) {
1598#ifdef DEBUG_REGEXP_GRAPH
1599	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1600#endif
1601	from->type = XML_REGEXP_FINAL_STATE;
1602    }
1603    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1604        if (to->trans[transnr].to < 0)
1605	    continue;
1606	if (to->trans[transnr].atom == NULL) {
1607	    /*
1608	     * Don't remove counted transitions
1609	     * Don't loop either
1610	     */
1611	    if (to->trans[transnr].to != fromnr) {
1612		if (to->trans[transnr].count >= 0) {
1613		    int newto = to->trans[transnr].to;
1614
1615		    xmlRegStateAddTrans(ctxt, from, NULL,
1616					ctxt->states[newto],
1617					-1, to->trans[transnr].count, 0);
1618		} else {
1619#ifdef DEBUG_REGEXP_GRAPH
1620		    printf("Found epsilon trans %d from %d to %d\n",
1621			   transnr, tonr, to->trans[transnr].to);
1622#endif
1623		    if (to->trans[transnr].counter >= 0) {
1624			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1625					      to->trans[transnr].to,
1626					      to->trans[transnr].counter);
1627		    } else {
1628			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1629					      to->trans[transnr].to,
1630					      counter);
1631		    }
1632		}
1633	    }
1634	} else {
1635	    int newto = to->trans[transnr].to;
1636
1637	    if (to->trans[transnr].counter >= 0) {
1638		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1639				    ctxt->states[newto],
1640				    to->trans[transnr].counter, -1, 1);
1641	    } else {
1642		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1643				    ctxt->states[newto], counter, -1, 1);
1644	    }
1645	}
1646    }
1647    to->mark = XML_REGEXP_MARK_NORMAL;
1648}
1649
1650/**
1651 * xmlFAEliminateSimpleEpsilonTransitions:
1652 * @ctxt:  a regexp parser context
1653 *
1654 * Eliminating general epsilon transitions can get costly in the general
1655 * algorithm due to the large amount of generated new transitions and
1656 * associated comparisons. However for simple epsilon transition used just
1657 * to separate building blocks when generating the automata this can be
1658 * reduced to state elimination:
1659 *    - if there exists an epsilon from X to Y
1660 *    - if there is no other transition from X
1661 * then X and Y are semantically equivalent and X can be eliminated
1662 * If X is the start state then make Y the start state, else replace the
1663 * target of all transitions to X by transitions to Y.
1664 */
1665static void
1666xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1667    int statenr, i, j, newto;
1668    xmlRegStatePtr state, tmp;
1669
1670    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1671	state = ctxt->states[statenr];
1672	if (state == NULL)
1673	    continue;
1674	if (state->nbTrans != 1)
1675	    continue;
1676	/* is the only transition out a basic transition */
1677	if ((state->trans[0].atom == NULL) &&
1678	    (state->trans[0].to >= 0) &&
1679	    (state->trans[0].to != statenr) &&
1680	    (state->trans[0].counter < 0) &&
1681	    (state->trans[0].count < 0)) {
1682	    newto = state->trans[0].to;
1683
1684            if (state->type == XML_REGEXP_START_STATE) {
1685#ifdef DEBUG_REGEXP_GRAPH
1686		printf("Found simple epsilon trans from start %d to %d\n",
1687		       statenr, newto);
1688#endif
1689            } else {
1690#ifdef DEBUG_REGEXP_GRAPH
1691		printf("Found simple epsilon trans from %d to %d\n",
1692		       statenr, newto);
1693#endif
1694	        for (i = 0;i < state->nbTransTo;i++) {
1695		    tmp = ctxt->states[state->transTo[i]];
1696		    for (j = 0;j < tmp->nbTrans;j++) {
1697			if (tmp->trans[j].to == statenr) {
1698			    tmp->trans[j].to = newto;
1699#ifdef DEBUG_REGEXP_GRAPH
1700			    printf("Changed transition %d on %d to go to %d\n",
1701				   j, tmp->no, newto);
1702#endif
1703                            xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
1704			                          tmp->no);
1705			}
1706		    }
1707		}
1708#if 0
1709	        for (i = 0;i < ctxt->nbStates;i++) {
1710		    tmp = ctxt->states[i];
1711		    for (j = 0;j < tmp->nbTrans;j++) {
1712			if (tmp->trans[j].to == statenr) {
1713			    tmp->trans[j].to = newto;
1714#ifdef DEBUG_REGEXP_GRAPH
1715			    printf("Changed transition %d on %d to go to %d\n",
1716				   j, tmp->no, newto);
1717#endif
1718			}
1719		    }
1720		}
1721#endif
1722		if (state->type == XML_REGEXP_FINAL_STATE)
1723		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1724		/* eliminate the transition completely */
1725		state->nbTrans = 0;
1726
1727
1728	    }
1729
1730	}
1731    }
1732}
1733/**
1734 * xmlFAEliminateEpsilonTransitions:
1735 * @ctxt:  a regexp parser context
1736 *
1737 */
1738static void
1739xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1740    int statenr, transnr;
1741    xmlRegStatePtr state;
1742    int has_epsilon;
1743
1744    if (ctxt->states == NULL) return;
1745
1746    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1747
1748    has_epsilon = 0;
1749
1750    /*
1751     * build the completed transitions bypassing the epsilons
1752     * Use a marking algorithm to avoid loops
1753     * mark sink states too.
1754     */
1755    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1756	state = ctxt->states[statenr];
1757	if (state == NULL)
1758	    continue;
1759	if ((state->nbTrans == 0) &&
1760	    (state->type != XML_REGEXP_FINAL_STATE)) {
1761	    state->type = XML_REGEXP_SINK_STATE;
1762	}
1763	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1764	    if ((state->trans[transnr].atom == NULL) &&
1765		(state->trans[transnr].to >= 0)) {
1766		if (state->trans[transnr].to == statenr) {
1767		    state->trans[transnr].to = -1;
1768#ifdef DEBUG_REGEXP_GRAPH
1769		    printf("Removed loopback epsilon trans %d on %d\n",
1770			   transnr, statenr);
1771#endif
1772		} else if (state->trans[transnr].count < 0) {
1773		    int newto = state->trans[transnr].to;
1774
1775#ifdef DEBUG_REGEXP_GRAPH
1776		    printf("Found epsilon trans %d from %d to %d\n",
1777			   transnr, statenr, newto);
1778#endif
1779		    state->mark = XML_REGEXP_MARK_START;
1780		    has_epsilon = 1;
1781		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1782				      newto, state->trans[transnr].counter);
1783		    state->mark = XML_REGEXP_MARK_NORMAL;
1784#ifdef DEBUG_REGEXP_GRAPH
1785		} else {
1786		    printf("Found counted transition %d on %d\n",
1787			   transnr, statenr);
1788#endif
1789	        }
1790	    }
1791	}
1792    }
1793    /*
1794     * Eliminate the epsilon transitions
1795     */
1796    if (has_epsilon) {
1797	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1798	    state = ctxt->states[statenr];
1799	    if (state == NULL)
1800		continue;
1801	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1802		xmlRegTransPtr trans = &(state->trans[transnr]);
1803		if ((trans->atom == NULL) &&
1804		    (trans->count < 0) &&
1805		    (trans->to >= 0)) {
1806		    trans->to = -1;
1807		}
1808	    }
1809	}
1810    }
1811
1812    /*
1813     * Use this pass to detect unreachable states too
1814     */
1815    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1816	state = ctxt->states[statenr];
1817	if (state != NULL)
1818	    state->reached = XML_REGEXP_MARK_NORMAL;
1819    }
1820    state = ctxt->states[0];
1821    if (state != NULL)
1822	state->reached = XML_REGEXP_MARK_START;
1823    while (state != NULL) {
1824	xmlRegStatePtr target = NULL;
1825	state->reached = XML_REGEXP_MARK_VISITED;
1826	/*
1827	 * Mark all states reachable from the current reachable state
1828	 */
1829	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1830	    if ((state->trans[transnr].to >= 0) &&
1831		((state->trans[transnr].atom != NULL) ||
1832		 (state->trans[transnr].count >= 0))) {
1833		int newto = state->trans[transnr].to;
1834
1835		if (ctxt->states[newto] == NULL)
1836		    continue;
1837		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
1838		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
1839		    target = ctxt->states[newto];
1840		}
1841	    }
1842	}
1843
1844	/*
1845	 * find the next accessible state not explored
1846	 */
1847	if (target == NULL) {
1848	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1849		state = ctxt->states[statenr];
1850		if ((state != NULL) && (state->reached ==
1851			XML_REGEXP_MARK_START)) {
1852		    target = state;
1853		    break;
1854		}
1855	    }
1856	}
1857	state = target;
1858    }
1859    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1860	state = ctxt->states[statenr];
1861	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
1862#ifdef DEBUG_REGEXP_GRAPH
1863	    printf("Removed unreachable state %d\n", statenr);
1864#endif
1865	    xmlRegFreeState(state);
1866	    ctxt->states[statenr] = NULL;
1867	}
1868    }
1869
1870}
1871
1872/**
1873 * xmlFACompareAtoms:
1874 * @atom1:  an atom
1875 * @atom2:  an atom
1876 *
1877 * Compares two atoms to check whether they are equivalents
1878 *
1879 * Returns 1 if yes and 0 otherwise
1880 */
1881static int
1882xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
1883    int ret;
1884
1885    if (atom1 == atom2)
1886	return(1);
1887    if ((atom1 == NULL) || (atom2 == NULL))
1888	return(0);
1889
1890    if (atom1->type != atom2->type)
1891	return(0);
1892    switch (atom1->type) {
1893        case XML_REGEXP_STRING:
1894	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
1895	                                 (xmlChar *)atom2->valuep);
1896	    break;
1897        case XML_REGEXP_EPSILON:
1898	    return(1);
1899        case XML_REGEXP_CHARVAL:
1900	    ret = atom1->codepoint == atom2->codepoint;
1901	    break;
1902        case XML_REGEXP_RANGES:
1903	    TODO;
1904	    return(0);
1905	default:
1906	    return(1);
1907    }
1908    if (atom1->neg != atom2->neg) {
1909        ret = !ret;
1910    }
1911    return(ret);
1912}
1913
1914/**
1915 * xmlFARecurseDeterminism:
1916 * @ctxt:  a regexp parser context
1917 *
1918 * Check whether the associated regexp is determinist,
1919 * should be called after xmlFAEliminateEpsilonTransitions()
1920 *
1921 */
1922static int
1923xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1924	                 int to, xmlRegAtomPtr atom) {
1925    int ret = 1;
1926    int transnr;
1927    xmlRegTransPtr t1;
1928
1929    if (state == NULL)
1930	return(ret);
1931    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1932	t1 = &(state->trans[transnr]);
1933	/*
1934	 * check transitions conflicting with the one looked at
1935	 */
1936	if (t1->atom == NULL) {
1937	    if (t1->to == -1)
1938		continue;
1939	    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1940		                           to, atom);
1941	    if (ret == 0)
1942		return(0);
1943	    continue;
1944	}
1945	if (t1->to != to)
1946	    continue;
1947	if (xmlFACompareAtoms(t1->atom, atom))
1948	    return(0);
1949    }
1950    return(ret);
1951}
1952
1953/**
1954 * xmlFAComputesDeterminism:
1955 * @ctxt:  a regexp parser context
1956 *
1957 * Check whether the associated regexp is determinist,
1958 * should be called after xmlFAEliminateEpsilonTransitions()
1959 *
1960 */
1961static int
1962xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
1963    int statenr, transnr;
1964    xmlRegStatePtr state;
1965    xmlRegTransPtr t1, t2;
1966    int i;
1967    int ret = 1;
1968
1969#ifdef DEBUG_REGEXP_GRAPH
1970    printf("xmlFAComputesDeterminism\n");
1971    xmlRegPrintCtxt(stdout, ctxt);
1972#endif
1973    if (ctxt->determinist != -1)
1974	return(ctxt->determinist);
1975
1976    /*
1977     * Check for all states that there aren't 2 transitions
1978     * with the same atom and a different target.
1979     */
1980    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1981	state = ctxt->states[statenr];
1982	if (state == NULL)
1983	    continue;
1984	if (state->nbTrans < 2)
1985	    continue;
1986	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1987	    t1 = &(state->trans[transnr]);
1988	    /*
1989	     * Determinism checks in case of counted or all transitions
1990	     * will have to be handled separately
1991	     */
1992	    if (t1->atom == NULL)
1993		continue;
1994	    if (t1->to == -1) /* eliminated */
1995		continue;
1996	    for (i = 0;i < transnr;i++) {
1997		t2 = &(state->trans[i]);
1998		if (t2->to == -1) /* eliminated */
1999		    continue;
2000		if (t2->atom != NULL) {
2001		    if (t1->to == t2->to) {
2002			if (xmlFACompareAtoms(t1->atom, t2->atom))
2003			    t2->to = -1; /* eliminated */
2004		    } else {
2005			/* not determinist ! */
2006			if (xmlFACompareAtoms(t1->atom, t2->atom))
2007			    ret = 0;
2008		    }
2009		} else if (t1->to != -1) {
2010		    /*
2011		     * do the closure in case of remaining specific
2012		     * epsilon transitions like choices or all
2013		     */
2014		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2015						   t2->to, t2->atom);
2016		    if (ret == 0)
2017			return(0);
2018		}
2019	    }
2020	    if (ret == 0)
2021		break;
2022	}
2023	if (ret == 0)
2024	    break;
2025    }
2026    ctxt->determinist = ret;
2027    return(ret);
2028}
2029
2030/************************************************************************
2031 * 									*
2032 *	Routines to check input against transition atoms		*
2033 * 									*
2034 ************************************************************************/
2035
2036static int
2037xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2038	                  int start, int end, const xmlChar *blockName) {
2039    int ret = 0;
2040
2041    switch (type) {
2042        case XML_REGEXP_STRING:
2043        case XML_REGEXP_SUBREG:
2044        case XML_REGEXP_RANGES:
2045        case XML_REGEXP_EPSILON:
2046	    return(-1);
2047        case XML_REGEXP_ANYCHAR:
2048	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2049	    break;
2050        case XML_REGEXP_CHARVAL:
2051	    ret = ((codepoint >= start) && (codepoint <= end));
2052	    break;
2053        case XML_REGEXP_NOTSPACE:
2054	    neg = !neg;
2055        case XML_REGEXP_ANYSPACE:
2056	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2057		   (codepoint == '\t') || (codepoint == ' '));
2058	    break;
2059        case XML_REGEXP_NOTINITNAME:
2060	    neg = !neg;
2061        case XML_REGEXP_INITNAME:
2062	    ret = (IS_LETTER(codepoint) ||
2063		   (codepoint == '_') || (codepoint == ':'));
2064	    break;
2065        case XML_REGEXP_NOTNAMECHAR:
2066	    neg = !neg;
2067        case XML_REGEXP_NAMECHAR:
2068	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2069		   (codepoint == '.') || (codepoint == '-') ||
2070		   (codepoint == '_') || (codepoint == ':') ||
2071		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2072	    break;
2073        case XML_REGEXP_NOTDECIMAL:
2074	    neg = !neg;
2075        case XML_REGEXP_DECIMAL:
2076	    ret = xmlUCSIsCatNd(codepoint);
2077	    break;
2078        case XML_REGEXP_REALCHAR:
2079	    neg = !neg;
2080        case XML_REGEXP_NOTREALCHAR:
2081	    ret = xmlUCSIsCatP(codepoint);
2082	    if (ret == 0)
2083		ret = xmlUCSIsCatZ(codepoint);
2084	    if (ret == 0)
2085		ret = xmlUCSIsCatC(codepoint);
2086	    break;
2087        case XML_REGEXP_LETTER:
2088	    ret = xmlUCSIsCatL(codepoint);
2089	    break;
2090        case XML_REGEXP_LETTER_UPPERCASE:
2091	    ret = xmlUCSIsCatLu(codepoint);
2092	    break;
2093        case XML_REGEXP_LETTER_LOWERCASE:
2094	    ret = xmlUCSIsCatLl(codepoint);
2095	    break;
2096        case XML_REGEXP_LETTER_TITLECASE:
2097	    ret = xmlUCSIsCatLt(codepoint);
2098	    break;
2099        case XML_REGEXP_LETTER_MODIFIER:
2100	    ret = xmlUCSIsCatLm(codepoint);
2101	    break;
2102        case XML_REGEXP_LETTER_OTHERS:
2103	    ret = xmlUCSIsCatLo(codepoint);
2104	    break;
2105        case XML_REGEXP_MARK:
2106	    ret = xmlUCSIsCatM(codepoint);
2107	    break;
2108        case XML_REGEXP_MARK_NONSPACING:
2109	    ret = xmlUCSIsCatMn(codepoint);
2110	    break;
2111        case XML_REGEXP_MARK_SPACECOMBINING:
2112	    ret = xmlUCSIsCatMc(codepoint);
2113	    break;
2114        case XML_REGEXP_MARK_ENCLOSING:
2115	    ret = xmlUCSIsCatMe(codepoint);
2116	    break;
2117        case XML_REGEXP_NUMBER:
2118	    ret = xmlUCSIsCatN(codepoint);
2119	    break;
2120        case XML_REGEXP_NUMBER_DECIMAL:
2121	    ret = xmlUCSIsCatNd(codepoint);
2122	    break;
2123        case XML_REGEXP_NUMBER_LETTER:
2124	    ret = xmlUCSIsCatNl(codepoint);
2125	    break;
2126        case XML_REGEXP_NUMBER_OTHERS:
2127	    ret = xmlUCSIsCatNo(codepoint);
2128	    break;
2129        case XML_REGEXP_PUNCT:
2130	    ret = xmlUCSIsCatP(codepoint);
2131	    break;
2132        case XML_REGEXP_PUNCT_CONNECTOR:
2133	    ret = xmlUCSIsCatPc(codepoint);
2134	    break;
2135        case XML_REGEXP_PUNCT_DASH:
2136	    ret = xmlUCSIsCatPd(codepoint);
2137	    break;
2138        case XML_REGEXP_PUNCT_OPEN:
2139	    ret = xmlUCSIsCatPs(codepoint);
2140	    break;
2141        case XML_REGEXP_PUNCT_CLOSE:
2142	    ret = xmlUCSIsCatPe(codepoint);
2143	    break;
2144        case XML_REGEXP_PUNCT_INITQUOTE:
2145	    ret = xmlUCSIsCatPi(codepoint);
2146	    break;
2147        case XML_REGEXP_PUNCT_FINQUOTE:
2148	    ret = xmlUCSIsCatPf(codepoint);
2149	    break;
2150        case XML_REGEXP_PUNCT_OTHERS:
2151	    ret = xmlUCSIsCatPo(codepoint);
2152	    break;
2153        case XML_REGEXP_SEPAR:
2154	    ret = xmlUCSIsCatZ(codepoint);
2155	    break;
2156        case XML_REGEXP_SEPAR_SPACE:
2157	    ret = xmlUCSIsCatZs(codepoint);
2158	    break;
2159        case XML_REGEXP_SEPAR_LINE:
2160	    ret = xmlUCSIsCatZl(codepoint);
2161	    break;
2162        case XML_REGEXP_SEPAR_PARA:
2163	    ret = xmlUCSIsCatZp(codepoint);
2164	    break;
2165        case XML_REGEXP_SYMBOL:
2166	    ret = xmlUCSIsCatS(codepoint);
2167	    break;
2168        case XML_REGEXP_SYMBOL_MATH:
2169	    ret = xmlUCSIsCatSm(codepoint);
2170	    break;
2171        case XML_REGEXP_SYMBOL_CURRENCY:
2172	    ret = xmlUCSIsCatSc(codepoint);
2173	    break;
2174        case XML_REGEXP_SYMBOL_MODIFIER:
2175	    ret = xmlUCSIsCatSk(codepoint);
2176	    break;
2177        case XML_REGEXP_SYMBOL_OTHERS:
2178	    ret = xmlUCSIsCatSo(codepoint);
2179	    break;
2180        case XML_REGEXP_OTHER:
2181	    ret = xmlUCSIsCatC(codepoint);
2182	    break;
2183        case XML_REGEXP_OTHER_CONTROL:
2184	    ret = xmlUCSIsCatCc(codepoint);
2185	    break;
2186        case XML_REGEXP_OTHER_FORMAT:
2187	    ret = xmlUCSIsCatCf(codepoint);
2188	    break;
2189        case XML_REGEXP_OTHER_PRIVATE:
2190	    ret = xmlUCSIsCatCo(codepoint);
2191	    break;
2192        case XML_REGEXP_OTHER_NA:
2193	    /* ret = xmlUCSIsCatCn(codepoint); */
2194	    /* Seems it doesn't exist anymore in recent Unicode releases */
2195	    ret = 0;
2196	    break;
2197        case XML_REGEXP_BLOCK_NAME:
2198	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2199	    break;
2200    }
2201    if (neg)
2202	return(!ret);
2203    return(ret);
2204}
2205
2206static int
2207xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2208    int i, ret = 0;
2209    xmlRegRangePtr range;
2210
2211    if ((atom == NULL) || (!IS_CHAR(codepoint)))
2212	return(-1);
2213
2214    switch (atom->type) {
2215        case XML_REGEXP_SUBREG:
2216        case XML_REGEXP_EPSILON:
2217	    return(-1);
2218        case XML_REGEXP_CHARVAL:
2219            return(codepoint == atom->codepoint);
2220        case XML_REGEXP_RANGES: {
2221	    int accept = 0;
2222
2223	    for (i = 0;i < atom->nbRanges;i++) {
2224		range = atom->ranges[i];
2225		if (range->neg == 2) {
2226		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2227						0, range->start, range->end,
2228						range->blockName);
2229		    if (ret != 0)
2230			return(0); /* excluded char */
2231		} else if (range->neg) {
2232		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2233						0, range->start, range->end,
2234						range->blockName);
2235		    if (ret == 0)
2236		        accept = 1;
2237		    else
2238		        return(0);
2239		} else {
2240		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2241						0, range->start, range->end,
2242						range->blockName);
2243		    if (ret != 0)
2244			accept = 1; /* might still be excluded */
2245		}
2246	    }
2247	    return(accept);
2248	}
2249        case XML_REGEXP_STRING:
2250	    printf("TODO: XML_REGEXP_STRING\n");
2251	    return(-1);
2252        case XML_REGEXP_ANYCHAR:
2253        case XML_REGEXP_ANYSPACE:
2254        case XML_REGEXP_NOTSPACE:
2255        case XML_REGEXP_INITNAME:
2256        case XML_REGEXP_NOTINITNAME:
2257        case XML_REGEXP_NAMECHAR:
2258        case XML_REGEXP_NOTNAMECHAR:
2259        case XML_REGEXP_DECIMAL:
2260        case XML_REGEXP_NOTDECIMAL:
2261        case XML_REGEXP_REALCHAR:
2262        case XML_REGEXP_NOTREALCHAR:
2263        case XML_REGEXP_LETTER:
2264        case XML_REGEXP_LETTER_UPPERCASE:
2265        case XML_REGEXP_LETTER_LOWERCASE:
2266        case XML_REGEXP_LETTER_TITLECASE:
2267        case XML_REGEXP_LETTER_MODIFIER:
2268        case XML_REGEXP_LETTER_OTHERS:
2269        case XML_REGEXP_MARK:
2270        case XML_REGEXP_MARK_NONSPACING:
2271        case XML_REGEXP_MARK_SPACECOMBINING:
2272        case XML_REGEXP_MARK_ENCLOSING:
2273        case XML_REGEXP_NUMBER:
2274        case XML_REGEXP_NUMBER_DECIMAL:
2275        case XML_REGEXP_NUMBER_LETTER:
2276        case XML_REGEXP_NUMBER_OTHERS:
2277        case XML_REGEXP_PUNCT:
2278        case XML_REGEXP_PUNCT_CONNECTOR:
2279        case XML_REGEXP_PUNCT_DASH:
2280        case XML_REGEXP_PUNCT_OPEN:
2281        case XML_REGEXP_PUNCT_CLOSE:
2282        case XML_REGEXP_PUNCT_INITQUOTE:
2283        case XML_REGEXP_PUNCT_FINQUOTE:
2284        case XML_REGEXP_PUNCT_OTHERS:
2285        case XML_REGEXP_SEPAR:
2286        case XML_REGEXP_SEPAR_SPACE:
2287        case XML_REGEXP_SEPAR_LINE:
2288        case XML_REGEXP_SEPAR_PARA:
2289        case XML_REGEXP_SYMBOL:
2290        case XML_REGEXP_SYMBOL_MATH:
2291        case XML_REGEXP_SYMBOL_CURRENCY:
2292        case XML_REGEXP_SYMBOL_MODIFIER:
2293        case XML_REGEXP_SYMBOL_OTHERS:
2294        case XML_REGEXP_OTHER:
2295        case XML_REGEXP_OTHER_CONTROL:
2296        case XML_REGEXP_OTHER_FORMAT:
2297        case XML_REGEXP_OTHER_PRIVATE:
2298        case XML_REGEXP_OTHER_NA:
2299	case XML_REGEXP_BLOCK_NAME:
2300	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
2301		                            (const xmlChar *)atom->valuep);
2302	    if (atom->neg)
2303		ret = !ret;
2304	    break;
2305    }
2306    return(ret);
2307}
2308
2309/************************************************************************
2310 * 									*
2311 *	Saving and restoring state of an execution context		*
2312 * 									*
2313 ************************************************************************/
2314
2315#ifdef DEBUG_REGEXP_EXEC
2316static void
2317xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
2318    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
2319    if (exec->inputStack != NULL) {
2320	int i;
2321	printf(": ");
2322	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
2323	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
2324    } else {
2325	printf(": %s", &(exec->inputString[exec->index]));
2326    }
2327    printf("\n");
2328}
2329#endif
2330
2331static void
2332xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
2333#ifdef DEBUG_REGEXP_EXEC
2334    printf("saving ");
2335    exec->transno++;
2336    xmlFARegDebugExec(exec);
2337    exec->transno--;
2338#endif
2339
2340    if (exec->maxRollbacks == 0) {
2341	exec->maxRollbacks = 4;
2342	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
2343		                             sizeof(xmlRegExecRollback));
2344	if (exec->rollbacks == NULL) {
2345	    xmlRegexpErrMemory(NULL, "saving regexp");
2346	    exec->maxRollbacks = 0;
2347	    return;
2348	}
2349	memset(exec->rollbacks, 0,
2350	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
2351    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
2352	xmlRegExecRollback *tmp;
2353	int len = exec->maxRollbacks;
2354
2355	exec->maxRollbacks *= 2;
2356	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
2357			exec->maxRollbacks * sizeof(xmlRegExecRollback));
2358	if (tmp == NULL) {
2359	    xmlRegexpErrMemory(NULL, "saving regexp");
2360	    exec->maxRollbacks /= 2;
2361	    return;
2362	}
2363	exec->rollbacks = tmp;
2364	tmp = &exec->rollbacks[len];
2365	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
2366    }
2367    exec->rollbacks[exec->nbRollbacks].state = exec->state;
2368    exec->rollbacks[exec->nbRollbacks].index = exec->index;
2369    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
2370    if (exec->comp->nbCounters > 0) {
2371	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2372	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
2373		xmlMalloc(exec->comp->nbCounters * sizeof(int));
2374	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2375		xmlRegexpErrMemory(NULL, "saving regexp");
2376		exec->status = -5;
2377		return;
2378	    }
2379	}
2380	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
2381	       exec->comp->nbCounters * sizeof(int));
2382    }
2383    exec->nbRollbacks++;
2384}
2385
2386static void
2387xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
2388    if (exec->nbRollbacks <= 0) {
2389	exec->status = -1;
2390#ifdef DEBUG_REGEXP_EXEC
2391	printf("rollback failed on empty stack\n");
2392#endif
2393	return;
2394    }
2395    exec->nbRollbacks--;
2396    exec->state = exec->rollbacks[exec->nbRollbacks].state;
2397    exec->index = exec->rollbacks[exec->nbRollbacks].index;
2398    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
2399    if (exec->comp->nbCounters > 0) {
2400	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2401	    fprintf(stderr, "exec save: allocation failed");
2402	    exec->status = -6;
2403	    return;
2404	}
2405	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
2406	       exec->comp->nbCounters * sizeof(int));
2407    }
2408
2409#ifdef DEBUG_REGEXP_EXEC
2410    printf("restored ");
2411    xmlFARegDebugExec(exec);
2412#endif
2413}
2414
2415/************************************************************************
2416 * 									*
2417 *	Verifier, running an input against a compiled regexp		*
2418 * 									*
2419 ************************************************************************/
2420
2421static int
2422xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
2423    xmlRegExecCtxt execval;
2424    xmlRegExecCtxtPtr exec = &execval;
2425    int ret, codepoint = 0, len;
2426
2427    exec->inputString = content;
2428    exec->index = 0;
2429    exec->determinist = 1;
2430    exec->maxRollbacks = 0;
2431    exec->nbRollbacks = 0;
2432    exec->rollbacks = NULL;
2433    exec->status = 0;
2434    exec->comp = comp;
2435    exec->state = comp->states[0];
2436    exec->transno = 0;
2437    exec->transcount = 0;
2438    exec->inputStack = NULL;
2439    exec->inputStackMax = 0;
2440    if (comp->nbCounters > 0) {
2441	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2442	if (exec->counts == NULL) {
2443	    xmlRegexpErrMemory(NULL, "running regexp");
2444	    return(-1);
2445	}
2446        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2447    } else
2448	exec->counts = NULL;
2449    while ((exec->status == 0) &&
2450	   ((exec->inputString[exec->index] != 0) ||
2451	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2452	xmlRegTransPtr trans;
2453	xmlRegAtomPtr atom;
2454
2455	/*
2456	 * If end of input on non-terminal state, rollback, however we may
2457	 * still have epsilon like transition for counted transitions
2458	 * on counters, in that case don't break too early.  Additionally,
2459	 * if we are working on a range like "AB{0,2}", where B is not present,
2460	 * we don't want to break.
2461	 */
2462	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
2463	    /*
2464	     * if there is a transition, we must check if
2465	     *  atom allows minOccurs of 0
2466	     */
2467	    if (exec->transno < exec->state->nbTrans) {
2468	        trans = &exec->state->trans[exec->transno];
2469		if (trans->to >=0) {
2470		    atom = trans->atom;
2471		    if (!((atom->min == 0) && (atom->max > 0)))
2472		        goto rollback;
2473		}
2474	    } else
2475	        goto rollback;
2476	}
2477
2478	exec->transcount = 0;
2479	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2480	    trans = &exec->state->trans[exec->transno];
2481	    if (trans->to < 0)
2482		continue;
2483	    atom = trans->atom;
2484	    ret = 0;
2485	    if (trans->count >= 0) {
2486		int count;
2487		xmlRegCounterPtr counter;
2488
2489		/*
2490		 * A counted transition.
2491		 */
2492
2493		count = exec->counts[trans->count];
2494		counter = &exec->comp->counters[trans->count];
2495#ifdef DEBUG_REGEXP_EXEC
2496		printf("testing count %d: val %d, min %d, max %d\n",
2497		       trans->count, count, counter->min,  counter->max);
2498#endif
2499		ret = ((count >= counter->min) && (count <= counter->max));
2500	    } else if (atom == NULL) {
2501		fprintf(stderr, "epsilon transition left at runtime\n");
2502		exec->status = -2;
2503		break;
2504	    } else if (exec->inputString[exec->index] != 0) {
2505                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2506		ret = xmlRegCheckCharacter(atom, codepoint);
2507		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
2508		    xmlRegStatePtr to = comp->states[trans->to];
2509
2510		    /*
2511		     * this is a multiple input sequence
2512		     */
2513		    if (exec->state->nbTrans > exec->transno + 1) {
2514			xmlFARegExecSave(exec);
2515		    }
2516		    exec->transcount = 1;
2517		    do {
2518			/*
2519			 * Try to progress as much as possible on the input
2520			 */
2521			if (exec->transcount == atom->max) {
2522			    break;
2523			}
2524			exec->index += len;
2525			/*
2526			 * End of input: stop here
2527			 */
2528			if (exec->inputString[exec->index] == 0) {
2529			    exec->index -= len;
2530			    break;
2531			}
2532			if (exec->transcount >= atom->min) {
2533			    int transno = exec->transno;
2534			    xmlRegStatePtr state = exec->state;
2535
2536			    /*
2537			     * The transition is acceptable save it
2538			     */
2539			    exec->transno = -1; /* trick */
2540			    exec->state = to;
2541			    xmlFARegExecSave(exec);
2542			    exec->transno = transno;
2543			    exec->state = state;
2544			}
2545			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2546				              len);
2547			ret = xmlRegCheckCharacter(atom, codepoint);
2548			exec->transcount++;
2549		    } while (ret == 1);
2550		    if (exec->transcount < atom->min)
2551			ret = 0;
2552
2553		    /*
2554		     * If the last check failed but one transition was found
2555		     * possible, rollback
2556		     */
2557		    if (ret < 0)
2558			ret = 0;
2559		    if (ret == 0) {
2560			goto rollback;
2561		    }
2562		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
2563		    /*
2564		     * we don't match on the codepoint, but minOccurs of 0
2565		     * says that's ok.  Setting len to 0 inhibits stepping
2566		     * over the codepoint.
2567		     */
2568		    exec->transcount = 1;
2569		    len = 0;
2570		    ret = 1;
2571		}
2572	    } else if ((atom->min == 0) && (atom->max > 0)) {
2573	        /* another spot to match when minOccurs is 0 */
2574		exec->transcount = 1;
2575		len = 0;
2576		ret = 1;
2577	    }
2578	    if (ret == 1) {
2579		if (exec->state->nbTrans > exec->transno + 1) {
2580		    xmlFARegExecSave(exec);
2581		}
2582		if (trans->counter >= 0) {
2583#ifdef DEBUG_REGEXP_EXEC
2584		    printf("Increasing count %d\n", trans->counter);
2585#endif
2586		    exec->counts[trans->counter]++;
2587		}
2588		if ((trans->count >= 0) &&
2589		    (trans->count < REGEXP_ALL_COUNTER)) {
2590#ifdef DEBUG_REGEXP_EXEC
2591		    printf("resetting count %d on transition\n",
2592		           trans->count);
2593#endif
2594		    exec->counts[trans->count] = 0;
2595		}
2596#ifdef DEBUG_REGEXP_EXEC
2597		printf("entering state %d\n", trans->to);
2598#endif
2599		exec->state = comp->states[trans->to];
2600		exec->transno = 0;
2601		if (trans->atom != NULL) {
2602		    exec->index += len;
2603		}
2604		goto progress;
2605	    } else if (ret < 0) {
2606		exec->status = -4;
2607		break;
2608	    }
2609	}
2610	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2611rollback:
2612	    /*
2613	     * Failed to find a way out
2614	     */
2615	    exec->determinist = 0;
2616	    xmlFARegExecRollBack(exec);
2617	}
2618progress:
2619	continue;
2620    }
2621    if (exec->rollbacks != NULL) {
2622	if (exec->counts != NULL) {
2623	    int i;
2624
2625	    for (i = 0;i < exec->maxRollbacks;i++)
2626		if (exec->rollbacks[i].counts != NULL)
2627		    xmlFree(exec->rollbacks[i].counts);
2628	}
2629	xmlFree(exec->rollbacks);
2630    }
2631    if (exec->counts != NULL)
2632	xmlFree(exec->counts);
2633    if (exec->status == 0)
2634	return(1);
2635    if (exec->status == -1)
2636	return(0);
2637    return(exec->status);
2638}
2639
2640/************************************************************************
2641 * 									*
2642 *	Progressive interface to the verifier one atom at a time	*
2643 * 									*
2644 ************************************************************************/
2645#ifdef DEBUG_ERR
2646static void testerr(xmlRegExecCtxtPtr exec);
2647#endif
2648
2649/**
2650 * xmlRegNewExecCtxt:
2651 * @comp: a precompiled regular expression
2652 * @callback: a callback function used for handling progresses in the
2653 *            automata matching phase
2654 * @data: the context data associated to the callback in this context
2655 *
2656 * Build a context used for progressive evaluation of a regexp.
2657 *
2658 * Returns the new context
2659 */
2660xmlRegExecCtxtPtr
2661xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
2662    xmlRegExecCtxtPtr exec;
2663
2664    if (comp == NULL)
2665	return(NULL);
2666    if ((comp->compact == NULL) && (comp->states == NULL))
2667        return(NULL);
2668    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
2669    if (exec == NULL) {
2670	xmlRegexpErrMemory(NULL, "creating execution context");
2671	return(NULL);
2672    }
2673    memset(exec, 0, sizeof(xmlRegExecCtxt));
2674    exec->inputString = NULL;
2675    exec->index = 0;
2676    exec->determinist = 1;
2677    exec->maxRollbacks = 0;
2678    exec->nbRollbacks = 0;
2679    exec->rollbacks = NULL;
2680    exec->status = 0;
2681    exec->comp = comp;
2682    if (comp->compact == NULL)
2683	exec->state = comp->states[0];
2684    exec->transno = 0;
2685    exec->transcount = 0;
2686    exec->callback = callback;
2687    exec->data = data;
2688    if (comp->nbCounters > 0) {
2689        /*
2690	 * For error handling, exec->counts is allocated twice the size
2691	 * the second half is used to store the data in case of rollback
2692	 */
2693	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
2694	                                 * 2);
2695	if (exec->counts == NULL) {
2696	    xmlRegexpErrMemory(NULL, "creating execution context");
2697	    xmlFree(exec);
2698	    return(NULL);
2699	}
2700        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
2701	exec->errCounts = &exec->counts[comp->nbCounters];
2702    } else {
2703	exec->counts = NULL;
2704	exec->errCounts = NULL;
2705    }
2706    exec->inputStackMax = 0;
2707    exec->inputStackNr = 0;
2708    exec->inputStack = NULL;
2709    exec->errStateNo = -1;
2710    exec->errString = NULL;
2711    return(exec);
2712}
2713
2714/**
2715 * xmlRegFreeExecCtxt:
2716 * @exec: a regular expression evaulation context
2717 *
2718 * Free the structures associated to a regular expression evaulation context.
2719 */
2720void
2721xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
2722    if (exec == NULL)
2723	return;
2724
2725    if (exec->rollbacks != NULL) {
2726	if (exec->counts != NULL) {
2727	    int i;
2728
2729	    for (i = 0;i < exec->maxRollbacks;i++)
2730		if (exec->rollbacks[i].counts != NULL)
2731		    xmlFree(exec->rollbacks[i].counts);
2732	}
2733	xmlFree(exec->rollbacks);
2734    }
2735    if (exec->counts != NULL)
2736	xmlFree(exec->counts);
2737    if (exec->inputStack != NULL) {
2738	int i;
2739
2740	for (i = 0;i < exec->inputStackNr;i++) {
2741	    if (exec->inputStack[i].value != NULL)
2742		xmlFree(exec->inputStack[i].value);
2743	}
2744	xmlFree(exec->inputStack);
2745    }
2746    if (exec->errString != NULL)
2747        xmlFree(exec->errString);
2748    xmlFree(exec);
2749}
2750
2751static void
2752xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2753	                    void *data) {
2754#ifdef DEBUG_PUSH
2755    printf("saving value: %d:%s\n", exec->inputStackNr, value);
2756#endif
2757    if (exec->inputStackMax == 0) {
2758	exec->inputStackMax = 4;
2759	exec->inputStack = (xmlRegInputTokenPtr)
2760	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
2761	if (exec->inputStack == NULL) {
2762	    xmlRegexpErrMemory(NULL, "pushing input string");
2763	    exec->inputStackMax = 0;
2764	    return;
2765	}
2766    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
2767	xmlRegInputTokenPtr tmp;
2768
2769	exec->inputStackMax *= 2;
2770	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
2771			exec->inputStackMax * sizeof(xmlRegInputToken));
2772	if (tmp == NULL) {
2773	    xmlRegexpErrMemory(NULL, "pushing input string");
2774	    exec->inputStackMax /= 2;
2775	    return;
2776	}
2777	exec->inputStack = tmp;
2778    }
2779    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
2780    exec->inputStack[exec->inputStackNr].data = data;
2781    exec->inputStackNr++;
2782    exec->inputStack[exec->inputStackNr].value = NULL;
2783    exec->inputStack[exec->inputStackNr].data = NULL;
2784}
2785
2786/**
2787 * xmlRegStrEqualWildcard:
2788 * @expStr:  the string to be evaluated
2789 * @valStr:  the validation string
2790 *
2791 * Checks if both strings are equal or have the same content. "*"
2792 * can be used as a wildcard in @valStr; "|" is used as a seperator of
2793 * substrings in both @expStr and @valStr.
2794 *
2795 * Returns 1 if the comparison is satisfied and the number of substrings
2796 * is equal, 0 otherwise.
2797 */
2798
2799static int
2800xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
2801    if (expStr == valStr) return(1);
2802    if (expStr == NULL) return(0);
2803    if (valStr == NULL) return(0);
2804    do {
2805	/*
2806	* Eval if we have a wildcard for the current item.
2807	*/
2808        if (*expStr != *valStr) {
2809	    /* if one of them starts with a wildcard make valStr be it */
2810	    if (*valStr == '*') {
2811	        const xmlChar *tmp;
2812
2813		tmp = valStr;
2814		valStr = expStr;
2815		expStr = tmp;
2816	    }
2817	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
2818		do {
2819		    if (*valStr == XML_REG_STRING_SEPARATOR)
2820			break;
2821		    valStr++;
2822		} while (*valStr != 0);
2823		continue;
2824	    } else
2825		return(0);
2826	}
2827	expStr++;
2828	valStr++;
2829    } while (*valStr != 0);
2830    if (*expStr != 0)
2831	return (0);
2832    else
2833	return (1);
2834}
2835
2836/**
2837 * xmlRegCompactPushString:
2838 * @exec: a regexp execution context
2839 * @comp:  the precompiled exec with a compact table
2840 * @value: a string token input
2841 * @data: data associated to the token to reuse in callbacks
2842 *
2843 * Push one input token in the execution context
2844 *
2845 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2846 *     a negative value in case of error.
2847 */
2848static int
2849xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
2850	                xmlRegexpPtr comp,
2851	                const xmlChar *value,
2852	                void *data) {
2853    int state = exec->index;
2854    int i, target;
2855
2856    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
2857	return(-1);
2858
2859    if (value == NULL) {
2860	/*
2861	 * are we at a final state ?
2862	 */
2863	if (comp->compact[state * (comp->nbstrings + 1)] ==
2864            XML_REGEXP_FINAL_STATE)
2865	    return(1);
2866	return(0);
2867    }
2868
2869#ifdef DEBUG_PUSH
2870    printf("value pushed: %s\n", value);
2871#endif
2872
2873    /*
2874     * Examine all outside transitions from current state
2875     */
2876    for (i = 0;i < comp->nbstrings;i++) {
2877	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
2878	if ((target > 0) && (target <= comp->nbstates)) {
2879	    target--; /* to avoid 0 */
2880	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
2881		exec->index = target;
2882		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
2883		    exec->callback(exec->data, value,
2884			  comp->transdata[state * comp->nbstrings + i], data);
2885		}
2886#ifdef DEBUG_PUSH
2887		printf("entering state %d\n", target);
2888#endif
2889		if (comp->compact[target * (comp->nbstrings + 1)] ==
2890		    XML_REGEXP_SINK_STATE)
2891		    goto error;
2892
2893		if (comp->compact[target * (comp->nbstrings + 1)] ==
2894		    XML_REGEXP_FINAL_STATE)
2895		    return(1);
2896		return(0);
2897	    }
2898	}
2899    }
2900    /*
2901     * Failed to find an exit transition out from current state for the
2902     * current token
2903     */
2904#ifdef DEBUG_PUSH
2905    printf("failed to find a transition for %s on state %d\n", value, state);
2906#endif
2907error:
2908    if (exec->errString != NULL)
2909        xmlFree(exec->errString);
2910    exec->errString = xmlStrdup(value);
2911    exec->errStateNo = state;
2912    exec->status = -1;
2913#ifdef DEBUG_ERR
2914    testerr(exec);
2915#endif
2916    return(-1);
2917}
2918
2919/**
2920 * xmlRegExecPushStringInternal:
2921 * @exec: a regexp execution context or NULL to indicate the end
2922 * @value: a string token input
2923 * @data: data associated to the token to reuse in callbacks
2924 * @compound: value was assembled from 2 strings
2925 *
2926 * Push one input token in the execution context
2927 *
2928 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2929 *     a negative value in case of error.
2930 */
2931static int
2932xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
2933	                     void *data, int compound) {
2934    xmlRegTransPtr trans;
2935    xmlRegAtomPtr atom;
2936    int ret;
2937    int final = 0;
2938    int progress = 1;
2939
2940    if (exec == NULL)
2941	return(-1);
2942    if (exec->comp == NULL)
2943	return(-1);
2944    if (exec->status != 0)
2945	return(exec->status);
2946
2947    if (exec->comp->compact != NULL)
2948	return(xmlRegCompactPushString(exec, exec->comp, value, data));
2949
2950    if (value == NULL) {
2951        if (exec->state->type == XML_REGEXP_FINAL_STATE)
2952	    return(1);
2953	final = 1;
2954    }
2955
2956#ifdef DEBUG_PUSH
2957    printf("value pushed: %s\n", value);
2958#endif
2959    /*
2960     * If we have an active rollback stack push the new value there
2961     * and get back to where we were left
2962     */
2963    if ((value != NULL) && (exec->inputStackNr > 0)) {
2964	xmlFARegExecSaveInputString(exec, value, data);
2965	value = exec->inputStack[exec->index].value;
2966	data = exec->inputStack[exec->index].data;
2967#ifdef DEBUG_PUSH
2968	printf("value loaded: %s\n", value);
2969#endif
2970    }
2971
2972    while ((exec->status == 0) &&
2973	   ((value != NULL) ||
2974	    ((final == 1) &&
2975	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
2976
2977	/*
2978	 * End of input on non-terminal state, rollback, however we may
2979	 * still have epsilon like transition for counted transitions
2980	 * on counters, in that case don't break too early.
2981	 */
2982	if ((value == NULL) && (exec->counts == NULL))
2983	    goto rollback;
2984
2985	exec->transcount = 0;
2986	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2987	    trans = &exec->state->trans[exec->transno];
2988	    if (trans->to < 0)
2989		continue;
2990	    atom = trans->atom;
2991	    ret = 0;
2992	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
2993		int i;
2994		int count;
2995		xmlRegTransPtr t;
2996		xmlRegCounterPtr counter;
2997
2998		ret = 0;
2999
3000#ifdef DEBUG_PUSH
3001		printf("testing all lax %d\n", trans->count);
3002#endif
3003		/*
3004		 * Check all counted transitions from the current state
3005		 */
3006		if ((value == NULL) && (final)) {
3007		    ret = 1;
3008		} else if (value != NULL) {
3009		    for (i = 0;i < exec->state->nbTrans;i++) {
3010			t = &exec->state->trans[i];
3011			if ((t->counter < 0) || (t == trans))
3012			    continue;
3013			counter = &exec->comp->counters[t->counter];
3014			count = exec->counts[t->counter];
3015			if ((count < counter->max) &&
3016		            (t->atom != NULL) &&
3017			    (xmlStrEqual(value, t->atom->valuep))) {
3018			    ret = 0;
3019			    break;
3020			}
3021			if ((count >= counter->min) &&
3022			    (count < counter->max) &&
3023			    (xmlStrEqual(value, t->atom->valuep))) {
3024			    ret = 1;
3025			    break;
3026			}
3027		    }
3028		}
3029	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3030		int i;
3031		int count;
3032		xmlRegTransPtr t;
3033		xmlRegCounterPtr counter;
3034
3035		ret = 1;
3036
3037#ifdef DEBUG_PUSH
3038		printf("testing all %d\n", trans->count);
3039#endif
3040		/*
3041		 * Check all counted transitions from the current state
3042		 */
3043		for (i = 0;i < exec->state->nbTrans;i++) {
3044                    t = &exec->state->trans[i];
3045		    if ((t->counter < 0) || (t == trans))
3046			continue;
3047                    counter = &exec->comp->counters[t->counter];
3048		    count = exec->counts[t->counter];
3049		    if ((count < counter->min) || (count > counter->max)) {
3050			ret = 0;
3051			break;
3052		    }
3053		}
3054	    } else if (trans->count >= 0) {
3055		int count;
3056		xmlRegCounterPtr counter;
3057
3058		/*
3059		 * A counted transition.
3060		 */
3061
3062		count = exec->counts[trans->count];
3063		counter = &exec->comp->counters[trans->count];
3064#ifdef DEBUG_PUSH
3065		printf("testing count %d: val %d, min %d, max %d\n",
3066		       trans->count, count, counter->min,  counter->max);
3067#endif
3068		ret = ((count >= counter->min) && (count <= counter->max));
3069	    } else if (atom == NULL) {
3070		fprintf(stderr, "epsilon transition left at runtime\n");
3071		exec->status = -2;
3072		break;
3073	    } else if (value != NULL) {
3074		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3075		if (atom->neg) {
3076		    ret = !ret;
3077		    if (!compound)
3078		        ret = 0;
3079		}
3080		if ((ret == 1) && (trans->counter >= 0)) {
3081		    xmlRegCounterPtr counter;
3082		    int count;
3083
3084		    count = exec->counts[trans->counter];
3085		    counter = &exec->comp->counters[trans->counter];
3086		    if (count >= counter->max)
3087			ret = 0;
3088		}
3089
3090		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3091		    xmlRegStatePtr to = exec->comp->states[trans->to];
3092
3093		    /*
3094		     * this is a multiple input sequence
3095		     */
3096		    if (exec->state->nbTrans > exec->transno + 1) {
3097			if (exec->inputStackNr <= 0) {
3098			    xmlFARegExecSaveInputString(exec, value, data);
3099			}
3100			xmlFARegExecSave(exec);
3101		    }
3102		    exec->transcount = 1;
3103		    do {
3104			/*
3105			 * Try to progress as much as possible on the input
3106			 */
3107			if (exec->transcount == atom->max) {
3108			    break;
3109			}
3110			exec->index++;
3111			value = exec->inputStack[exec->index].value;
3112			data = exec->inputStack[exec->index].data;
3113#ifdef DEBUG_PUSH
3114			printf("value loaded: %s\n", value);
3115#endif
3116
3117			/*
3118			 * End of input: stop here
3119			 */
3120			if (value == NULL) {
3121			    exec->index --;
3122			    break;
3123			}
3124			if (exec->transcount >= atom->min) {
3125			    int transno = exec->transno;
3126			    xmlRegStatePtr state = exec->state;
3127
3128			    /*
3129			     * The transition is acceptable save it
3130			     */
3131			    exec->transno = -1; /* trick */
3132			    exec->state = to;
3133			    if (exec->inputStackNr <= 0) {
3134				xmlFARegExecSaveInputString(exec, value, data);
3135			    }
3136			    xmlFARegExecSave(exec);
3137			    exec->transno = transno;
3138			    exec->state = state;
3139			}
3140			ret = xmlStrEqual(value, atom->valuep);
3141			exec->transcount++;
3142		    } while (ret == 1);
3143		    if (exec->transcount < atom->min)
3144			ret = 0;
3145
3146		    /*
3147		     * If the last check failed but one transition was found
3148		     * possible, rollback
3149		     */
3150		    if (ret < 0)
3151			ret = 0;
3152		    if (ret == 0) {
3153			goto rollback;
3154		    }
3155		}
3156	    }
3157	    if (ret == 1) {
3158		if ((exec->callback != NULL) && (atom != NULL) &&
3159			(data != NULL)) {
3160		    exec->callback(exec->data, atom->valuep,
3161			           atom->data, data);
3162		}
3163		if (exec->state->nbTrans > exec->transno + 1) {
3164		    if (exec->inputStackNr <= 0) {
3165			xmlFARegExecSaveInputString(exec, value, data);
3166		    }
3167		    xmlFARegExecSave(exec);
3168		}
3169		if (trans->counter >= 0) {
3170#ifdef DEBUG_PUSH
3171		    printf("Increasing count %d\n", trans->counter);
3172#endif
3173		    exec->counts[trans->counter]++;
3174		}
3175		if ((trans->count >= 0) &&
3176		    (trans->count < REGEXP_ALL_COUNTER)) {
3177#ifdef DEBUG_REGEXP_EXEC
3178		    printf("resetting count %d on transition\n",
3179		           trans->count);
3180#endif
3181		    exec->counts[trans->count] = 0;
3182		}
3183#ifdef DEBUG_PUSH
3184		printf("entering state %d\n", trans->to);
3185#endif
3186                if ((exec->comp->states[trans->to] != NULL) &&
3187		    (exec->comp->states[trans->to]->type ==
3188		     XML_REGEXP_SINK_STATE)) {
3189		    /*
3190		     * entering a sink state, save the current state as error
3191		     * state.
3192		     */
3193		    if (exec->errString != NULL)
3194			xmlFree(exec->errString);
3195		    exec->errString = xmlStrdup(value);
3196		    exec->errState = exec->state;
3197		    memcpy(exec->errCounts, exec->counts,
3198			   exec->comp->nbCounters * sizeof(int));
3199		}
3200		exec->state = exec->comp->states[trans->to];
3201		exec->transno = 0;
3202		if (trans->atom != NULL) {
3203		    if (exec->inputStack != NULL) {
3204			exec->index++;
3205			if (exec->index < exec->inputStackNr) {
3206			    value = exec->inputStack[exec->index].value;
3207			    data = exec->inputStack[exec->index].data;
3208#ifdef DEBUG_PUSH
3209			    printf("value loaded: %s\n", value);
3210#endif
3211			} else {
3212			    value = NULL;
3213			    data = NULL;
3214#ifdef DEBUG_PUSH
3215			    printf("end of input\n");
3216#endif
3217			}
3218		    } else {
3219			value = NULL;
3220			data = NULL;
3221#ifdef DEBUG_PUSH
3222			printf("end of input\n");
3223#endif
3224		    }
3225		}
3226		goto progress;
3227	    } else if (ret < 0) {
3228		exec->status = -4;
3229		break;
3230	    }
3231	}
3232	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3233rollback:
3234            /*
3235	     * if we didn't yet rollback on the current input
3236	     * store the current state as the error state.
3237	     */
3238	    if ((progress) && (exec->state != NULL) &&
3239	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
3240	        progress = 0;
3241		if (exec->errString != NULL)
3242		    xmlFree(exec->errString);
3243		exec->errString = xmlStrdup(value);
3244		exec->errState = exec->state;
3245		memcpy(exec->errCounts, exec->counts,
3246		       exec->comp->nbCounters * sizeof(int));
3247	    }
3248
3249	    /*
3250	     * Failed to find a way out
3251	     */
3252	    exec->determinist = 0;
3253	    xmlFARegExecRollBack(exec);
3254	    if (exec->status == 0) {
3255		value = exec->inputStack[exec->index].value;
3256		data = exec->inputStack[exec->index].data;
3257#ifdef DEBUG_PUSH
3258		printf("value loaded: %s\n", value);
3259#endif
3260	    }
3261	}
3262	continue;
3263progress:
3264        progress = 1;
3265	continue;
3266    }
3267    if (exec->status == 0) {
3268        return(exec->state->type == XML_REGEXP_FINAL_STATE);
3269    }
3270#ifdef DEBUG_ERR
3271    if (exec->status < 0) {
3272	testerr(exec);
3273    }
3274#endif
3275    return(exec->status);
3276}
3277
3278/**
3279 * xmlRegExecPushString:
3280 * @exec: a regexp execution context or NULL to indicate the end
3281 * @value: a string token input
3282 * @data: data associated to the token to reuse in callbacks
3283 *
3284 * Push one input token in the execution context
3285 *
3286 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3287 *     a negative value in case of error.
3288 */
3289int
3290xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
3291	             void *data) {
3292    return(xmlRegExecPushStringInternal(exec, value, data, 0));
3293}
3294
3295/**
3296 * xmlRegExecPushString2:
3297 * @exec: a regexp execution context or NULL to indicate the end
3298 * @value: the first string token input
3299 * @value2: the second string token input
3300 * @data: data associated to the token to reuse in callbacks
3301 *
3302 * Push one input token in the execution context
3303 *
3304 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3305 *     a negative value in case of error.
3306 */
3307int
3308xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
3309                      const xmlChar *value2, void *data) {
3310    xmlChar buf[150];
3311    int lenn, lenp, ret;
3312    xmlChar *str;
3313
3314    if (exec == NULL)
3315	return(-1);
3316    if (exec->comp == NULL)
3317	return(-1);
3318    if (exec->status != 0)
3319	return(exec->status);
3320
3321    if (value2 == NULL)
3322        return(xmlRegExecPushString(exec, value, data));
3323
3324    lenn = strlen((char *) value2);
3325    lenp = strlen((char *) value);
3326
3327    if (150 < lenn + lenp + 2) {
3328	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
3329	if (str == NULL) {
3330	    exec->status = -1;
3331	    return(-1);
3332	}
3333    } else {
3334	str = buf;
3335    }
3336    memcpy(&str[0], value, lenp);
3337    str[lenp] = XML_REG_STRING_SEPARATOR;
3338    memcpy(&str[lenp + 1], value2, lenn);
3339    str[lenn + lenp + 1] = 0;
3340
3341    if (exec->comp->compact != NULL)
3342	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
3343    else
3344        ret = xmlRegExecPushStringInternal(exec, str, data, 1);
3345
3346    if (str != buf)
3347        xmlFree(buf);
3348    return(ret);
3349}
3350
3351/**
3352 * xmlRegExecGetValues:
3353 * @exec: a regexp execution context
3354 * @err: error extraction or normal one
3355 * @nbval: pointer to the number of accepted values IN/OUT
3356 * @nbneg: return number of negative transitions
3357 * @values: pointer to the array of acceptable values
3358 * @terminal: return value if this was a terminal state
3359 *
3360 * Extract informations from the regexp execution, internal routine to
3361 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
3362 *
3363 * Returns: 0 in case of success or -1 in case of error.
3364 */
3365static int
3366xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
3367                    int *nbval, int *nbneg,
3368		    xmlChar **values, int *terminal) {
3369    int maxval;
3370    int nb = 0;
3371
3372    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
3373        (values == NULL) || (*nbval <= 0))
3374        return(-1);
3375
3376    maxval = *nbval;
3377    *nbval = 0;
3378    *nbneg = 0;
3379    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
3380        xmlRegexpPtr comp;
3381	int target, i, state;
3382
3383        comp = exec->comp;
3384
3385	if (err) {
3386	    if (exec->errStateNo == -1) return(-1);
3387	    state = exec->errStateNo;
3388	} else {
3389	    state = exec->index;
3390	}
3391	if (terminal != NULL) {
3392	    if (comp->compact[state * (comp->nbstrings + 1)] ==
3393	        XML_REGEXP_FINAL_STATE)
3394		*terminal = 1;
3395	    else
3396		*terminal = 0;
3397	}
3398	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3399	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3400	    if ((target > 0) && (target <= comp->nbstates) &&
3401	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
3402		 XML_REGEXP_SINK_STATE)) {
3403	        values[nb++] = comp->stringMap[i];
3404		(*nbval)++;
3405	    }
3406	}
3407	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3408	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3409	    if ((target > 0) && (target <= comp->nbstates) &&
3410	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
3411		 XML_REGEXP_SINK_STATE)) {
3412	        values[nb++] = comp->stringMap[i];
3413		(*nbneg)++;
3414	    }
3415	}
3416    } else {
3417        int transno;
3418	xmlRegTransPtr trans;
3419	xmlRegAtomPtr atom;
3420	xmlRegStatePtr state;
3421
3422	if (terminal != NULL) {
3423	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
3424		*terminal = 1;
3425	    else
3426		*terminal = 0;
3427	}
3428
3429	if (err) {
3430	    if (exec->errState == NULL) return(-1);
3431	    state = exec->errState;
3432	} else {
3433	    if (exec->state == NULL) return(-1);
3434	    state = exec->state;
3435	}
3436	for (transno = 0;
3437	     (transno < state->nbTrans) && (nb < maxval);
3438	     transno++) {
3439	    trans = &state->trans[transno];
3440	    if (trans->to < 0)
3441		continue;
3442	    atom = trans->atom;
3443	    if ((atom == NULL) || (atom->valuep == NULL))
3444		continue;
3445	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3446	        /* this should not be reached but ... */
3447	        TODO;
3448	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3449	        /* this should not be reached but ... */
3450	        TODO;
3451	    } else if (trans->counter >= 0) {
3452		xmlRegCounterPtr counter;
3453		int count;
3454
3455		if (err)
3456		    count = exec->errCounts[trans->counter];
3457		else
3458		    count = exec->counts[trans->counter];
3459		counter = &exec->comp->counters[trans->counter];
3460		if (count < counter->max) {
3461		    if (atom->neg)
3462			values[nb++] = (xmlChar *) atom->valuep2;
3463		    else
3464			values[nb++] = (xmlChar *) atom->valuep;
3465		    (*nbval)++;
3466		}
3467	    } else {
3468                if ((exec->comp->states[trans->to] != NULL) &&
3469		    (exec->comp->states[trans->to]->type !=
3470		     XML_REGEXP_SINK_STATE)) {
3471		    if (atom->neg)
3472			values[nb++] = (xmlChar *) atom->valuep2;
3473		    else
3474			values[nb++] = (xmlChar *) atom->valuep;
3475		    (*nbval)++;
3476		}
3477	    }
3478	}
3479	for (transno = 0;
3480	     (transno < state->nbTrans) && (nb < maxval);
3481	     transno++) {
3482	    trans = &state->trans[transno];
3483	    if (trans->to < 0)
3484		continue;
3485	    atom = trans->atom;
3486	    if ((atom == NULL) || (atom->valuep == NULL))
3487		continue;
3488	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3489	        continue;
3490	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3491	        continue;
3492	    } else if (trans->counter >= 0) {
3493	        continue;
3494	    } else {
3495                if ((exec->comp->states[trans->to] != NULL) &&
3496		    (exec->comp->states[trans->to]->type ==
3497		     XML_REGEXP_SINK_STATE)) {
3498		    if (atom->neg)
3499			values[nb++] = (xmlChar *) atom->valuep2;
3500		    else
3501			values[nb++] = (xmlChar *) atom->valuep;
3502		    (*nbneg)++;
3503		}
3504	    }
3505	}
3506    }
3507    return(0);
3508}
3509
3510/**
3511 * xmlRegExecNextValues:
3512 * @exec: a regexp execution context
3513 * @nbval: pointer to the number of accepted values IN/OUT
3514 * @nbneg: return number of negative transitions
3515 * @values: pointer to the array of acceptable values
3516 * @terminal: return value if this was a terminal state
3517 *
3518 * Extract informations from the regexp execution,
3519 * the parameter @values must point to an array of @nbval string pointers
3520 * on return nbval will contain the number of possible strings in that
3521 * state and the @values array will be updated with them. The string values
3522 * returned will be freed with the @exec context and don't need to be
3523 * deallocated.
3524 *
3525 * Returns: 0 in case of success or -1 in case of error.
3526 */
3527int
3528xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
3529                     xmlChar **values, int *terminal) {
3530    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
3531}
3532
3533/**
3534 * xmlRegExecErrInfo:
3535 * @exec: a regexp execution context generating an error
3536 * @string: return value for the error string
3537 * @nbval: pointer to the number of accepted values IN/OUT
3538 * @nbneg: return number of negative transitions
3539 * @values: pointer to the array of acceptable values
3540 * @terminal: return value if this was a terminal state
3541 *
3542 * Extract error informations from the regexp execution, the parameter
3543 * @string will be updated with the value pushed and not accepted,
3544 * the parameter @values must point to an array of @nbval string pointers
3545 * on return nbval will contain the number of possible strings in that
3546 * state and the @values array will be updated with them. The string values
3547 * returned will be freed with the @exec context and don't need to be
3548 * deallocated.
3549 *
3550 * Returns: 0 in case of success or -1 in case of error.
3551 */
3552int
3553xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
3554                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
3555    if (exec == NULL)
3556        return(-1);
3557    if (string != NULL) {
3558        if (exec->status != 0)
3559	    *string = exec->errString;
3560	else
3561	    *string = NULL;
3562    }
3563    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
3564}
3565
3566#ifdef DEBUG_ERR
3567static void testerr(xmlRegExecCtxtPtr exec) {
3568    const xmlChar *string;
3569    xmlChar *values[5];
3570    int nb = 5;
3571    int nbneg;
3572    int terminal;
3573    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
3574}
3575#endif
3576
3577#if 0
3578static int
3579xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
3580    xmlRegTransPtr trans;
3581    xmlRegAtomPtr atom;
3582    int ret;
3583    int codepoint, len;
3584
3585    if (exec == NULL)
3586	return(-1);
3587    if (exec->status != 0)
3588	return(exec->status);
3589
3590    while ((exec->status == 0) &&
3591	   ((exec->inputString[exec->index] != 0) ||
3592	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
3593
3594	/*
3595	 * End of input on non-terminal state, rollback, however we may
3596	 * still have epsilon like transition for counted transitions
3597	 * on counters, in that case don't break too early.
3598	 */
3599	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
3600	    goto rollback;
3601
3602	exec->transcount = 0;
3603	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3604	    trans = &exec->state->trans[exec->transno];
3605	    if (trans->to < 0)
3606		continue;
3607	    atom = trans->atom;
3608	    ret = 0;
3609	    if (trans->count >= 0) {
3610		int count;
3611		xmlRegCounterPtr counter;
3612
3613		/*
3614		 * A counted transition.
3615		 */
3616
3617		count = exec->counts[trans->count];
3618		counter = &exec->comp->counters[trans->count];
3619#ifdef DEBUG_REGEXP_EXEC
3620		printf("testing count %d: val %d, min %d, max %d\n",
3621		       trans->count, count, counter->min,  counter->max);
3622#endif
3623		ret = ((count >= counter->min) && (count <= counter->max));
3624	    } else if (atom == NULL) {
3625		fprintf(stderr, "epsilon transition left at runtime\n");
3626		exec->status = -2;
3627		break;
3628	    } else if (exec->inputString[exec->index] != 0) {
3629                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3630		ret = xmlRegCheckCharacter(atom, codepoint);
3631		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3632		    xmlRegStatePtr to = exec->comp->states[trans->to];
3633
3634		    /*
3635		     * this is a multiple input sequence
3636		     */
3637		    if (exec->state->nbTrans > exec->transno + 1) {
3638			xmlFARegExecSave(exec);
3639		    }
3640		    exec->transcount = 1;
3641		    do {
3642			/*
3643			 * Try to progress as much as possible on the input
3644			 */
3645			if (exec->transcount == atom->max) {
3646			    break;
3647			}
3648			exec->index += len;
3649			/*
3650			 * End of input: stop here
3651			 */
3652			if (exec->inputString[exec->index] == 0) {
3653			    exec->index -= len;
3654			    break;
3655			}
3656			if (exec->transcount >= atom->min) {
3657			    int transno = exec->transno;
3658			    xmlRegStatePtr state = exec->state;
3659
3660			    /*
3661			     * The transition is acceptable save it
3662			     */
3663			    exec->transno = -1; /* trick */
3664			    exec->state = to;
3665			    xmlFARegExecSave(exec);
3666			    exec->transno = transno;
3667			    exec->state = state;
3668			}
3669			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3670				              len);
3671			ret = xmlRegCheckCharacter(atom, codepoint);
3672			exec->transcount++;
3673		    } while (ret == 1);
3674		    if (exec->transcount < atom->min)
3675			ret = 0;
3676
3677		    /*
3678		     * If the last check failed but one transition was found
3679		     * possible, rollback
3680		     */
3681		    if (ret < 0)
3682			ret = 0;
3683		    if (ret == 0) {
3684			goto rollback;
3685		    }
3686		}
3687	    }
3688	    if (ret == 1) {
3689		if (exec->state->nbTrans > exec->transno + 1) {
3690		    xmlFARegExecSave(exec);
3691		}
3692		if (trans->counter >= 0) {
3693#ifdef DEBUG_REGEXP_EXEC
3694		    printf("Increasing count %d\n", trans->counter);
3695#endif
3696		    exec->counts[trans->counter]++;
3697		}
3698#ifdef DEBUG_REGEXP_EXEC
3699		printf("entering state %d\n", trans->to);
3700#endif
3701		exec->state = exec->comp->states[trans->to];
3702		exec->transno = 0;
3703		if (trans->atom != NULL) {
3704		    exec->index += len;
3705		}
3706		goto progress;
3707	    } else if (ret < 0) {
3708		exec->status = -4;
3709		break;
3710	    }
3711	}
3712	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3713rollback:
3714	    /*
3715	     * Failed to find a way out
3716	     */
3717	    exec->determinist = 0;
3718	    xmlFARegExecRollBack(exec);
3719	}
3720progress:
3721	continue;
3722    }
3723}
3724#endif
3725/************************************************************************
3726 * 									*
3727 *	Parser for the Schemas Datatype Regular Expressions		*
3728 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
3729 * 									*
3730 ************************************************************************/
3731
3732/**
3733 * xmlFAIsChar:
3734 * @ctxt:  a regexp parser context
3735 *
3736 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
3737 */
3738static int
3739xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
3740    int cur;
3741    int len;
3742
3743    cur = CUR_SCHAR(ctxt->cur, len);
3744    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
3745	(cur == '*') || (cur == '+') || (cur == '(') ||
3746	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
3747	(cur == 0x5D) || (cur == 0))
3748	return(-1);
3749    return(cur);
3750}
3751
3752/**
3753 * xmlFAParseCharProp:
3754 * @ctxt:  a regexp parser context
3755 *
3756 * [27]   charProp   ::=   IsCategory | IsBlock
3757 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
3758 *                       Separators | Symbols | Others
3759 * [29]   Letters   ::=   'L' [ultmo]?
3760 * [30]   Marks   ::=   'M' [nce]?
3761 * [31]   Numbers   ::=   'N' [dlo]?
3762 * [32]   Punctuation   ::=   'P' [cdseifo]?
3763 * [33]   Separators   ::=   'Z' [slp]?
3764 * [34]   Symbols   ::=   'S' [mcko]?
3765 * [35]   Others   ::=   'C' [cfon]?
3766 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
3767 */
3768static void
3769xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
3770    int cur;
3771    xmlRegAtomType type = (xmlRegAtomType) 0;
3772    xmlChar *blockName = NULL;
3773
3774    cur = CUR;
3775    if (cur == 'L') {
3776	NEXT;
3777	cur = CUR;
3778	if (cur == 'u') {
3779	    NEXT;
3780	    type = XML_REGEXP_LETTER_UPPERCASE;
3781	} else if (cur == 'l') {
3782	    NEXT;
3783	    type = XML_REGEXP_LETTER_LOWERCASE;
3784	} else if (cur == 't') {
3785	    NEXT;
3786	    type = XML_REGEXP_LETTER_TITLECASE;
3787	} else if (cur == 'm') {
3788	    NEXT;
3789	    type = XML_REGEXP_LETTER_MODIFIER;
3790	} else if (cur == 'o') {
3791	    NEXT;
3792	    type = XML_REGEXP_LETTER_OTHERS;
3793	} else {
3794	    type = XML_REGEXP_LETTER;
3795	}
3796    } else if (cur == 'M') {
3797	NEXT;
3798	cur = CUR;
3799	if (cur == 'n') {
3800	    NEXT;
3801	    /* nonspacing */
3802	    type = XML_REGEXP_MARK_NONSPACING;
3803	} else if (cur == 'c') {
3804	    NEXT;
3805	    /* spacing combining */
3806	    type = XML_REGEXP_MARK_SPACECOMBINING;
3807	} else if (cur == 'e') {
3808	    NEXT;
3809	    /* enclosing */
3810	    type = XML_REGEXP_MARK_ENCLOSING;
3811	} else {
3812	    /* all marks */
3813	    type = XML_REGEXP_MARK;
3814	}
3815    } else if (cur == 'N') {
3816	NEXT;
3817	cur = CUR;
3818	if (cur == 'd') {
3819	    NEXT;
3820	    /* digital */
3821	    type = XML_REGEXP_NUMBER_DECIMAL;
3822	} else if (cur == 'l') {
3823	    NEXT;
3824	    /* letter */
3825	    type = XML_REGEXP_NUMBER_LETTER;
3826	} else if (cur == 'o') {
3827	    NEXT;
3828	    /* other */
3829	    type = XML_REGEXP_NUMBER_OTHERS;
3830	} else {
3831	    /* all numbers */
3832	    type = XML_REGEXP_NUMBER;
3833	}
3834    } else if (cur == 'P') {
3835	NEXT;
3836	cur = CUR;
3837	if (cur == 'c') {
3838	    NEXT;
3839	    /* connector */
3840	    type = XML_REGEXP_PUNCT_CONNECTOR;
3841	} else if (cur == 'd') {
3842	    NEXT;
3843	    /* dash */
3844	    type = XML_REGEXP_PUNCT_DASH;
3845	} else if (cur == 's') {
3846	    NEXT;
3847	    /* open */
3848	    type = XML_REGEXP_PUNCT_OPEN;
3849	} else if (cur == 'e') {
3850	    NEXT;
3851	    /* close */
3852	    type = XML_REGEXP_PUNCT_CLOSE;
3853	} else if (cur == 'i') {
3854	    NEXT;
3855	    /* initial quote */
3856	    type = XML_REGEXP_PUNCT_INITQUOTE;
3857	} else if (cur == 'f') {
3858	    NEXT;
3859	    /* final quote */
3860	    type = XML_REGEXP_PUNCT_FINQUOTE;
3861	} else if (cur == 'o') {
3862	    NEXT;
3863	    /* other */
3864	    type = XML_REGEXP_PUNCT_OTHERS;
3865	} else {
3866	    /* all punctuation */
3867	    type = XML_REGEXP_PUNCT;
3868	}
3869    } else if (cur == 'Z') {
3870	NEXT;
3871	cur = CUR;
3872	if (cur == 's') {
3873	    NEXT;
3874	    /* space */
3875	    type = XML_REGEXP_SEPAR_SPACE;
3876	} else if (cur == 'l') {
3877	    NEXT;
3878	    /* line */
3879	    type = XML_REGEXP_SEPAR_LINE;
3880	} else if (cur == 'p') {
3881	    NEXT;
3882	    /* paragraph */
3883	    type = XML_REGEXP_SEPAR_PARA;
3884	} else {
3885	    /* all separators */
3886	    type = XML_REGEXP_SEPAR;
3887	}
3888    } else if (cur == 'S') {
3889	NEXT;
3890	cur = CUR;
3891	if (cur == 'm') {
3892	    NEXT;
3893	    type = XML_REGEXP_SYMBOL_MATH;
3894	    /* math */
3895	} else if (cur == 'c') {
3896	    NEXT;
3897	    type = XML_REGEXP_SYMBOL_CURRENCY;
3898	    /* currency */
3899	} else if (cur == 'k') {
3900	    NEXT;
3901	    type = XML_REGEXP_SYMBOL_MODIFIER;
3902	    /* modifiers */
3903	} else if (cur == 'o') {
3904	    NEXT;
3905	    type = XML_REGEXP_SYMBOL_OTHERS;
3906	    /* other */
3907	} else {
3908	    /* all symbols */
3909	    type = XML_REGEXP_SYMBOL;
3910	}
3911    } else if (cur == 'C') {
3912	NEXT;
3913	cur = CUR;
3914	if (cur == 'c') {
3915	    NEXT;
3916	    /* control */
3917	    type = XML_REGEXP_OTHER_CONTROL;
3918	} else if (cur == 'f') {
3919	    NEXT;
3920	    /* format */
3921	    type = XML_REGEXP_OTHER_FORMAT;
3922	} else if (cur == 'o') {
3923	    NEXT;
3924	    /* private use */
3925	    type = XML_REGEXP_OTHER_PRIVATE;
3926	} else if (cur == 'n') {
3927	    NEXT;
3928	    /* not assigned */
3929	    type = XML_REGEXP_OTHER_NA;
3930	} else {
3931	    /* all others */
3932	    type = XML_REGEXP_OTHER;
3933	}
3934    } else if (cur == 'I') {
3935	const xmlChar *start;
3936	NEXT;
3937	cur = CUR;
3938	if (cur != 's') {
3939	    ERROR("IsXXXX expected");
3940	    return;
3941	}
3942	NEXT;
3943	start = ctxt->cur;
3944	cur = CUR;
3945	if (((cur >= 'a') && (cur <= 'z')) ||
3946	    ((cur >= 'A') && (cur <= 'Z')) ||
3947	    ((cur >= '0') && (cur <= '9')) ||
3948	    (cur == 0x2D)) {
3949	    NEXT;
3950	    cur = CUR;
3951	    while (((cur >= 'a') && (cur <= 'z')) ||
3952		((cur >= 'A') && (cur <= 'Z')) ||
3953		((cur >= '0') && (cur <= '9')) ||
3954		(cur == 0x2D)) {
3955		NEXT;
3956		cur = CUR;
3957	    }
3958	}
3959	type = XML_REGEXP_BLOCK_NAME;
3960	blockName = xmlStrndup(start, ctxt->cur - start);
3961    } else {
3962	ERROR("Unknown char property");
3963	return;
3964    }
3965    if (ctxt->atom == NULL) {
3966	ctxt->atom = xmlRegNewAtom(ctxt, type);
3967	if (ctxt->atom != NULL)
3968	    ctxt->atom->valuep = blockName;
3969    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3970        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3971		           type, 0, 0, blockName);
3972    }
3973}
3974
3975/**
3976 * xmlFAParseCharClassEsc:
3977 * @ctxt:  a regexp parser context
3978 *
3979 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
3980 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
3981 * [25] catEsc   ::=   '\p{' charProp '}'
3982 * [26] complEsc ::=   '\P{' charProp '}'
3983 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
3984 */
3985static void
3986xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
3987    int cur;
3988
3989    if (CUR == '.') {
3990	if (ctxt->atom == NULL) {
3991	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
3992	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3993	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3994			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
3995	}
3996	NEXT;
3997	return;
3998    }
3999    if (CUR != '\\') {
4000	ERROR("Escaped sequence: expecting \\");
4001	return;
4002    }
4003    NEXT;
4004    cur = CUR;
4005    if (cur == 'p') {
4006	NEXT;
4007	if (CUR != '{') {
4008	    ERROR("Expecting '{'");
4009	    return;
4010	}
4011	NEXT;
4012	xmlFAParseCharProp(ctxt);
4013	if (CUR != '}') {
4014	    ERROR("Expecting '}'");
4015	    return;
4016	}
4017	NEXT;
4018    } else if (cur == 'P') {
4019	NEXT;
4020	if (CUR != '{') {
4021	    ERROR("Expecting '{'");
4022	    return;
4023	}
4024	NEXT;
4025	xmlFAParseCharProp(ctxt);
4026	ctxt->atom->neg = 1;
4027	if (CUR != '}') {
4028	    ERROR("Expecting '}'");
4029	    return;
4030	}
4031	NEXT;
4032    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
4033	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
4034	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
4035	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
4036	(cur == 0x5E)) {
4037	if (ctxt->atom == NULL) {
4038	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4039	    if (ctxt->atom != NULL) {
4040	        switch (cur) {
4041		    case 'n':
4042		        ctxt->atom->codepoint = '\n';
4043			break;
4044		    case 'r':
4045		        ctxt->atom->codepoint = '\r';
4046			break;
4047		    case 't':
4048		        ctxt->atom->codepoint = '\t';
4049			break;
4050		    default:
4051			ctxt->atom->codepoint = cur;
4052		}
4053	    }
4054	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4055	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4056			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4057	}
4058	NEXT;
4059    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4060	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4061	(cur == 'w') || (cur == 'W')) {
4062	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4063
4064	switch (cur) {
4065	    case 's':
4066		type = XML_REGEXP_ANYSPACE;
4067		break;
4068	    case 'S':
4069		type = XML_REGEXP_NOTSPACE;
4070		break;
4071	    case 'i':
4072		type = XML_REGEXP_INITNAME;
4073		break;
4074	    case 'I':
4075		type = XML_REGEXP_NOTINITNAME;
4076		break;
4077	    case 'c':
4078		type = XML_REGEXP_NAMECHAR;
4079		break;
4080	    case 'C':
4081		type = XML_REGEXP_NOTNAMECHAR;
4082		break;
4083	    case 'd':
4084		type = XML_REGEXP_DECIMAL;
4085		break;
4086	    case 'D':
4087		type = XML_REGEXP_NOTDECIMAL;
4088		break;
4089	    case 'w':
4090		type = XML_REGEXP_REALCHAR;
4091		break;
4092	    case 'W':
4093		type = XML_REGEXP_NOTREALCHAR;
4094		break;
4095	}
4096	NEXT;
4097	if (ctxt->atom == NULL) {
4098	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4099	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4100	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4101			       type, 0, 0, NULL);
4102	}
4103    }
4104}
4105
4106/**
4107 * xmlFAParseCharRef:
4108 * @ctxt:  a regexp parser context
4109 *
4110 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
4111 */
4112static int
4113xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
4114    int ret = 0, cur;
4115
4116    if ((CUR != '&') || (NXT(1) != '#'))
4117	return(-1);
4118    NEXT;
4119    NEXT;
4120    cur = CUR;
4121    if (cur == 'x') {
4122	NEXT;
4123	cur = CUR;
4124	if (((cur >= '0') && (cur <= '9')) ||
4125	    ((cur >= 'a') && (cur <= 'f')) ||
4126	    ((cur >= 'A') && (cur <= 'F'))) {
4127	    while (((cur >= '0') && (cur <= '9')) ||
4128		   ((cur >= 'A') && (cur <= 'F'))) {
4129		if ((cur >= '0') && (cur <= '9'))
4130		    ret = ret * 16 + cur - '0';
4131		else if ((cur >= 'a') && (cur <= 'f'))
4132		    ret = ret * 16 + 10 + (cur - 'a');
4133		else
4134		    ret = ret * 16 + 10 + (cur - 'A');
4135		NEXT;
4136		cur = CUR;
4137	    }
4138	} else {
4139	    ERROR("Char ref: expecting [0-9A-F]");
4140	    return(-1);
4141	}
4142    } else {
4143	if ((cur >= '0') && (cur <= '9')) {
4144	    while ((cur >= '0') && (cur <= '9')) {
4145		ret = ret * 10 + cur - '0';
4146		NEXT;
4147		cur = CUR;
4148	    }
4149	} else {
4150	    ERROR("Char ref: expecting [0-9]");
4151	    return(-1);
4152	}
4153    }
4154    if (cur != ';') {
4155	ERROR("Char ref: expecting ';'");
4156	return(-1);
4157    } else {
4158	NEXT;
4159    }
4160    return(ret);
4161}
4162
4163/**
4164 * xmlFAParseCharRange:
4165 * @ctxt:  a regexp parser context
4166 *
4167 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
4168 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
4169 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
4170 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
4171 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
4172 */
4173static void
4174xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4175    int cur, len;
4176    int start = -1;
4177    int end = -1;
4178
4179    if ((CUR == '&') && (NXT(1) == '#')) {
4180	end = start = xmlFAParseCharRef(ctxt);
4181        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4182	                   XML_REGEXP_CHARVAL, start, end, NULL);
4183	return;
4184    }
4185    cur = CUR;
4186    if (cur == '\\') {
4187	NEXT;
4188	cur = CUR;
4189	switch (cur) {
4190	    case 'n': start = 0xA; break;
4191	    case 'r': start = 0xD; break;
4192	    case 't': start = 0x9; break;
4193	    case '\\': case '|': case '.': case '-': case '^': case '?':
4194	    case '*': case '+': case '{': case '}': case '(': case ')':
4195	    case '[': case ']':
4196		start = cur; break;
4197	    default:
4198		ERROR("Invalid escape value");
4199		return;
4200	}
4201	end = start;
4202        len = 1;
4203    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4204        end = start = CUR_SCHAR(ctxt->cur, len);
4205    } else {
4206	ERROR("Expecting a char range");
4207	return;
4208    }
4209    NEXTL(len);
4210    if (start == '-') {
4211	return;
4212    }
4213    cur = CUR;
4214    if ((cur != '-') || (NXT(1) == ']')) {
4215        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4216		              XML_REGEXP_CHARVAL, start, end, NULL);
4217	return;
4218    }
4219    NEXT;
4220    cur = CUR;
4221    if (cur == '\\') {
4222	NEXT;
4223	cur = CUR;
4224	switch (cur) {
4225	    case 'n': end = 0xA; break;
4226	    case 'r': end = 0xD; break;
4227	    case 't': end = 0x9; break;
4228	    case '\\': case '|': case '.': case '-': case '^': case '?':
4229	    case '*': case '+': case '{': case '}': case '(': case ')':
4230	    case '[': case ']':
4231		end = cur; break;
4232	    default:
4233		ERROR("Invalid escape value");
4234		return;
4235	}
4236        len = 1;
4237    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4238        end = CUR_SCHAR(ctxt->cur, len);
4239    } else {
4240	ERROR("Expecting the end of a char range");
4241	return;
4242    }
4243    NEXTL(len);
4244    /* TODO check that the values are acceptable character ranges for XML */
4245    if (end < start) {
4246	ERROR("End of range is before start of range");
4247    } else {
4248        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4249		           XML_REGEXP_CHARVAL, start, end, NULL);
4250    }
4251    return;
4252}
4253
4254/**
4255 * xmlFAParsePosCharGroup:
4256 * @ctxt:  a regexp parser context
4257 *
4258 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
4259 */
4260static void
4261xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
4262    do {
4263	if ((CUR == '\\') || (CUR == '.')) {
4264	    xmlFAParseCharClassEsc(ctxt);
4265	} else {
4266	    xmlFAParseCharRange(ctxt);
4267	}
4268    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
4269	     (ctxt->error == 0));
4270}
4271
4272/**
4273 * xmlFAParseCharGroup:
4274 * @ctxt:  a regexp parser context
4275 *
4276 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
4277 * [15]   negCharGroup ::= '^' posCharGroup
4278 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
4279 * [12]   charClassExpr ::= '[' charGroup ']'
4280 */
4281static void
4282xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
4283    int n = ctxt->neg;
4284    while ((CUR != ']') && (ctxt->error == 0)) {
4285	if (CUR == '^') {
4286	    int neg = ctxt->neg;
4287
4288	    NEXT;
4289	    ctxt->neg = !ctxt->neg;
4290	    xmlFAParsePosCharGroup(ctxt);
4291	    ctxt->neg = neg;
4292	} else if ((CUR == '-') && (NXT(1) == '[')) {
4293	    int neg = ctxt->neg;
4294	    ctxt->neg = 2;
4295	    NEXT;	/* eat the '-' */
4296	    NEXT;	/* eat the '[' */
4297	    xmlFAParseCharGroup(ctxt);
4298	    if (CUR == ']') {
4299		NEXT;
4300	    } else {
4301		ERROR("charClassExpr: ']' expected");
4302		break;
4303	    }
4304	    ctxt->neg = neg;
4305	    break;
4306	} else if (CUR != ']') {
4307	    xmlFAParsePosCharGroup(ctxt);
4308	}
4309    }
4310    ctxt->neg = n;
4311}
4312
4313/**
4314 * xmlFAParseCharClass:
4315 * @ctxt:  a regexp parser context
4316 *
4317 * [11]   charClass   ::=     charClassEsc | charClassExpr
4318 * [12]   charClassExpr   ::=   '[' charGroup ']'
4319 */
4320static void
4321xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
4322    if (CUR == '[') {
4323	NEXT;
4324	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
4325	if (ctxt->atom == NULL)
4326	    return;
4327	xmlFAParseCharGroup(ctxt);
4328	if (CUR == ']') {
4329	    NEXT;
4330	} else {
4331	    ERROR("xmlFAParseCharClass: ']' expected");
4332	}
4333    } else {
4334	xmlFAParseCharClassEsc(ctxt);
4335    }
4336}
4337
4338/**
4339 * xmlFAParseQuantExact:
4340 * @ctxt:  a regexp parser context
4341 *
4342 * [8]   QuantExact   ::=   [0-9]+
4343 *
4344 * Returns 0 if success or -1 in case of error
4345 */
4346static int
4347xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
4348    int ret = 0;
4349    int ok = 0;
4350
4351    while ((CUR >= '0') && (CUR <= '9')) {
4352	ret = ret * 10 + (CUR - '0');
4353	ok = 1;
4354	NEXT;
4355    }
4356    if (ok != 1) {
4357	return(-1);
4358    }
4359    return(ret);
4360}
4361
4362/**
4363 * xmlFAParseQuantifier:
4364 * @ctxt:  a regexp parser context
4365 *
4366 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
4367 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
4368 * [6]   quantRange   ::=   QuantExact ',' QuantExact
4369 * [7]   quantMin   ::=   QuantExact ','
4370 * [8]   QuantExact   ::=   [0-9]+
4371 */
4372static int
4373xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
4374    int cur;
4375
4376    cur = CUR;
4377    if ((cur == '?') || (cur == '*') || (cur == '+')) {
4378	if (ctxt->atom != NULL) {
4379	    if (cur == '?')
4380		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
4381	    else if (cur == '*')
4382		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
4383	    else if (cur == '+')
4384		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
4385	}
4386	NEXT;
4387	return(1);
4388    }
4389    if (cur == '{') {
4390	int min = 0, max = 0;
4391
4392	NEXT;
4393	cur = xmlFAParseQuantExact(ctxt);
4394	if (cur >= 0)
4395	    min = cur;
4396	if (CUR == ',') {
4397	    NEXT;
4398	    if (CUR == '}')
4399	        max = INT_MAX;
4400	    else {
4401	        cur = xmlFAParseQuantExact(ctxt);
4402	        if (cur >= 0)
4403		    max = cur;
4404		else {
4405		    ERROR("Improper quantifier");
4406		}
4407	    }
4408	}
4409	if (CUR == '}') {
4410	    NEXT;
4411	} else {
4412	    ERROR("Unterminated quantifier");
4413	}
4414	if (max == 0)
4415	    max = min;
4416	if (ctxt->atom != NULL) {
4417	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
4418	    ctxt->atom->min = min;
4419	    ctxt->atom->max = max;
4420	}
4421	return(1);
4422    }
4423    return(0);
4424}
4425
4426/**
4427 * xmlFAParseAtom:
4428 * @ctxt:  a regexp parser context
4429 *
4430 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
4431 */
4432static int
4433xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
4434    int codepoint, len;
4435
4436    codepoint = xmlFAIsChar(ctxt);
4437    if (codepoint > 0) {
4438	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4439	if (ctxt->atom == NULL)
4440	    return(-1);
4441	codepoint = CUR_SCHAR(ctxt->cur, len);
4442	ctxt->atom->codepoint = codepoint;
4443	NEXTL(len);
4444	return(1);
4445    } else if (CUR == '|') {
4446	return(0);
4447    } else if (CUR == 0) {
4448	return(0);
4449    } else if (CUR == ')') {
4450	return(0);
4451    } else if (CUR == '(') {
4452	xmlRegStatePtr start, oldend;
4453
4454	NEXT;
4455	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
4456	start = ctxt->state;
4457	oldend = ctxt->end;
4458	ctxt->end = NULL;
4459	ctxt->atom = NULL;
4460	xmlFAParseRegExp(ctxt, 0);
4461	if (CUR == ')') {
4462	    NEXT;
4463	} else {
4464	    ERROR("xmlFAParseAtom: expecting ')'");
4465	}
4466	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
4467	if (ctxt->atom == NULL)
4468	    return(-1);
4469	ctxt->atom->start = start;
4470	ctxt->atom->stop = ctxt->state;
4471	ctxt->end = oldend;
4472	return(1);
4473    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
4474	xmlFAParseCharClass(ctxt);
4475	return(1);
4476    }
4477    return(0);
4478}
4479
4480/**
4481 * xmlFAParsePiece:
4482 * @ctxt:  a regexp parser context
4483 *
4484 * [3]   piece   ::=   atom quantifier?
4485 */
4486static int
4487xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
4488    int ret;
4489
4490    ctxt->atom = NULL;
4491    ret = xmlFAParseAtom(ctxt);
4492    if (ret == 0)
4493	return(0);
4494    if (ctxt->atom == NULL) {
4495	ERROR("internal: no atom generated");
4496    }
4497    xmlFAParseQuantifier(ctxt);
4498    return(1);
4499}
4500
4501/**
4502 * xmlFAParseBranch:
4503 * @ctxt:  a regexp parser context
4504 *
4505 * [2]   branch   ::=   piece*
4506 8
4507 */
4508static int
4509xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) {
4510    xmlRegStatePtr previous;
4511    int ret;
4512
4513    previous = ctxt->state;
4514    ret = xmlFAParsePiece(ctxt);
4515    if (ret != 0) {
4516	if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0)
4517	    return(-1);
4518	previous = ctxt->state;
4519	ctxt->atom = NULL;
4520    }
4521    while ((ret != 0) && (ctxt->error == 0)) {
4522	ret = xmlFAParsePiece(ctxt);
4523	if (ret != 0) {
4524	    if (xmlFAGenerateTransitions(ctxt, previous, NULL,
4525					 ctxt->atom) < 0)
4526		    return(-1);
4527	    previous = ctxt->state;
4528	    ctxt->atom = NULL;
4529	}
4530    }
4531    return(0);
4532}
4533
4534/**
4535 * xmlFAParseRegExp:
4536 * @ctxt:  a regexp parser context
4537 * @top:  is this the top-level expression ?
4538 *
4539 * [1]   regExp   ::=     branch  ( '|' branch )*
4540 */
4541static void
4542xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
4543    xmlRegStatePtr start, end;
4544
4545    /* if not top start should have been generated by an epsilon trans */
4546    start = ctxt->state;
4547    ctxt->end = NULL;
4548    xmlFAParseBranch(ctxt);
4549    if (top) {
4550#ifdef DEBUG_REGEXP_GRAPH
4551	printf("State %d is final\n", ctxt->state->no);
4552#endif
4553	ctxt->state->type = XML_REGEXP_FINAL_STATE;
4554    }
4555    if (CUR != '|') {
4556	ctxt->end = ctxt->state;
4557	return;
4558    }
4559    end = ctxt->state;
4560    while ((CUR == '|') && (ctxt->error == 0)) {
4561	NEXT;
4562	ctxt->state = start;
4563	ctxt->end = NULL;
4564	xmlFAParseBranch(ctxt);
4565	if (top) {
4566	    ctxt->state->type = XML_REGEXP_FINAL_STATE;
4567#ifdef DEBUG_REGEXP_GRAPH
4568	    printf("State %d is final\n", ctxt->state->no);
4569#endif
4570	} else {
4571	    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end);
4572	}
4573    }
4574    if (!top) {
4575	ctxt->state = end;
4576	ctxt->end = end;
4577    }
4578}
4579
4580/************************************************************************
4581 * 									*
4582 * 			The basic API					*
4583 * 									*
4584 ************************************************************************/
4585
4586/**
4587 * xmlRegexpPrint:
4588 * @output: the file for the output debug
4589 * @regexp: the compiled regexp
4590 *
4591 * Print the content of the compiled regular expression
4592 */
4593void
4594xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
4595    int i;
4596
4597    if (output == NULL)
4598        return;
4599    fprintf(output, " regexp: ");
4600    if (regexp == NULL) {
4601	fprintf(output, "NULL\n");
4602	return;
4603    }
4604    fprintf(output, "'%s' ", regexp->string);
4605    fprintf(output, "\n");
4606    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
4607    for (i = 0;i < regexp->nbAtoms; i++) {
4608	fprintf(output, " %02d ", i);
4609	xmlRegPrintAtom(output, regexp->atoms[i]);
4610    }
4611    fprintf(output, "%d states:", regexp->nbStates);
4612    fprintf(output, "\n");
4613    for (i = 0;i < regexp->nbStates; i++) {
4614	xmlRegPrintState(output, regexp->states[i]);
4615    }
4616    fprintf(output, "%d counters:\n", regexp->nbCounters);
4617    for (i = 0;i < regexp->nbCounters; i++) {
4618	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
4619		                                regexp->counters[i].max);
4620    }
4621}
4622
4623/**
4624 * xmlRegexpCompile:
4625 * @regexp:  a regular expression string
4626 *
4627 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
4628 * Appendix F and builds an automata suitable for testing strings against
4629 * that regular expression
4630 *
4631 * Returns the compiled expression or NULL in case of error
4632 */
4633xmlRegexpPtr
4634xmlRegexpCompile(const xmlChar *regexp) {
4635    xmlRegexpPtr ret;
4636    xmlRegParserCtxtPtr ctxt;
4637
4638    ctxt = xmlRegNewParserCtxt(regexp);
4639    if (ctxt == NULL)
4640	return(NULL);
4641
4642    /* initialize the parser */
4643    ctxt->end = NULL;
4644    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
4645    xmlRegStatePush(ctxt, ctxt->start);
4646
4647    /* parse the expression building an automata */
4648    xmlFAParseRegExp(ctxt, 1);
4649    if (CUR != 0) {
4650	ERROR("xmlFAParseRegExp: extra characters");
4651    }
4652    ctxt->end = ctxt->state;
4653    ctxt->start->type = XML_REGEXP_START_STATE;
4654    ctxt->end->type = XML_REGEXP_FINAL_STATE;
4655
4656    /* remove the Epsilon except for counted transitions */
4657    xmlFAEliminateEpsilonTransitions(ctxt);
4658
4659
4660    if (ctxt->error != 0) {
4661	xmlRegFreeParserCtxt(ctxt);
4662	return(NULL);
4663    }
4664    ret = xmlRegEpxFromParse(ctxt);
4665    xmlRegFreeParserCtxt(ctxt);
4666    return(ret);
4667}
4668
4669/**
4670 * xmlRegexpExec:
4671 * @comp:  the compiled regular expression
4672 * @content:  the value to check against the regular expression
4673 *
4674 * Check if the regular expression generates the value
4675 *
4676 * Returns 1 if it matches, 0 if not and a negative value in case of error
4677 */
4678int
4679xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
4680    if ((comp == NULL) || (content == NULL))
4681	return(-1);
4682    return(xmlFARegExec(comp, content));
4683}
4684
4685/**
4686 * xmlRegexpIsDeterminist:
4687 * @comp:  the compiled regular expression
4688 *
4689 * Check if the regular expression is determinist
4690 *
4691 * Returns 1 if it yes, 0 if not and a negative value in case of error
4692 */
4693int
4694xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
4695    xmlAutomataPtr am;
4696    int ret;
4697
4698    if (comp == NULL)
4699	return(-1);
4700    if (comp->determinist != -1)
4701	return(comp->determinist);
4702
4703    am = xmlNewAutomata();
4704    if (am->states != NULL) {
4705	int i;
4706
4707	for (i = 0;i < am->nbStates;i++)
4708	    xmlRegFreeState(am->states[i]);
4709	xmlFree(am->states);
4710    }
4711    am->nbAtoms = comp->nbAtoms;
4712    am->atoms = comp->atoms;
4713    am->nbStates = comp->nbStates;
4714    am->states = comp->states;
4715    am->determinist = -1;
4716    ret = xmlFAComputesDeterminism(am);
4717    am->atoms = NULL;
4718    am->states = NULL;
4719    xmlFreeAutomata(am);
4720    return(ret);
4721}
4722
4723/**
4724 * xmlRegFreeRegexp:
4725 * @regexp:  the regexp
4726 *
4727 * Free a regexp
4728 */
4729void
4730xmlRegFreeRegexp(xmlRegexpPtr regexp) {
4731    int i;
4732    if (regexp == NULL)
4733	return;
4734
4735    if (regexp->string != NULL)
4736	xmlFree(regexp->string);
4737    if (regexp->states != NULL) {
4738	for (i = 0;i < regexp->nbStates;i++)
4739	    xmlRegFreeState(regexp->states[i]);
4740	xmlFree(regexp->states);
4741    }
4742    if (regexp->atoms != NULL) {
4743	for (i = 0;i < regexp->nbAtoms;i++)
4744	    xmlRegFreeAtom(regexp->atoms[i]);
4745	xmlFree(regexp->atoms);
4746    }
4747    if (regexp->counters != NULL)
4748	xmlFree(regexp->counters);
4749    if (regexp->compact != NULL)
4750	xmlFree(regexp->compact);
4751    if (regexp->transdata != NULL)
4752	xmlFree(regexp->transdata);
4753    if (regexp->stringMap != NULL) {
4754	for (i = 0; i < regexp->nbstrings;i++)
4755	    xmlFree(regexp->stringMap[i]);
4756	xmlFree(regexp->stringMap);
4757    }
4758
4759    xmlFree(regexp);
4760}
4761
4762#ifdef LIBXML_AUTOMATA_ENABLED
4763/************************************************************************
4764 * 									*
4765 * 			The Automata interface				*
4766 * 									*
4767 ************************************************************************/
4768
4769/**
4770 * xmlNewAutomata:
4771 *
4772 * Create a new automata
4773 *
4774 * Returns the new object or NULL in case of failure
4775 */
4776xmlAutomataPtr
4777xmlNewAutomata(void) {
4778    xmlAutomataPtr ctxt;
4779
4780    ctxt = xmlRegNewParserCtxt(NULL);
4781    if (ctxt == NULL)
4782	return(NULL);
4783
4784    /* initialize the parser */
4785    ctxt->end = NULL;
4786    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
4787    ctxt->start->type = XML_REGEXP_START_STATE;
4788    if (ctxt->start == NULL) {
4789	xmlFreeAutomata(ctxt);
4790	return(NULL);
4791    }
4792    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
4793        xmlRegFreeState(ctxt->start);
4794	xmlFreeAutomata(ctxt);
4795	return(NULL);
4796    }
4797
4798    return(ctxt);
4799}
4800
4801/**
4802 * xmlFreeAutomata:
4803 * @am: an automata
4804 *
4805 * Free an automata
4806 */
4807void
4808xmlFreeAutomata(xmlAutomataPtr am) {
4809    if (am == NULL)
4810	return;
4811    xmlRegFreeParserCtxt(am);
4812}
4813
4814/**
4815 * xmlAutomataGetInitState:
4816 * @am: an automata
4817 *
4818 * Initial state lookup
4819 *
4820 * Returns the initial state of the automata
4821 */
4822xmlAutomataStatePtr
4823xmlAutomataGetInitState(xmlAutomataPtr am) {
4824    if (am == NULL)
4825	return(NULL);
4826    return(am->start);
4827}
4828
4829/**
4830 * xmlAutomataSetFinalState:
4831 * @am: an automata
4832 * @state: a state in this automata
4833 *
4834 * Makes that state a final state
4835 *
4836 * Returns 0 or -1 in case of error
4837 */
4838int
4839xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
4840    if ((am == NULL) || (state == NULL))
4841	return(-1);
4842    state->type = XML_REGEXP_FINAL_STATE;
4843    return(0);
4844}
4845
4846/**
4847 * xmlAutomataNewTransition:
4848 * @am: an automata
4849 * @from: the starting point of the transition
4850 * @to: the target point of the transition or NULL
4851 * @token: the input string associated to that transition
4852 * @data: data passed to the callback function if the transition is activated
4853 *
4854 * If @to is NULL, this creates first a new target state in the automata
4855 * and then adds a transition from the @from state to the target state
4856 * activated by the value of @token
4857 *
4858 * Returns the target state or NULL in case of error
4859 */
4860xmlAutomataStatePtr
4861xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
4862			 xmlAutomataStatePtr to, const xmlChar *token,
4863			 void *data) {
4864    xmlRegAtomPtr atom;
4865
4866    if ((am == NULL) || (from == NULL) || (token == NULL))
4867	return(NULL);
4868    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4869    if (atom == NULL)
4870        return(NULL);
4871    atom->data = data;
4872    if (atom == NULL)
4873	return(NULL);
4874    atom->valuep = xmlStrdup(token);
4875
4876    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
4877        xmlRegFreeAtom(atom);
4878	return(NULL);
4879    }
4880    if (to == NULL)
4881	return(am->state);
4882    return(to);
4883}
4884
4885/**
4886 * xmlAutomataNewTransition2:
4887 * @am: an automata
4888 * @from: the starting point of the transition
4889 * @to: the target point of the transition or NULL
4890 * @token: the first input string associated to that transition
4891 * @token2: the second input string associated to that transition
4892 * @data: data passed to the callback function if the transition is activated
4893 *
4894 * If @to is NULL, this creates first a new target state in the automata
4895 * and then adds a transition from the @from state to the target state
4896 * activated by the value of @token
4897 *
4898 * Returns the target state or NULL in case of error
4899 */
4900xmlAutomataStatePtr
4901xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
4902			  xmlAutomataStatePtr to, const xmlChar *token,
4903			  const xmlChar *token2, void *data) {
4904    xmlRegAtomPtr atom;
4905
4906    if ((am == NULL) || (from == NULL) || (token == NULL))
4907	return(NULL);
4908    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4909    atom->data = data;
4910    if (atom == NULL)
4911	return(NULL);
4912    if ((token2 == NULL) || (*token2 == 0)) {
4913	atom->valuep = xmlStrdup(token);
4914    } else {
4915	int lenn, lenp;
4916	xmlChar *str;
4917
4918	lenn = strlen((char *) token2);
4919	lenp = strlen((char *) token);
4920
4921	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4922	if (str == NULL) {
4923	    xmlRegFreeAtom(atom);
4924	    return(NULL);
4925	}
4926	memcpy(&str[0], token, lenp);
4927	str[lenp] = '|';
4928	memcpy(&str[lenp + 1], token2, lenn);
4929	str[lenn + lenp + 1] = 0;
4930
4931	atom->valuep = str;
4932    }
4933
4934    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
4935        xmlRegFreeAtom(atom);
4936	return(NULL);
4937    }
4938    if (to == NULL)
4939	return(am->state);
4940    return(to);
4941}
4942
4943/**
4944 * xmlAutomataNewNegTrans:
4945 * @am: an automata
4946 * @from: the starting point of the transition
4947 * @to: the target point of the transition or NULL
4948 * @token: the first input string associated to that transition
4949 * @token2: the second input string associated to that transition
4950 * @data: data passed to the callback function if the transition is activated
4951 *
4952 * If @to is NULL, this creates first a new target state in the automata
4953 * and then adds a transition from the @from state to the target state
4954 * activated by any value except (@token,@token2)
4955 * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
4956 # the semantic of XSD ##other
4957 *
4958 * Returns the target state or NULL in case of error
4959 */
4960xmlAutomataStatePtr
4961xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4962		       xmlAutomataStatePtr to, const xmlChar *token,
4963		       const xmlChar *token2, void *data) {
4964    xmlRegAtomPtr atom;
4965    xmlChar err_msg[200];
4966
4967    if ((am == NULL) || (from == NULL) || (token == NULL))
4968	return(NULL);
4969    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4970    if (atom == NULL)
4971	return(NULL);
4972    atom->data = data;
4973    atom->neg = 1;
4974    if ((token2 == NULL) || (*token2 == 0)) {
4975	atom->valuep = xmlStrdup(token);
4976    } else {
4977	int lenn, lenp;
4978	xmlChar *str;
4979
4980	lenn = strlen((char *) token2);
4981	lenp = strlen((char *) token);
4982
4983	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4984	if (str == NULL) {
4985	    xmlRegFreeAtom(atom);
4986	    return(NULL);
4987	}
4988	memcpy(&str[0], token, lenp);
4989	str[lenp] = '|';
4990	memcpy(&str[lenp + 1], token2, lenn);
4991	str[lenn + lenp + 1] = 0;
4992
4993	atom->valuep = str;
4994    }
4995    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
4996    err_msg[199] = 0;
4997    atom->valuep2 = xmlStrdup(err_msg);
4998
4999    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
5000        xmlRegFreeAtom(atom);
5001	return(NULL);
5002    }
5003    am->negs++;
5004    if (to == NULL)
5005	return(am->state);
5006    return(to);
5007}
5008
5009/**
5010 * xmlAutomataNewCountTrans2:
5011 * @am: an automata
5012 * @from: the starting point of the transition
5013 * @to: the target point of the transition or NULL
5014 * @token: the input string associated to that transition
5015 * @token2: the second input string associated to that transition
5016 * @min:  the minimum successive occurences of token
5017 * @max:  the maximum successive occurences of token
5018 * @data:  data associated to the transition
5019 *
5020 * If @to is NULL, this creates first a new target state in the automata
5021 * and then adds a transition from the @from state to the target state
5022 * activated by a succession of input of value @token and @token2 and
5023 * whose number is between @min and @max
5024 *
5025 * Returns the target state or NULL in case of error
5026 */
5027xmlAutomataStatePtr
5028xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5029			 xmlAutomataStatePtr to, const xmlChar *token,
5030			 const xmlChar *token2,
5031			 int min, int max, void *data) {
5032    xmlRegAtomPtr atom;
5033    int counter;
5034
5035    if ((am == NULL) || (from == NULL) || (token == NULL))
5036	return(NULL);
5037    if (min < 0)
5038	return(NULL);
5039    if ((max < min) || (max < 1))
5040	return(NULL);
5041    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5042    if (atom == NULL)
5043	return(NULL);
5044    if ((token2 == NULL) || (*token2 == 0)) {
5045	atom->valuep = xmlStrdup(token);
5046    } else {
5047	int lenn, lenp;
5048	xmlChar *str;
5049
5050	lenn = strlen((char *) token2);
5051	lenp = strlen((char *) token);
5052
5053	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5054	if (str == NULL) {
5055	    xmlRegFreeAtom(atom);
5056	    return(NULL);
5057	}
5058	memcpy(&str[0], token, lenp);
5059	str[lenp] = '|';
5060	memcpy(&str[lenp + 1], token2, lenn);
5061	str[lenn + lenp + 1] = 0;
5062
5063	atom->valuep = str;
5064    }
5065    atom->data = data;
5066    if (min == 0)
5067	atom->min = 1;
5068    else
5069	atom->min = min;
5070    atom->max = max;
5071
5072    /*
5073     * associate a counter to the transition.
5074     */
5075    counter = xmlRegGetCounter(am);
5076    am->counters[counter].min = min;
5077    am->counters[counter].max = max;
5078
5079    /* xmlFAGenerateTransitions(am, from, to, atom); */
5080    if (to == NULL) {
5081        to = xmlRegNewState(am);
5082	xmlRegStatePush(am, to);
5083    }
5084    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5085    xmlRegAtomPush(am, atom);
5086    am->state = to;
5087
5088    if (to == NULL)
5089	to = am->state;
5090    if (to == NULL)
5091	return(NULL);
5092    if (min == 0)
5093	xmlFAGenerateEpsilonTransition(am, from, to);
5094    return(to);
5095}
5096
5097/**
5098 * xmlAutomataNewCountTrans:
5099 * @am: an automata
5100 * @from: the starting point of the transition
5101 * @to: the target point of the transition or NULL
5102 * @token: the input string associated to that transition
5103 * @min:  the minimum successive occurences of token
5104 * @max:  the maximum successive occurences of token
5105 * @data:  data associated to the transition
5106 *
5107 * If @to is NULL, this creates first a new target state in the automata
5108 * and then adds a transition from the @from state to the target state
5109 * activated by a succession of input of value @token and whose number
5110 * is between @min and @max
5111 *
5112 * Returns the target state or NULL in case of error
5113 */
5114xmlAutomataStatePtr
5115xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5116			 xmlAutomataStatePtr to, const xmlChar *token,
5117			 int min, int max, void *data) {
5118    xmlRegAtomPtr atom;
5119    int counter;
5120
5121    if ((am == NULL) || (from == NULL) || (token == NULL))
5122	return(NULL);
5123    if (min < 0)
5124	return(NULL);
5125    if ((max < min) || (max < 1))
5126	return(NULL);
5127    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5128    if (atom == NULL)
5129	return(NULL);
5130    atom->valuep = xmlStrdup(token);
5131    atom->data = data;
5132    if (min == 0)
5133	atom->min = 1;
5134    else
5135	atom->min = min;
5136    atom->max = max;
5137
5138    /*
5139     * associate a counter to the transition.
5140     */
5141    counter = xmlRegGetCounter(am);
5142    am->counters[counter].min = min;
5143    am->counters[counter].max = max;
5144
5145    /* xmlFAGenerateTransitions(am, from, to, atom); */
5146    if (to == NULL) {
5147        to = xmlRegNewState(am);
5148	xmlRegStatePush(am, to);
5149    }
5150    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5151    xmlRegAtomPush(am, atom);
5152    am->state = to;
5153
5154    if (to == NULL)
5155	to = am->state;
5156    if (to == NULL)
5157	return(NULL);
5158    if (min == 0)
5159	xmlFAGenerateEpsilonTransition(am, from, to);
5160    return(to);
5161}
5162
5163/**
5164 * xmlAutomataNewOnceTrans2:
5165 * @am: an automata
5166 * @from: the starting point of the transition
5167 * @to: the target point of the transition or NULL
5168 * @token: the input string associated to that transition
5169 * @token2: the second input string associated to that transition
5170 * @min:  the minimum successive occurences of token
5171 * @max:  the maximum successive occurences of token
5172 * @data:  data associated to the transition
5173 *
5174 * If @to is NULL, this creates first a new target state in the automata
5175 * and then adds a transition from the @from state to the target state
5176 * activated by a succession of input of value @token and @token2 and whose
5177 * number is between @min and @max, moreover that transition can only be
5178 * crossed once.
5179 *
5180 * Returns the target state or NULL in case of error
5181 */
5182xmlAutomataStatePtr
5183xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5184			 xmlAutomataStatePtr to, const xmlChar *token,
5185			 const xmlChar *token2,
5186			 int min, int max, void *data) {
5187    xmlRegAtomPtr atom;
5188    int counter;
5189
5190    if ((am == NULL) || (from == NULL) || (token == NULL))
5191	return(NULL);
5192    if (min < 1)
5193	return(NULL);
5194    if ((max < min) || (max < 1))
5195	return(NULL);
5196    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5197    if (atom == NULL)
5198	return(NULL);
5199    if ((token2 == NULL) || (*token2 == 0)) {
5200	atom->valuep = xmlStrdup(token);
5201    } else {
5202	int lenn, lenp;
5203	xmlChar *str;
5204
5205	lenn = strlen((char *) token2);
5206	lenp = strlen((char *) token);
5207
5208	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5209	if (str == NULL) {
5210	    xmlRegFreeAtom(atom);
5211	    return(NULL);
5212	}
5213	memcpy(&str[0], token, lenp);
5214	str[lenp] = '|';
5215	memcpy(&str[lenp + 1], token2, lenn);
5216	str[lenn + lenp + 1] = 0;
5217
5218	atom->valuep = str;
5219    }
5220    atom->data = data;
5221    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5222    if (min == 0)
5223	atom->min = 1;
5224    else
5225	atom->min = min;
5226    atom->max = max;
5227    /*
5228     * associate a counter to the transition.
5229     */
5230    counter = xmlRegGetCounter(am);
5231    am->counters[counter].min = 1;
5232    am->counters[counter].max = 1;
5233
5234    /* xmlFAGenerateTransitions(am, from, to, atom); */
5235    if (to == NULL) {
5236	to = xmlRegNewState(am);
5237	xmlRegStatePush(am, to);
5238    }
5239    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5240    xmlRegAtomPush(am, atom);
5241    am->state = to;
5242    return(to);
5243}
5244
5245
5246
5247/**
5248 * xmlAutomataNewOnceTrans:
5249 * @am: an automata
5250 * @from: the starting point of the transition
5251 * @to: the target point of the transition or NULL
5252 * @token: the input string associated to that transition
5253 * @min:  the minimum successive occurences of token
5254 * @max:  the maximum successive occurences of token
5255 * @data:  data associated to the transition
5256 *
5257 * If @to is NULL, this creates first a new target state in the automata
5258 * and then adds a transition from the @from state to the target state
5259 * activated by a succession of input of value @token and whose number
5260 * is between @min and @max, moreover that transition can only be crossed
5261 * once.
5262 *
5263 * Returns the target state or NULL in case of error
5264 */
5265xmlAutomataStatePtr
5266xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5267			 xmlAutomataStatePtr to, const xmlChar *token,
5268			 int min, int max, void *data) {
5269    xmlRegAtomPtr atom;
5270    int counter;
5271
5272    if ((am == NULL) || (from == NULL) || (token == NULL))
5273	return(NULL);
5274    if (min < 1)
5275	return(NULL);
5276    if ((max < min) || (max < 1))
5277	return(NULL);
5278    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5279    if (atom == NULL)
5280	return(NULL);
5281    atom->valuep = xmlStrdup(token);
5282    atom->data = data;
5283    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5284    if (min == 0)
5285	atom->min = 1;
5286    else
5287	atom->min = min;
5288    atom->max = max;
5289    /*
5290     * associate a counter to the transition.
5291     */
5292    counter = xmlRegGetCounter(am);
5293    am->counters[counter].min = 1;
5294    am->counters[counter].max = 1;
5295
5296    /* xmlFAGenerateTransitions(am, from, to, atom); */
5297    if (to == NULL) {
5298	to = xmlRegNewState(am);
5299	xmlRegStatePush(am, to);
5300    }
5301    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5302    xmlRegAtomPush(am, atom);
5303    am->state = to;
5304    return(to);
5305}
5306
5307/**
5308 * xmlAutomataNewState:
5309 * @am: an automata
5310 *
5311 * Create a new disconnected state in the automata
5312 *
5313 * Returns the new state or NULL in case of error
5314 */
5315xmlAutomataStatePtr
5316xmlAutomataNewState(xmlAutomataPtr am) {
5317    xmlAutomataStatePtr to;
5318
5319    if (am == NULL)
5320	return(NULL);
5321    to = xmlRegNewState(am);
5322    xmlRegStatePush(am, to);
5323    return(to);
5324}
5325
5326/**
5327 * xmlAutomataNewEpsilon:
5328 * @am: an automata
5329 * @from: the starting point of the transition
5330 * @to: the target point of the transition or NULL
5331 *
5332 * If @to is NULL, this creates first a new target state in the automata
5333 * and then adds an epsilon transition from the @from state to the
5334 * target state
5335 *
5336 * Returns the target state or NULL in case of error
5337 */
5338xmlAutomataStatePtr
5339xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
5340		      xmlAutomataStatePtr to) {
5341    if ((am == NULL) || (from == NULL))
5342	return(NULL);
5343    xmlFAGenerateEpsilonTransition(am, from, to);
5344    if (to == NULL)
5345	return(am->state);
5346    return(to);
5347}
5348
5349/**
5350 * xmlAutomataNewAllTrans:
5351 * @am: an automata
5352 * @from: the starting point of the transition
5353 * @to: the target point of the transition or NULL
5354 * @lax: allow to transition if not all all transitions have been activated
5355 *
5356 * If @to is NULL, this creates first a new target state in the automata
5357 * and then adds a an ALL transition from the @from state to the
5358 * target state. That transition is an epsilon transition allowed only when
5359 * all transitions from the @from node have been activated.
5360 *
5361 * Returns the target state or NULL in case of error
5362 */
5363xmlAutomataStatePtr
5364xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5365		       xmlAutomataStatePtr to, int lax) {
5366    if ((am == NULL) || (from == NULL))
5367	return(NULL);
5368    xmlFAGenerateAllTransition(am, from, to, lax);
5369    if (to == NULL)
5370	return(am->state);
5371    return(to);
5372}
5373
5374/**
5375 * xmlAutomataNewCounter:
5376 * @am: an automata
5377 * @min:  the minimal value on the counter
5378 * @max:  the maximal value on the counter
5379 *
5380 * Create a new counter
5381 *
5382 * Returns the counter number or -1 in case of error
5383 */
5384int
5385xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
5386    int ret;
5387
5388    if (am == NULL)
5389	return(-1);
5390
5391    ret = xmlRegGetCounter(am);
5392    if (ret < 0)
5393	return(-1);
5394    am->counters[ret].min = min;
5395    am->counters[ret].max = max;
5396    return(ret);
5397}
5398
5399/**
5400 * xmlAutomataNewCountedTrans:
5401 * @am: an automata
5402 * @from: the starting point of the transition
5403 * @to: the target point of the transition or NULL
5404 * @counter: the counter associated to that transition
5405 *
5406 * If @to is NULL, this creates first a new target state in the automata
5407 * and then adds an epsilon transition from the @from state to the target state
5408 * which will increment the counter provided
5409 *
5410 * Returns the target state or NULL in case of error
5411 */
5412xmlAutomataStatePtr
5413xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5414		xmlAutomataStatePtr to, int counter) {
5415    if ((am == NULL) || (from == NULL) || (counter < 0))
5416	return(NULL);
5417    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
5418    if (to == NULL)
5419	return(am->state);
5420    return(to);
5421}
5422
5423/**
5424 * xmlAutomataNewCounterTrans:
5425 * @am: an automata
5426 * @from: the starting point of the transition
5427 * @to: the target point of the transition or NULL
5428 * @counter: the counter associated to that transition
5429 *
5430 * If @to is NULL, this creates first a new target state in the automata
5431 * and then adds an epsilon transition from the @from state to the target state
5432 * which will be allowed only if the counter is within the right range.
5433 *
5434 * Returns the target state or NULL in case of error
5435 */
5436xmlAutomataStatePtr
5437xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5438		xmlAutomataStatePtr to, int counter) {
5439    if ((am == NULL) || (from == NULL) || (counter < 0))
5440	return(NULL);
5441    xmlFAGenerateCountedTransition(am, from, to, counter);
5442    if (to == NULL)
5443	return(am->state);
5444    return(to);
5445}
5446
5447/**
5448 * xmlAutomataCompile:
5449 * @am: an automata
5450 *
5451 * Compile the automata into a Reg Exp ready for being executed.
5452 * The automata should be free after this point.
5453 *
5454 * Returns the compiled regexp or NULL in case of error
5455 */
5456xmlRegexpPtr
5457xmlAutomataCompile(xmlAutomataPtr am) {
5458    xmlRegexpPtr ret;
5459
5460    if ((am == NULL) || (am->error != 0)) return(NULL);
5461    xmlFAEliminateEpsilonTransitions(am);
5462    /* xmlFAComputesDeterminism(am); */
5463    ret = xmlRegEpxFromParse(am);
5464
5465    return(ret);
5466}
5467
5468/**
5469 * xmlAutomataIsDeterminist:
5470 * @am: an automata
5471 *
5472 * Checks if an automata is determinist.
5473 *
5474 * Returns 1 if true, 0 if not, and -1 in case of error
5475 */
5476int
5477xmlAutomataIsDeterminist(xmlAutomataPtr am) {
5478    int ret;
5479
5480    if (am == NULL)
5481	return(-1);
5482
5483    ret = xmlFAComputesDeterminism(am);
5484    return(ret);
5485}
5486#endif /* LIBXML_AUTOMATA_ENABLED */
5487#define bottom_xmlregexp
5488#include "elfgcchack.h"
5489#endif /* LIBXML_REGEXP_ENABLED */
5490