xmlregexp.c revision db68b74dc7ec531361a736de7769a3e8ce881f79
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications these include:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22/* #define DEBUG_ERR */
23
24#include <stdio.h>
25#include <string.h>
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <libxml/tree.h>
31#include <libxml/parserInternals.h>
32#include <libxml/xmlregexp.h>
33#include <libxml/xmlautomata.h>
34#include <libxml/xmlunicode.h>
35
36#ifndef INT_MAX
37#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
38#endif
39
40/* #define DEBUG_REGEXP_GRAPH */
41/* #define DEBUG_REGEXP_EXEC */
42/* #define DEBUG_PUSH */
43/* #define DEBUG_COMPACTION */
44
45#define ERROR(str)							\
46    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
47    xmlRegexpErrCompile(ctxt, str);
48#define NEXT ctxt->cur++
49#define CUR (*(ctxt->cur))
50#define NXT(index) (ctxt->cur[index])
51
52#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
53#define NEXTL(l) ctxt->cur += l;
54#define XML_REG_STRING_SEPARATOR '|'
55
56/**
57 * TODO:
58 *
59 * macro to flag unimplemented blocks
60 */
61#define TODO 								\
62    xmlGenericError(xmlGenericErrorContext,				\
63	    "Unimplemented block at %s:%d\n",				\
64            __FILE__, __LINE__);
65
66/************************************************************************
67 * 									*
68 * 			Datatypes and structures			*
69 * 									*
70 ************************************************************************/
71
72typedef enum {
73    XML_REGEXP_EPSILON = 1,
74    XML_REGEXP_CHARVAL,
75    XML_REGEXP_RANGES,
76    XML_REGEXP_SUBREG,
77    XML_REGEXP_STRING,
78    XML_REGEXP_ANYCHAR, /* . */
79    XML_REGEXP_ANYSPACE, /* \s */
80    XML_REGEXP_NOTSPACE, /* \S */
81    XML_REGEXP_INITNAME, /* \l */
82    XML_REGEXP_NOTINITNAME, /* \l */
83    XML_REGEXP_NAMECHAR, /* \c */
84    XML_REGEXP_NOTNAMECHAR, /* \C */
85    XML_REGEXP_DECIMAL, /* \d */
86    XML_REGEXP_NOTDECIMAL, /* \d */
87    XML_REGEXP_REALCHAR, /* \w */
88    XML_REGEXP_NOTREALCHAR, /* \w */
89    XML_REGEXP_LETTER,
90    XML_REGEXP_LETTER_UPPERCASE,
91    XML_REGEXP_LETTER_LOWERCASE,
92    XML_REGEXP_LETTER_TITLECASE,
93    XML_REGEXP_LETTER_MODIFIER,
94    XML_REGEXP_LETTER_OTHERS,
95    XML_REGEXP_MARK,
96    XML_REGEXP_MARK_NONSPACING,
97    XML_REGEXP_MARK_SPACECOMBINING,
98    XML_REGEXP_MARK_ENCLOSING,
99    XML_REGEXP_NUMBER,
100    XML_REGEXP_NUMBER_DECIMAL,
101    XML_REGEXP_NUMBER_LETTER,
102    XML_REGEXP_NUMBER_OTHERS,
103    XML_REGEXP_PUNCT,
104    XML_REGEXP_PUNCT_CONNECTOR,
105    XML_REGEXP_PUNCT_DASH,
106    XML_REGEXP_PUNCT_OPEN,
107    XML_REGEXP_PUNCT_CLOSE,
108    XML_REGEXP_PUNCT_INITQUOTE,
109    XML_REGEXP_PUNCT_FINQUOTE,
110    XML_REGEXP_PUNCT_OTHERS,
111    XML_REGEXP_SEPAR,
112    XML_REGEXP_SEPAR_SPACE,
113    XML_REGEXP_SEPAR_LINE,
114    XML_REGEXP_SEPAR_PARA,
115    XML_REGEXP_SYMBOL,
116    XML_REGEXP_SYMBOL_MATH,
117    XML_REGEXP_SYMBOL_CURRENCY,
118    XML_REGEXP_SYMBOL_MODIFIER,
119    XML_REGEXP_SYMBOL_OTHERS,
120    XML_REGEXP_OTHER,
121    XML_REGEXP_OTHER_CONTROL,
122    XML_REGEXP_OTHER_FORMAT,
123    XML_REGEXP_OTHER_PRIVATE,
124    XML_REGEXP_OTHER_NA,
125    XML_REGEXP_BLOCK_NAME
126} xmlRegAtomType;
127
128typedef enum {
129    XML_REGEXP_QUANT_EPSILON = 1,
130    XML_REGEXP_QUANT_ONCE,
131    XML_REGEXP_QUANT_OPT,
132    XML_REGEXP_QUANT_MULT,
133    XML_REGEXP_QUANT_PLUS,
134    XML_REGEXP_QUANT_ONCEONLY,
135    XML_REGEXP_QUANT_ALL,
136    XML_REGEXP_QUANT_RANGE
137} xmlRegQuantType;
138
139typedef enum {
140    XML_REGEXP_START_STATE = 1,
141    XML_REGEXP_FINAL_STATE,
142    XML_REGEXP_TRANS_STATE,
143    XML_REGEXP_SINK_STATE
144} xmlRegStateType;
145
146typedef enum {
147    XML_REGEXP_MARK_NORMAL = 0,
148    XML_REGEXP_MARK_START,
149    XML_REGEXP_MARK_VISITED
150} xmlRegMarkedType;
151
152typedef struct _xmlRegRange xmlRegRange;
153typedef xmlRegRange *xmlRegRangePtr;
154
155struct _xmlRegRange {
156    int neg;		/* 0 normal, 1 not, 2 exclude */
157    xmlRegAtomType type;
158    int start;
159    int end;
160    xmlChar *blockName;
161};
162
163typedef struct _xmlRegAtom xmlRegAtom;
164typedef xmlRegAtom *xmlRegAtomPtr;
165
166typedef struct _xmlAutomataState xmlRegState;
167typedef xmlRegState *xmlRegStatePtr;
168
169struct _xmlRegAtom {
170    int no;
171    xmlRegAtomType type;
172    xmlRegQuantType quant;
173    int min;
174    int max;
175
176    void *valuep;
177    void *valuep2;
178    int neg;
179    int codepoint;
180    xmlRegStatePtr start;
181    xmlRegStatePtr stop;
182    int maxRanges;
183    int nbRanges;
184    xmlRegRangePtr *ranges;
185    void *data;
186};
187
188typedef struct _xmlRegCounter xmlRegCounter;
189typedef xmlRegCounter *xmlRegCounterPtr;
190
191struct _xmlRegCounter {
192    int min;
193    int max;
194};
195
196typedef struct _xmlRegTrans xmlRegTrans;
197typedef xmlRegTrans *xmlRegTransPtr;
198
199struct _xmlRegTrans {
200    xmlRegAtomPtr atom;
201    int to;
202    int counter;
203    int count;
204};
205
206struct _xmlAutomataState {
207    xmlRegStateType type;
208    xmlRegMarkedType mark;
209    xmlRegMarkedType reached;
210    int no;
211    int maxTrans;
212    int nbTrans;
213    xmlRegTrans *trans;
214    /*  knowing states ponting to us can speed things up */
215    int maxTransTo;
216    int nbTransTo;
217    int *transTo;
218};
219
220typedef struct _xmlAutomata xmlRegParserCtxt;
221typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
222
223struct _xmlAutomata {
224    xmlChar *string;
225    xmlChar *cur;
226
227    int error;
228    int neg;
229
230    xmlRegStatePtr start;
231    xmlRegStatePtr end;
232    xmlRegStatePtr state;
233
234    xmlRegAtomPtr atom;
235
236    int maxAtoms;
237    int nbAtoms;
238    xmlRegAtomPtr *atoms;
239
240    int maxStates;
241    int nbStates;
242    xmlRegStatePtr *states;
243
244    int maxCounters;
245    int nbCounters;
246    xmlRegCounter *counters;
247
248    int determinist;
249};
250
251struct _xmlRegexp {
252    xmlChar *string;
253    int nbStates;
254    xmlRegStatePtr *states;
255    int nbAtoms;
256    xmlRegAtomPtr *atoms;
257    int nbCounters;
258    xmlRegCounter *counters;
259    int determinist;
260    /*
261     * That's the compact form for determinists automatas
262     */
263    int nbstates;
264    int *compact;
265    void **transdata;
266    int nbstrings;
267    xmlChar **stringMap;
268};
269
270typedef struct _xmlRegExecRollback xmlRegExecRollback;
271typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
272
273struct _xmlRegExecRollback {
274    xmlRegStatePtr state;/* the current state */
275    int index;		/* the index in the input stack */
276    int nextbranch;	/* the next transition to explore in that state */
277    int *counts;	/* save the automata state if it has some */
278};
279
280typedef struct _xmlRegInputToken xmlRegInputToken;
281typedef xmlRegInputToken *xmlRegInputTokenPtr;
282
283struct _xmlRegInputToken {
284    xmlChar *value;
285    void *data;
286};
287
288struct _xmlRegExecCtxt {
289    int status;		/* execution status != 0 indicate an error */
290    int determinist;	/* did we find an indeterministic behaviour */
291    xmlRegexpPtr comp;	/* the compiled regexp */
292    xmlRegExecCallbacks callback;
293    void *data;
294
295    xmlRegStatePtr state;/* the current state */
296    int transno;	/* the current transition on that state */
297    int transcount;	/* the number of chars in char counted transitions */
298
299    /*
300     * A stack of rollback states
301     */
302    int maxRollbacks;
303    int nbRollbacks;
304    xmlRegExecRollback *rollbacks;
305
306    /*
307     * The state of the automata if any
308     */
309    int *counts;
310
311    /*
312     * The input stack
313     */
314    int inputStackMax;
315    int inputStackNr;
316    int index;
317    int *charStack;
318    const xmlChar *inputString; /* when operating on characters */
319    xmlRegInputTokenPtr inputStack;/* when operating on strings */
320
321    /*
322     * error handling
323     */
324    int errStateNo;		/* the error state number */
325    xmlRegStatePtr errState;    /* the error state */
326    xmlChar *errString;		/* the string raising the error */
327    int *errCounts;		/* counters at the error state */
328};
329
330#define REGEXP_ALL_COUNTER	0x123456
331#define REGEXP_ALL_LAX_COUNTER	0x123457
332
333static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
334static void xmlRegFreeState(xmlRegStatePtr state);
335static void xmlRegFreeAtom(xmlRegAtomPtr atom);
336static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
337
338/************************************************************************
339 *									*
340 * 		Regexp memory error handler				*
341 *									*
342 ************************************************************************/
343/**
344 * xmlRegexpErrMemory:
345 * @extra:  extra information
346 *
347 * Handle an out of memory condition
348 */
349static void
350xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
351{
352    const char *regexp = NULL;
353    if (ctxt != NULL) {
354        regexp = (const char *) ctxt->string;
355	ctxt->error = XML_ERR_NO_MEMORY;
356    }
357    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
358		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
359		    regexp, NULL, 0, 0,
360		    "Memory allocation failed : %s\n", extra);
361}
362
363/**
364 * xmlRegexpErrCompile:
365 * @extra:  extra information
366 *
367 * Handle a compilation failure
368 */
369static void
370xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
371{
372    const char *regexp = NULL;
373    int idx = 0;
374
375    if (ctxt != NULL) {
376        regexp = (const char *) ctxt->string;
377	idx = ctxt->cur - ctxt->string;
378	ctxt->error = XML_REGEXP_COMPILE_ERROR;
379    }
380    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
381		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
382		    regexp, NULL, idx, 0,
383		    "failed to compile: %s\n", extra);
384}
385
386/************************************************************************
387 * 									*
388 * 			Allocation/Deallocation				*
389 * 									*
390 ************************************************************************/
391
392static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
393/**
394 * xmlRegEpxFromParse:
395 * @ctxt:  the parser context used to build it
396 *
397 * Allocate a new regexp and fill it with the result from the parser
398 *
399 * Returns the new regexp or NULL in case of error
400 */
401static xmlRegexpPtr
402xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
403    xmlRegexpPtr ret;
404
405    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
406    if (ret == NULL) {
407	xmlRegexpErrMemory(ctxt, "compiling regexp");
408	return(NULL);
409    }
410    memset(ret, 0, sizeof(xmlRegexp));
411    ret->string = ctxt->string;
412    ret->nbStates = ctxt->nbStates;
413    ret->states = ctxt->states;
414    ret->nbAtoms = ctxt->nbAtoms;
415    ret->atoms = ctxt->atoms;
416    ret->nbCounters = ctxt->nbCounters;
417    ret->counters = ctxt->counters;
418    ret->determinist = ctxt->determinist;
419
420    if ((ret->determinist != 0) &&
421	(ret->nbCounters == 0) &&
422	(ret->atoms != NULL) &&
423	(ret->atoms[0] != NULL) &&
424	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
425	int i, j, nbstates = 0, nbatoms = 0;
426	int *stateRemap;
427	int *stringRemap;
428	int *transitions;
429	void **transdata;
430	xmlChar **stringMap;
431        xmlChar *value;
432
433	/*
434	 * Switch to a compact representation
435	 * 1/ counting the effective number of states left
436	 * 2/ counting the unique number of atoms, and check that
437	 *    they are all of the string type
438	 * 3/ build a table state x atom for the transitions
439	 */
440
441	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
442	if (stateRemap == NULL) {
443	    xmlRegexpErrMemory(ctxt, "compiling regexp");
444	    xmlFree(ret);
445	    return(NULL);
446	}
447	for (i = 0;i < ret->nbStates;i++) {
448	    if (ret->states[i] != NULL) {
449		stateRemap[i] = nbstates;
450		nbstates++;
451	    } else {
452		stateRemap[i] = -1;
453	    }
454	}
455#ifdef DEBUG_COMPACTION
456	printf("Final: %d states\n", nbstates);
457#endif
458	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
459	if (stringMap == NULL) {
460	    xmlRegexpErrMemory(ctxt, "compiling regexp");
461	    xmlFree(stateRemap);
462	    xmlFree(ret);
463	    return(NULL);
464	}
465	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
466	if (stringRemap == NULL) {
467	    xmlRegexpErrMemory(ctxt, "compiling regexp");
468	    xmlFree(stringMap);
469	    xmlFree(stateRemap);
470	    xmlFree(ret);
471	    return(NULL);
472	}
473	for (i = 0;i < ret->nbAtoms;i++) {
474	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
475		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
476		value = ret->atoms[i]->valuep;
477                for (j = 0;j < nbatoms;j++) {
478		    if (xmlStrEqual(stringMap[j], value)) {
479			stringRemap[i] = j;
480			break;
481		    }
482		}
483		if (j >= nbatoms) {
484		    stringRemap[i] = nbatoms;
485		    stringMap[nbatoms] = xmlStrdup(value);
486		    if (stringMap[nbatoms] == NULL) {
487			for (i = 0;i < nbatoms;i++)
488			    xmlFree(stringMap[i]);
489			xmlFree(stringRemap);
490			xmlFree(stringMap);
491			xmlFree(stateRemap);
492			xmlFree(ret);
493			return(NULL);
494		    }
495		    nbatoms++;
496		}
497	    } else {
498		xmlFree(stateRemap);
499		xmlFree(stringRemap);
500		for (i = 0;i < nbatoms;i++)
501		    xmlFree(stringMap[i]);
502		xmlFree(stringMap);
503		xmlFree(ret);
504		return(NULL);
505	    }
506	}
507#ifdef DEBUG_COMPACTION
508	printf("Final: %d atoms\n", nbatoms);
509#endif
510	transitions = (int *) xmlMalloc((nbstates + 1) *
511	                                (nbatoms + 1) * sizeof(int));
512	if (transitions == NULL) {
513	    xmlFree(stateRemap);
514	    xmlFree(stringRemap);
515	    xmlFree(stringMap);
516	    xmlFree(ret);
517	    return(NULL);
518	}
519	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
520
521	/*
522	 * Allocate the transition table. The first entry for each
523	 * state corresponds to the state type.
524	 */
525	transdata = NULL;
526
527	for (i = 0;i < ret->nbStates;i++) {
528	    int stateno, atomno, targetno, prev;
529	    xmlRegStatePtr state;
530	    xmlRegTransPtr trans;
531
532	    stateno = stateRemap[i];
533	    if (stateno == -1)
534		continue;
535	    state = ret->states[i];
536
537	    transitions[stateno * (nbatoms + 1)] = state->type;
538
539	    for (j = 0;j < state->nbTrans;j++) {
540		trans = &(state->trans[j]);
541		if ((trans->to == -1) || (trans->atom == NULL))
542		    continue;
543                atomno = stringRemap[trans->atom->no];
544		if ((trans->atom->data != NULL) && (transdata == NULL)) {
545		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
546			                            sizeof(void *));
547		    if (transdata != NULL)
548			memset(transdata, 0,
549			       nbstates * nbatoms * sizeof(void *));
550		    else {
551			xmlRegexpErrMemory(ctxt, "compiling regexp");
552			break;
553		    }
554		}
555		targetno = stateRemap[trans->to];
556		/*
557		 * if the same atom can generate transitions to 2 different
558		 * states then it means the automata is not determinist and
559		 * the compact form can't be used !
560		 */
561		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
562		if (prev != 0) {
563		    if (prev != targetno + 1) {
564			ret->determinist = 0;
565#ifdef DEBUG_COMPACTION
566			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
567			       i, j, trans->atom->no, trans->to, atomno, targetno);
568			printf("       previous to is %d\n", prev);
569#endif
570			ret->determinist = 0;
571			if (transdata != NULL)
572			    xmlFree(transdata);
573			xmlFree(transitions);
574			xmlFree(stateRemap);
575			xmlFree(stringRemap);
576			for (i = 0;i < nbatoms;i++)
577			    xmlFree(stringMap[i]);
578			xmlFree(stringMap);
579			goto not_determ;
580		    }
581		} else {
582#if 0
583		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
584			   i, j, trans->atom->no, trans->to, atomno, targetno);
585#endif
586		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
587			targetno + 1; /* to avoid 0 */
588		    if (transdata != NULL)
589			transdata[stateno * nbatoms + atomno] =
590			    trans->atom->data;
591		}
592	    }
593	}
594	ret->determinist = 1;
595#ifdef DEBUG_COMPACTION
596	/*
597	 * Debug
598	 */
599	for (i = 0;i < nbstates;i++) {
600	    for (j = 0;j < nbatoms + 1;j++) {
601                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
602	    }
603	    printf("\n");
604	}
605	printf("\n");
606#endif
607	/*
608	 * Cleanup of the old data
609	 */
610	if (ret->states != NULL) {
611	    for (i = 0;i < ret->nbStates;i++)
612		xmlRegFreeState(ret->states[i]);
613	    xmlFree(ret->states);
614	}
615	ret->states = NULL;
616	ret->nbStates = 0;
617	if (ret->atoms != NULL) {
618	    for (i = 0;i < ret->nbAtoms;i++)
619		xmlRegFreeAtom(ret->atoms[i]);
620	    xmlFree(ret->atoms);
621	}
622	ret->atoms = NULL;
623	ret->nbAtoms = 0;
624
625	ret->compact = transitions;
626	ret->transdata = transdata;
627	ret->stringMap = stringMap;
628	ret->nbstrings = nbatoms;
629	ret->nbstates = nbstates;
630	xmlFree(stateRemap);
631	xmlFree(stringRemap);
632    }
633not_determ:
634    ctxt->string = NULL;
635    ctxt->nbStates = 0;
636    ctxt->states = NULL;
637    ctxt->nbAtoms = 0;
638    ctxt->atoms = NULL;
639    ctxt->nbCounters = 0;
640    ctxt->counters = NULL;
641    return(ret);
642}
643
644/**
645 * xmlRegNewParserCtxt:
646 * @string:  the string to parse
647 *
648 * Allocate a new regexp parser context
649 *
650 * Returns the new context or NULL in case of error
651 */
652static xmlRegParserCtxtPtr
653xmlRegNewParserCtxt(const xmlChar *string) {
654    xmlRegParserCtxtPtr ret;
655
656    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
657    if (ret == NULL)
658	return(NULL);
659    memset(ret, 0, sizeof(xmlRegParserCtxt));
660    if (string != NULL)
661	ret->string = xmlStrdup(string);
662    ret->cur = ret->string;
663    ret->neg = 0;
664    ret->error = 0;
665    ret->determinist = -1;
666    return(ret);
667}
668
669/**
670 * xmlRegNewRange:
671 * @ctxt:  the regexp parser context
672 * @neg:  is that negative
673 * @type:  the type of range
674 * @start:  the start codepoint
675 * @end:  the end codepoint
676 *
677 * Allocate a new regexp range
678 *
679 * Returns the new range or NULL in case of error
680 */
681static xmlRegRangePtr
682xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
683	       int neg, xmlRegAtomType type, int start, int end) {
684    xmlRegRangePtr ret;
685
686    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
687    if (ret == NULL) {
688	xmlRegexpErrMemory(ctxt, "allocating range");
689	return(NULL);
690    }
691    ret->neg = neg;
692    ret->type = type;
693    ret->start = start;
694    ret->end = end;
695    return(ret);
696}
697
698/**
699 * xmlRegFreeRange:
700 * @range:  the regexp range
701 *
702 * Free a regexp range
703 */
704static void
705xmlRegFreeRange(xmlRegRangePtr range) {
706    if (range == NULL)
707	return;
708
709    if (range->blockName != NULL)
710	xmlFree(range->blockName);
711    xmlFree(range);
712}
713
714/**
715 * xmlRegNewAtom:
716 * @ctxt:  the regexp parser context
717 * @type:  the type of atom
718 *
719 * Allocate a new regexp range
720 *
721 * Returns the new atom or NULL in case of error
722 */
723static xmlRegAtomPtr
724xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
725    xmlRegAtomPtr ret;
726
727    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
728    if (ret == NULL) {
729	xmlRegexpErrMemory(ctxt, "allocating atom");
730	return(NULL);
731    }
732    memset(ret, 0, sizeof(xmlRegAtom));
733    ret->type = type;
734    ret->quant = XML_REGEXP_QUANT_ONCE;
735    ret->min = 0;
736    ret->max = 0;
737    return(ret);
738}
739
740/**
741 * xmlRegFreeAtom:
742 * @atom:  the regexp atom
743 *
744 * Free a regexp atom
745 */
746static void
747xmlRegFreeAtom(xmlRegAtomPtr atom) {
748    int i;
749
750    if (atom == NULL)
751	return;
752
753    for (i = 0;i < atom->nbRanges;i++)
754	xmlRegFreeRange(atom->ranges[i]);
755    if (atom->ranges != NULL)
756	xmlFree(atom->ranges);
757    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
758	xmlFree(atom->valuep);
759    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
760	xmlFree(atom->valuep2);
761    if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
762	xmlFree(atom->valuep);
763    xmlFree(atom);
764}
765
766static xmlRegStatePtr
767xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
768    xmlRegStatePtr ret;
769
770    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
771    if (ret == NULL) {
772	xmlRegexpErrMemory(ctxt, "allocating state");
773	return(NULL);
774    }
775    memset(ret, 0, sizeof(xmlRegState));
776    ret->type = XML_REGEXP_TRANS_STATE;
777    ret->mark = XML_REGEXP_MARK_NORMAL;
778    return(ret);
779}
780
781/**
782 * xmlRegFreeState:
783 * @state:  the regexp state
784 *
785 * Free a regexp state
786 */
787static void
788xmlRegFreeState(xmlRegStatePtr state) {
789    if (state == NULL)
790	return;
791
792    if (state->trans != NULL)
793	xmlFree(state->trans);
794    if (state->transTo != NULL)
795	xmlFree(state->transTo);
796    xmlFree(state);
797}
798
799/**
800 * xmlRegFreeParserCtxt:
801 * @ctxt:  the regexp parser context
802 *
803 * Free a regexp parser context
804 */
805static void
806xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
807    int i;
808    if (ctxt == NULL)
809	return;
810
811    if (ctxt->string != NULL)
812	xmlFree(ctxt->string);
813    if (ctxt->states != NULL) {
814	for (i = 0;i < ctxt->nbStates;i++)
815	    xmlRegFreeState(ctxt->states[i]);
816	xmlFree(ctxt->states);
817    }
818    if (ctxt->atoms != NULL) {
819	for (i = 0;i < ctxt->nbAtoms;i++)
820	    xmlRegFreeAtom(ctxt->atoms[i]);
821	xmlFree(ctxt->atoms);
822    }
823    if (ctxt->counters != NULL)
824	xmlFree(ctxt->counters);
825    xmlFree(ctxt);
826}
827
828/************************************************************************
829 * 									*
830 * 			Display of Data structures			*
831 * 									*
832 ************************************************************************/
833
834static void
835xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
836    switch (type) {
837        case XML_REGEXP_EPSILON:
838	    fprintf(output, "epsilon "); break;
839        case XML_REGEXP_CHARVAL:
840	    fprintf(output, "charval "); break;
841        case XML_REGEXP_RANGES:
842	    fprintf(output, "ranges "); break;
843        case XML_REGEXP_SUBREG:
844	    fprintf(output, "subexpr "); break;
845        case XML_REGEXP_STRING:
846	    fprintf(output, "string "); break;
847        case XML_REGEXP_ANYCHAR:
848	    fprintf(output, "anychar "); break;
849        case XML_REGEXP_ANYSPACE:
850	    fprintf(output, "anyspace "); break;
851        case XML_REGEXP_NOTSPACE:
852	    fprintf(output, "notspace "); break;
853        case XML_REGEXP_INITNAME:
854	    fprintf(output, "initname "); break;
855        case XML_REGEXP_NOTINITNAME:
856	    fprintf(output, "notinitname "); break;
857        case XML_REGEXP_NAMECHAR:
858	    fprintf(output, "namechar "); break;
859        case XML_REGEXP_NOTNAMECHAR:
860	    fprintf(output, "notnamechar "); break;
861        case XML_REGEXP_DECIMAL:
862	    fprintf(output, "decimal "); break;
863        case XML_REGEXP_NOTDECIMAL:
864	    fprintf(output, "notdecimal "); break;
865        case XML_REGEXP_REALCHAR:
866	    fprintf(output, "realchar "); break;
867        case XML_REGEXP_NOTREALCHAR:
868	    fprintf(output, "notrealchar "); break;
869        case XML_REGEXP_LETTER:
870            fprintf(output, "LETTER "); break;
871        case XML_REGEXP_LETTER_UPPERCASE:
872            fprintf(output, "LETTER_UPPERCASE "); break;
873        case XML_REGEXP_LETTER_LOWERCASE:
874            fprintf(output, "LETTER_LOWERCASE "); break;
875        case XML_REGEXP_LETTER_TITLECASE:
876            fprintf(output, "LETTER_TITLECASE "); break;
877        case XML_REGEXP_LETTER_MODIFIER:
878            fprintf(output, "LETTER_MODIFIER "); break;
879        case XML_REGEXP_LETTER_OTHERS:
880            fprintf(output, "LETTER_OTHERS "); break;
881        case XML_REGEXP_MARK:
882            fprintf(output, "MARK "); break;
883        case XML_REGEXP_MARK_NONSPACING:
884            fprintf(output, "MARK_NONSPACING "); break;
885        case XML_REGEXP_MARK_SPACECOMBINING:
886            fprintf(output, "MARK_SPACECOMBINING "); break;
887        case XML_REGEXP_MARK_ENCLOSING:
888            fprintf(output, "MARK_ENCLOSING "); break;
889        case XML_REGEXP_NUMBER:
890            fprintf(output, "NUMBER "); break;
891        case XML_REGEXP_NUMBER_DECIMAL:
892            fprintf(output, "NUMBER_DECIMAL "); break;
893        case XML_REGEXP_NUMBER_LETTER:
894            fprintf(output, "NUMBER_LETTER "); break;
895        case XML_REGEXP_NUMBER_OTHERS:
896            fprintf(output, "NUMBER_OTHERS "); break;
897        case XML_REGEXP_PUNCT:
898            fprintf(output, "PUNCT "); break;
899        case XML_REGEXP_PUNCT_CONNECTOR:
900            fprintf(output, "PUNCT_CONNECTOR "); break;
901        case XML_REGEXP_PUNCT_DASH:
902            fprintf(output, "PUNCT_DASH "); break;
903        case XML_REGEXP_PUNCT_OPEN:
904            fprintf(output, "PUNCT_OPEN "); break;
905        case XML_REGEXP_PUNCT_CLOSE:
906            fprintf(output, "PUNCT_CLOSE "); break;
907        case XML_REGEXP_PUNCT_INITQUOTE:
908            fprintf(output, "PUNCT_INITQUOTE "); break;
909        case XML_REGEXP_PUNCT_FINQUOTE:
910            fprintf(output, "PUNCT_FINQUOTE "); break;
911        case XML_REGEXP_PUNCT_OTHERS:
912            fprintf(output, "PUNCT_OTHERS "); break;
913        case XML_REGEXP_SEPAR:
914            fprintf(output, "SEPAR "); break;
915        case XML_REGEXP_SEPAR_SPACE:
916            fprintf(output, "SEPAR_SPACE "); break;
917        case XML_REGEXP_SEPAR_LINE:
918            fprintf(output, "SEPAR_LINE "); break;
919        case XML_REGEXP_SEPAR_PARA:
920            fprintf(output, "SEPAR_PARA "); break;
921        case XML_REGEXP_SYMBOL:
922            fprintf(output, "SYMBOL "); break;
923        case XML_REGEXP_SYMBOL_MATH:
924            fprintf(output, "SYMBOL_MATH "); break;
925        case XML_REGEXP_SYMBOL_CURRENCY:
926            fprintf(output, "SYMBOL_CURRENCY "); break;
927        case XML_REGEXP_SYMBOL_MODIFIER:
928            fprintf(output, "SYMBOL_MODIFIER "); break;
929        case XML_REGEXP_SYMBOL_OTHERS:
930            fprintf(output, "SYMBOL_OTHERS "); break;
931        case XML_REGEXP_OTHER:
932            fprintf(output, "OTHER "); break;
933        case XML_REGEXP_OTHER_CONTROL:
934            fprintf(output, "OTHER_CONTROL "); break;
935        case XML_REGEXP_OTHER_FORMAT:
936            fprintf(output, "OTHER_FORMAT "); break;
937        case XML_REGEXP_OTHER_PRIVATE:
938            fprintf(output, "OTHER_PRIVATE "); break;
939        case XML_REGEXP_OTHER_NA:
940            fprintf(output, "OTHER_NA "); break;
941        case XML_REGEXP_BLOCK_NAME:
942	    fprintf(output, "BLOCK "); break;
943    }
944}
945
946static void
947xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
948    switch (type) {
949        case XML_REGEXP_QUANT_EPSILON:
950	    fprintf(output, "epsilon "); break;
951        case XML_REGEXP_QUANT_ONCE:
952	    fprintf(output, "once "); break;
953        case XML_REGEXP_QUANT_OPT:
954	    fprintf(output, "? "); break;
955        case XML_REGEXP_QUANT_MULT:
956	    fprintf(output, "* "); break;
957        case XML_REGEXP_QUANT_PLUS:
958	    fprintf(output, "+ "); break;
959	case XML_REGEXP_QUANT_RANGE:
960	    fprintf(output, "range "); break;
961	case XML_REGEXP_QUANT_ONCEONLY:
962	    fprintf(output, "onceonly "); break;
963	case XML_REGEXP_QUANT_ALL:
964	    fprintf(output, "all "); break;
965    }
966}
967static void
968xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
969    fprintf(output, "  range: ");
970    if (range->neg)
971	fprintf(output, "negative ");
972    xmlRegPrintAtomType(output, range->type);
973    fprintf(output, "%c - %c\n", range->start, range->end);
974}
975
976static void
977xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
978    fprintf(output, " atom: ");
979    if (atom == NULL) {
980	fprintf(output, "NULL\n");
981	return;
982    }
983    if (atom->neg)
984        fprintf(output, "not ");
985    xmlRegPrintAtomType(output, atom->type);
986    xmlRegPrintQuantType(output, atom->quant);
987    if (atom->quant == XML_REGEXP_QUANT_RANGE)
988	fprintf(output, "%d-%d ", atom->min, atom->max);
989    if (atom->type == XML_REGEXP_STRING)
990	fprintf(output, "'%s' ", (char *) atom->valuep);
991    if (atom->type == XML_REGEXP_CHARVAL)
992	fprintf(output, "char %c\n", atom->codepoint);
993    else if (atom->type == XML_REGEXP_RANGES) {
994	int i;
995	fprintf(output, "%d entries\n", atom->nbRanges);
996	for (i = 0; i < atom->nbRanges;i++)
997	    xmlRegPrintRange(output, atom->ranges[i]);
998    } else if (atom->type == XML_REGEXP_SUBREG) {
999	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
1000    } else {
1001	fprintf(output, "\n");
1002    }
1003}
1004
1005static void
1006xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
1007    fprintf(output, "  trans: ");
1008    if (trans == NULL) {
1009	fprintf(output, "NULL\n");
1010	return;
1011    }
1012    if (trans->to < 0) {
1013	fprintf(output, "removed\n");
1014	return;
1015    }
1016    if (trans->counter >= 0) {
1017	fprintf(output, "counted %d, ", trans->counter);
1018    }
1019    if (trans->count == REGEXP_ALL_COUNTER) {
1020	fprintf(output, "all transition, ");
1021    } else if (trans->count >= 0) {
1022	fprintf(output, "count based %d, ", trans->count);
1023    }
1024    if (trans->atom == NULL) {
1025	fprintf(output, "epsilon to %d\n", trans->to);
1026	return;
1027    }
1028    if (trans->atom->type == XML_REGEXP_CHARVAL)
1029	fprintf(output, "char %c ", trans->atom->codepoint);
1030    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
1031}
1032
1033static void
1034xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
1035    int i;
1036
1037    fprintf(output, " state: ");
1038    if (state == NULL) {
1039	fprintf(output, "NULL\n");
1040	return;
1041    }
1042    if (state->type == XML_REGEXP_START_STATE)
1043	fprintf(output, "START ");
1044    if (state->type == XML_REGEXP_FINAL_STATE)
1045	fprintf(output, "FINAL ");
1046
1047    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
1048    for (i = 0;i < state->nbTrans; i++) {
1049	xmlRegPrintTrans(output, &(state->trans[i]));
1050    }
1051}
1052
1053#ifdef DEBUG_REGEXP_GRAPH
1054static void
1055xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
1056    int i;
1057
1058    fprintf(output, " ctxt: ");
1059    if (ctxt == NULL) {
1060	fprintf(output, "NULL\n");
1061	return;
1062    }
1063    fprintf(output, "'%s' ", ctxt->string);
1064    if (ctxt->error)
1065	fprintf(output, "error ");
1066    if (ctxt->neg)
1067	fprintf(output, "neg ");
1068    fprintf(output, "\n");
1069    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
1070    for (i = 0;i < ctxt->nbAtoms; i++) {
1071	fprintf(output, " %02d ", i);
1072	xmlRegPrintAtom(output, ctxt->atoms[i]);
1073    }
1074    if (ctxt->atom != NULL) {
1075	fprintf(output, "current atom:\n");
1076	xmlRegPrintAtom(output, ctxt->atom);
1077    }
1078    fprintf(output, "%d states:", ctxt->nbStates);
1079    if (ctxt->start != NULL)
1080	fprintf(output, " start: %d", ctxt->start->no);
1081    if (ctxt->end != NULL)
1082	fprintf(output, " end: %d", ctxt->end->no);
1083    fprintf(output, "\n");
1084    for (i = 0;i < ctxt->nbStates; i++) {
1085	xmlRegPrintState(output, ctxt->states[i]);
1086    }
1087    fprintf(output, "%d counters:\n", ctxt->nbCounters);
1088    for (i = 0;i < ctxt->nbCounters; i++) {
1089	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
1090		                                ctxt->counters[i].max);
1091    }
1092}
1093#endif
1094
1095/************************************************************************
1096 * 									*
1097 *		 Finite Automata structures manipulations		*
1098 * 									*
1099 ************************************************************************/
1100
1101static void
1102xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
1103	           int neg, xmlRegAtomType type, int start, int end,
1104		   xmlChar *blockName) {
1105    xmlRegRangePtr range;
1106
1107    if (atom == NULL) {
1108	ERROR("add range: atom is NULL");
1109	return;
1110    }
1111    if (atom->type != XML_REGEXP_RANGES) {
1112	ERROR("add range: atom is not ranges");
1113	return;
1114    }
1115    if (atom->maxRanges == 0) {
1116	atom->maxRanges = 4;
1117	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
1118		                             sizeof(xmlRegRangePtr));
1119	if (atom->ranges == NULL) {
1120	    xmlRegexpErrMemory(ctxt, "adding ranges");
1121	    atom->maxRanges = 0;
1122	    return;
1123	}
1124    } else if (atom->nbRanges >= atom->maxRanges) {
1125	xmlRegRangePtr *tmp;
1126	atom->maxRanges *= 2;
1127	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1128		                             sizeof(xmlRegRangePtr));
1129	if (tmp == NULL) {
1130	    xmlRegexpErrMemory(ctxt, "adding ranges");
1131	    atom->maxRanges /= 2;
1132	    return;
1133	}
1134	atom->ranges = tmp;
1135    }
1136    range = xmlRegNewRange(ctxt, neg, type, start, end);
1137    if (range == NULL)
1138	return;
1139    range->blockName = blockName;
1140    atom->ranges[atom->nbRanges++] = range;
1141
1142}
1143
1144static int
1145xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1146    if (ctxt->maxCounters == 0) {
1147	ctxt->maxCounters = 4;
1148	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1149		                             sizeof(xmlRegCounter));
1150	if (ctxt->counters == NULL) {
1151	    xmlRegexpErrMemory(ctxt, "allocating counter");
1152	    ctxt->maxCounters = 0;
1153	    return(-1);
1154	}
1155    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1156	xmlRegCounter *tmp;
1157	ctxt->maxCounters *= 2;
1158	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1159		                           sizeof(xmlRegCounter));
1160	if (tmp == NULL) {
1161	    xmlRegexpErrMemory(ctxt, "allocating counter");
1162	    ctxt->maxCounters /= 2;
1163	    return(-1);
1164	}
1165	ctxt->counters = tmp;
1166    }
1167    ctxt->counters[ctxt->nbCounters].min = -1;
1168    ctxt->counters[ctxt->nbCounters].max = -1;
1169    return(ctxt->nbCounters++);
1170}
1171
1172static int
1173xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1174    if (atom == NULL) {
1175	ERROR("atom push: atom is NULL");
1176	return(-1);
1177    }
1178    if (ctxt->maxAtoms == 0) {
1179	ctxt->maxAtoms = 4;
1180	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1181		                             sizeof(xmlRegAtomPtr));
1182	if (ctxt->atoms == NULL) {
1183	    xmlRegexpErrMemory(ctxt, "pushing atom");
1184	    ctxt->maxAtoms = 0;
1185	    return(-1);
1186	}
1187    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1188	xmlRegAtomPtr *tmp;
1189	ctxt->maxAtoms *= 2;
1190	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1191		                             sizeof(xmlRegAtomPtr));
1192	if (tmp == NULL) {
1193	    xmlRegexpErrMemory(ctxt, "allocating counter");
1194	    ctxt->maxAtoms /= 2;
1195	    return(-1);
1196	}
1197	ctxt->atoms = tmp;
1198    }
1199    atom->no = ctxt->nbAtoms;
1200    ctxt->atoms[ctxt->nbAtoms++] = atom;
1201    return(0);
1202}
1203
1204static void
1205xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
1206                      int from) {
1207    if (target->maxTransTo == 0) {
1208	target->maxTransTo = 8;
1209	target->transTo = (int *) xmlMalloc(target->maxTransTo *
1210		                             sizeof(int));
1211	if (target->transTo == NULL) {
1212	    xmlRegexpErrMemory(ctxt, "adding transition");
1213	    target->maxTransTo = 0;
1214	    return;
1215	}
1216    } else if (target->nbTransTo >= target->maxTransTo) {
1217	int *tmp;
1218	target->maxTransTo *= 2;
1219	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
1220		                             sizeof(int));
1221	if (tmp == NULL) {
1222	    xmlRegexpErrMemory(ctxt, "adding transition");
1223	    target->maxTransTo /= 2;
1224	    return;
1225	}
1226	target->transTo = tmp;
1227    }
1228    target->transTo[target->nbTransTo] = from;
1229    target->nbTransTo++;
1230}
1231
1232static void
1233xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1234	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1235		    int counter, int count, int nchk) {
1236
1237    int nrtrans;
1238
1239    if (state == NULL) {
1240	ERROR("add state: state is NULL");
1241	return;
1242    }
1243    if (target == NULL) {
1244	ERROR("add state: target is NULL");
1245	return;
1246    }
1247    /*
1248     * Other routines follow the philosophy 'When in doubt, add a transition'
1249     * so we check here whether such a transition is already present and, if
1250     * so, silently ignore this request.
1251     */
1252
1253    if (nchk == 0) {
1254	for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
1255	    xmlRegTransPtr trans = &(state->trans[nrtrans]);
1256	    if ((trans->atom == atom) &&
1257		(trans->to == target->no) &&
1258		(trans->counter == counter) &&
1259		(trans->count == count)) {
1260#ifdef DEBUG_REGEXP_GRAPH
1261		printf("Ignoring duplicate transition from %d to %d\n",
1262			state->no, target->no);
1263#endif
1264		return;
1265	    }
1266	}
1267    }
1268
1269    if (state->maxTrans == 0) {
1270	state->maxTrans = 8;
1271	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1272		                             sizeof(xmlRegTrans));
1273	if (state->trans == NULL) {
1274	    xmlRegexpErrMemory(ctxt, "adding transition");
1275	    state->maxTrans = 0;
1276	    return;
1277	}
1278    } else if (state->nbTrans >= state->maxTrans) {
1279	xmlRegTrans *tmp;
1280	state->maxTrans *= 2;
1281	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1282		                             sizeof(xmlRegTrans));
1283	if (tmp == NULL) {
1284	    xmlRegexpErrMemory(ctxt, "adding transition");
1285	    state->maxTrans /= 2;
1286	    return;
1287	}
1288	state->trans = tmp;
1289    }
1290#ifdef DEBUG_REGEXP_GRAPH
1291    printf("Add trans from %d to %d ", state->no, target->no);
1292    if (count == REGEXP_ALL_COUNTER)
1293	printf("all transition\n");
1294    else if (count >= 0)
1295	printf("count based %d\n", count);
1296    else if (counter >= 0)
1297	printf("counted %d\n", counter);
1298    else if (atom == NULL)
1299	printf("epsilon transition\n");
1300    else if (atom != NULL)
1301        xmlRegPrintAtom(stdout, atom);
1302#endif
1303
1304    state->trans[state->nbTrans].atom = atom;
1305    state->trans[state->nbTrans].to = target->no;
1306    state->trans[state->nbTrans].counter = counter;
1307    state->trans[state->nbTrans].count = count;
1308    state->nbTrans++;
1309    xmlRegStateAddTransTo(ctxt, target, state->no);
1310}
1311
1312static int
1313xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1314    if (state == NULL) return(-1);
1315    if (ctxt->maxStates == 0) {
1316	ctxt->maxStates = 4;
1317	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1318		                             sizeof(xmlRegStatePtr));
1319	if (ctxt->states == NULL) {
1320	    xmlRegexpErrMemory(ctxt, "adding state");
1321	    ctxt->maxStates = 0;
1322	    return(-1);
1323	}
1324    } else if (ctxt->nbStates >= ctxt->maxStates) {
1325	xmlRegStatePtr *tmp;
1326	ctxt->maxStates *= 2;
1327	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1328		                             sizeof(xmlRegStatePtr));
1329	if (tmp == NULL) {
1330	    xmlRegexpErrMemory(ctxt, "adding state");
1331	    ctxt->maxStates /= 2;
1332	    return(-1);
1333	}
1334	ctxt->states = tmp;
1335    }
1336    state->no = ctxt->nbStates;
1337    ctxt->states[ctxt->nbStates++] = state;
1338    return(0);
1339}
1340
1341/**
1342 * xmlFAGenerateAllTransition:
1343 * @ctxt:  a regexp parser context
1344 * @from:  the from state
1345 * @to:  the target state or NULL for building a new one
1346 * @lax:
1347 *
1348 */
1349static void
1350xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1351			   xmlRegStatePtr from, xmlRegStatePtr to,
1352			   int lax) {
1353    if (to == NULL) {
1354	to = xmlRegNewState(ctxt);
1355	xmlRegStatePush(ctxt, to);
1356	ctxt->state = to;
1357    }
1358    if (lax)
1359	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER, 0);
1360    else
1361	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER, 0);
1362}
1363
1364/**
1365 * xmlFAGenerateEpsilonTransition:
1366 * @ctxt:  a regexp parser context
1367 * @from:  the from state
1368 * @to:  the target state or NULL for building a new one
1369 *
1370 */
1371static void
1372xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1373			       xmlRegStatePtr from, xmlRegStatePtr to) {
1374    if (to == NULL) {
1375	to = xmlRegNewState(ctxt);
1376	xmlRegStatePush(ctxt, to);
1377	ctxt->state = to;
1378    }
1379    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1, 0);
1380}
1381
1382/**
1383 * xmlFAGenerateCountedEpsilonTransition:
1384 * @ctxt:  a regexp parser context
1385 * @from:  the from state
1386 * @to:  the target state or NULL for building a new one
1387 * counter:  the counter for that transition
1388 *
1389 */
1390static void
1391xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1392	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1393    if (to == NULL) {
1394	to = xmlRegNewState(ctxt);
1395	xmlRegStatePush(ctxt, to);
1396	ctxt->state = to;
1397    }
1398    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1, 0);
1399}
1400
1401/**
1402 * xmlFAGenerateCountedTransition:
1403 * @ctxt:  a regexp parser context
1404 * @from:  the from state
1405 * @to:  the target state or NULL for building a new one
1406 * counter:  the counter for that transition
1407 *
1408 */
1409static void
1410xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1411	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1412    if (to == NULL) {
1413	to = xmlRegNewState(ctxt);
1414	xmlRegStatePush(ctxt, to);
1415	ctxt->state = to;
1416    }
1417    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter, 0);
1418}
1419
1420/**
1421 * xmlFAGenerateTransitions:
1422 * @ctxt:  a regexp parser context
1423 * @from:  the from state
1424 * @to:  the target state or NULL for building a new one
1425 * @atom:  the atom generating the transition
1426 *
1427 * Returns 0 if success and -1 in case of error.
1428 */
1429static int
1430xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1431	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1432    if (atom == NULL) {
1433	ERROR("genrate transition: atom == NULL");
1434	return(-1);
1435    }
1436    if (atom->type == XML_REGEXP_SUBREG) {
1437	/*
1438	 * this is a subexpression handling one should not need to
1439	 * create a new node except for XML_REGEXP_QUANT_RANGE.
1440	 */
1441	if (xmlRegAtomPush(ctxt, atom) < 0) {
1442	    return(-1);
1443	}
1444	if ((to != NULL) && (atom->stop != to) &&
1445	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1446	    /*
1447	     * Generate an epsilon transition to link to the target
1448	     */
1449	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1450	}
1451	switch (atom->quant) {
1452	    case XML_REGEXP_QUANT_OPT:
1453		atom->quant = XML_REGEXP_QUANT_ONCE;
1454		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1455		break;
1456	    case XML_REGEXP_QUANT_MULT:
1457		atom->quant = XML_REGEXP_QUANT_ONCE;
1458		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1459		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1460		break;
1461	    case XML_REGEXP_QUANT_PLUS:
1462		atom->quant = XML_REGEXP_QUANT_ONCE;
1463		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1464		break;
1465	    case XML_REGEXP_QUANT_RANGE: {
1466		int counter;
1467		xmlRegStatePtr newstate;
1468
1469		/*
1470		 * This one is nasty:
1471		 *   1/ if range has minOccurs == 0, create a new state
1472		 *	and create epsilon transitions from atom->start
1473		 *	to atom->stop, as well as atom->start to the new
1474		 *	state
1475		 *   2/ register a new counter
1476		 *   3/ register an epsilon transition associated to
1477		 *      this counter going from atom->stop to atom->start
1478		 *   4/ create a new state
1479		 *   5/ generate a counted transition from atom->stop to
1480		 *      that state
1481		 */
1482		if (atom->min == 0) {
1483		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1484			atom->stop);
1485		    newstate = xmlRegNewState(ctxt);
1486		    xmlRegStatePush(ctxt, newstate);
1487		    ctxt->state = newstate;
1488		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
1489			newstate);
1490		}
1491		counter = xmlRegGetCounter(ctxt);
1492		ctxt->counters[counter].min = atom->min - 1;
1493		ctxt->counters[counter].max = atom->max - 1;
1494		atom->min = 0;
1495		atom->max = 0;
1496		atom->quant = XML_REGEXP_QUANT_ONCE;
1497		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1498			                              atom->start, counter);
1499		if (to != NULL) {
1500		    newstate = to;
1501		} else {
1502		    newstate = xmlRegNewState(ctxt);
1503		    xmlRegStatePush(ctxt, newstate);
1504		    ctxt->state = newstate;
1505		}
1506		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1507			                       newstate, counter);
1508	    }
1509	    default:
1510		break;
1511	}
1512	return(0);
1513    } else if ((atom->min == 0) && (atom->max == 0) &&
1514               (atom->quant == XML_REGEXP_QUANT_RANGE)) {
1515        /*
1516	 * we can discard the atom and generate an epsilon transition instead
1517	 */
1518	if (to == NULL) {
1519	    to = xmlRegNewState(ctxt);
1520	    if (to != NULL)
1521		xmlRegStatePush(ctxt, to);
1522	    else {
1523		return(-1);
1524	    }
1525	}
1526	xmlFAGenerateEpsilonTransition(ctxt, from, to);
1527	ctxt->state = to;
1528	xmlRegFreeAtom(atom);
1529	return(0);
1530    } else {
1531	if (to == NULL) {
1532	    to = xmlRegNewState(ctxt);
1533	    if (to != NULL)
1534		xmlRegStatePush(ctxt, to);
1535	    else {
1536		return(-1);
1537	    }
1538	}
1539	if (xmlRegAtomPush(ctxt, atom) < 0) {
1540	    return(-1);
1541	}
1542	xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1, 0);
1543	ctxt->state = to;
1544    }
1545    switch (atom->quant) {
1546	case XML_REGEXP_QUANT_OPT:
1547	    atom->quant = XML_REGEXP_QUANT_ONCE;
1548	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1549	    break;
1550	case XML_REGEXP_QUANT_MULT:
1551	    atom->quant = XML_REGEXP_QUANT_ONCE;
1552	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1553	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
1554	    break;
1555	case XML_REGEXP_QUANT_PLUS:
1556	    atom->quant = XML_REGEXP_QUANT_ONCE;
1557	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
1558	    break;
1559	default:
1560	    break;
1561    }
1562    return(0);
1563}
1564
1565/**
1566 * xmlFAReduceEpsilonTransitions:
1567 * @ctxt:  a regexp parser context
1568 * @fromnr:  the from state
1569 * @tonr:  the to state
1570 * @counter:  should that transition be associated to a counted
1571 *
1572 */
1573static void
1574xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1575	                      int tonr, int counter) {
1576    int transnr;
1577    xmlRegStatePtr from;
1578    xmlRegStatePtr to;
1579
1580#ifdef DEBUG_REGEXP_GRAPH
1581    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1582#endif
1583    from = ctxt->states[fromnr];
1584    if (from == NULL)
1585	return;
1586    to = ctxt->states[tonr];
1587    if (to == NULL)
1588	return;
1589    if ((to->mark == XML_REGEXP_MARK_START) ||
1590	(to->mark == XML_REGEXP_MARK_VISITED))
1591	return;
1592
1593    to->mark = XML_REGEXP_MARK_VISITED;
1594    if (to->type == XML_REGEXP_FINAL_STATE) {
1595#ifdef DEBUG_REGEXP_GRAPH
1596	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1597#endif
1598	from->type = XML_REGEXP_FINAL_STATE;
1599    }
1600    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1601        if (to->trans[transnr].to < 0)
1602	    continue;
1603	if (to->trans[transnr].atom == NULL) {
1604	    /*
1605	     * Don't remove counted transitions
1606	     * Don't loop either
1607	     */
1608	    if (to->trans[transnr].to != fromnr) {
1609		if (to->trans[transnr].count >= 0) {
1610		    int newto = to->trans[transnr].to;
1611
1612		    xmlRegStateAddTrans(ctxt, from, NULL,
1613					ctxt->states[newto],
1614					-1, to->trans[transnr].count, 0);
1615		} else {
1616#ifdef DEBUG_REGEXP_GRAPH
1617		    printf("Found epsilon trans %d from %d to %d\n",
1618			   transnr, tonr, to->trans[transnr].to);
1619#endif
1620		    if (to->trans[transnr].counter >= 0) {
1621			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1622					      to->trans[transnr].to,
1623					      to->trans[transnr].counter);
1624		    } else {
1625			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1626					      to->trans[transnr].to,
1627					      counter);
1628		    }
1629		}
1630	    }
1631	} else {
1632	    int newto = to->trans[transnr].to;
1633
1634	    if (to->trans[transnr].counter >= 0) {
1635		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1636				    ctxt->states[newto],
1637				    to->trans[transnr].counter, -1, 1);
1638	    } else {
1639		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1640				    ctxt->states[newto], counter, -1, 1);
1641	    }
1642	}
1643    }
1644    to->mark = XML_REGEXP_MARK_NORMAL;
1645}
1646
1647/**
1648 * xmlFAEliminateSimpleEpsilonTransitions:
1649 * @ctxt:  a regexp parser context
1650 *
1651 * Eliminating general epsilon transitions can get costly in the general
1652 * algorithm due to the large amount of generated new transitions and
1653 * associated comparisons. However for simple epsilon transition used just
1654 * to separate building blocks when generating the automata this can be
1655 * reduced to state elimination:
1656 *    - if there exists an epsilon from X to Y
1657 *    - if there is no other transition from X
1658 * then X and Y are semantically equivalent and X can be eliminated
1659 * If X is the start state then make Y the start state, else replace the
1660 * target of all transitions to X by transitions to Y.
1661 */
1662static void
1663xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1664    int statenr, i, j, newto;
1665    xmlRegStatePtr state, tmp;
1666
1667    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1668	state = ctxt->states[statenr];
1669	if (state == NULL)
1670	    continue;
1671	if (state->nbTrans != 1)
1672	    continue;
1673	/* is the only transition out a basic transition */
1674	if ((state->trans[0].atom == NULL) &&
1675	    (state->trans[0].to >= 0) &&
1676	    (state->trans[0].to != statenr) &&
1677	    (state->trans[0].counter < 0) &&
1678	    (state->trans[0].count < 0)) {
1679	    newto = state->trans[0].to;
1680
1681            if (state->type == XML_REGEXP_START_STATE) {
1682#ifdef DEBUG_REGEXP_GRAPH
1683		printf("Found simple epsilon trans from start %d to %d\n",
1684		       statenr, newto);
1685#endif
1686            } else {
1687#ifdef DEBUG_REGEXP_GRAPH
1688		printf("Found simple epsilon trans from %d to %d\n",
1689		       statenr, newto);
1690#endif
1691	        for (i = 0;i < state->nbTransTo;i++) {
1692		    tmp = ctxt->states[state->transTo[i]];
1693		    for (j = 0;j < tmp->nbTrans;j++) {
1694			if (tmp->trans[j].to == statenr) {
1695			    tmp->trans[j].to = newto;
1696#ifdef DEBUG_REGEXP_GRAPH
1697			    printf("Changed transition %d on %d to go to %d\n",
1698				   j, tmp->no, newto);
1699#endif
1700                            xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
1701			                          tmp->no);
1702			}
1703		    }
1704		}
1705#if 0
1706	        for (i = 0;i < ctxt->nbStates;i++) {
1707		    tmp = ctxt->states[i];
1708		    for (j = 0;j < tmp->nbTrans;j++) {
1709			if (tmp->trans[j].to == statenr) {
1710			    tmp->trans[j].to = newto;
1711#ifdef DEBUG_REGEXP_GRAPH
1712			    printf("Changed transition %d on %d to go to %d\n",
1713				   j, tmp->no, newto);
1714#endif
1715			}
1716		    }
1717		}
1718#endif
1719		if (state->type == XML_REGEXP_FINAL_STATE)
1720		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
1721		/* eliminate the transition completely */
1722		state->nbTrans = 0;
1723
1724
1725	    }
1726
1727	}
1728    }
1729}
1730/**
1731 * xmlFAEliminateEpsilonTransitions:
1732 * @ctxt:  a regexp parser context
1733 *
1734 */
1735static void
1736xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1737    int statenr, transnr;
1738    xmlRegStatePtr state;
1739    int has_epsilon;
1740
1741    if (ctxt->states == NULL) return;
1742
1743    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
1744
1745    has_epsilon = 0;
1746
1747    /*
1748     * build the completed transitions bypassing the epsilons
1749     * Use a marking algorithm to avoid loops
1750     * mark sink states too.
1751     */
1752    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1753	state = ctxt->states[statenr];
1754	if (state == NULL)
1755	    continue;
1756	if ((state->nbTrans == 0) &&
1757	    (state->type != XML_REGEXP_FINAL_STATE)) {
1758	    state->type = XML_REGEXP_SINK_STATE;
1759	}
1760	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1761	    if ((state->trans[transnr].atom == NULL) &&
1762		(state->trans[transnr].to >= 0)) {
1763		if (state->trans[transnr].to == statenr) {
1764		    state->trans[transnr].to = -1;
1765#ifdef DEBUG_REGEXP_GRAPH
1766		    printf("Removed loopback epsilon trans %d on %d\n",
1767			   transnr, statenr);
1768#endif
1769		} else if (state->trans[transnr].count < 0) {
1770		    int newto = state->trans[transnr].to;
1771
1772#ifdef DEBUG_REGEXP_GRAPH
1773		    printf("Found epsilon trans %d from %d to %d\n",
1774			   transnr, statenr, newto);
1775#endif
1776		    state->mark = XML_REGEXP_MARK_START;
1777		    has_epsilon = 1;
1778		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1779				      newto, state->trans[transnr].counter);
1780		    state->mark = XML_REGEXP_MARK_NORMAL;
1781#ifdef DEBUG_REGEXP_GRAPH
1782		} else {
1783		    printf("Found counted transition %d on %d\n",
1784			   transnr, statenr);
1785#endif
1786	        }
1787	    }
1788	}
1789    }
1790    /*
1791     * Eliminate the epsilon transitions
1792     */
1793    if (has_epsilon) {
1794	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1795	    state = ctxt->states[statenr];
1796	    if (state == NULL)
1797		continue;
1798	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1799		xmlRegTransPtr trans = &(state->trans[transnr]);
1800		if ((trans->atom == NULL) &&
1801		    (trans->count < 0) &&
1802		    (trans->to >= 0)) {
1803		    trans->to = -1;
1804		}
1805	    }
1806	}
1807    }
1808
1809    /*
1810     * Use this pass to detect unreachable states too
1811     */
1812    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1813	state = ctxt->states[statenr];
1814	if (state != NULL)
1815	    state->reached = XML_REGEXP_MARK_NORMAL;
1816    }
1817    state = ctxt->states[0];
1818    if (state != NULL)
1819	state->reached = XML_REGEXP_MARK_START;
1820    while (state != NULL) {
1821	xmlRegStatePtr target = NULL;
1822	state->reached = XML_REGEXP_MARK_VISITED;
1823	/*
1824	 * Mark all states reachable from the current reachable state
1825	 */
1826	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1827	    if ((state->trans[transnr].to >= 0) &&
1828		((state->trans[transnr].atom != NULL) ||
1829		 (state->trans[transnr].count >= 0))) {
1830		int newto = state->trans[transnr].to;
1831
1832		if (ctxt->states[newto] == NULL)
1833		    continue;
1834		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
1835		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
1836		    target = ctxt->states[newto];
1837		}
1838	    }
1839	}
1840
1841	/*
1842	 * find the next accessible state not explored
1843	 */
1844	if (target == NULL) {
1845	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1846		state = ctxt->states[statenr];
1847		if ((state != NULL) && (state->reached ==
1848			XML_REGEXP_MARK_START)) {
1849		    target = state;
1850		    break;
1851		}
1852	    }
1853	}
1854	state = target;
1855    }
1856    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1857	state = ctxt->states[statenr];
1858	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
1859#ifdef DEBUG_REGEXP_GRAPH
1860	    printf("Removed unreachable state %d\n", statenr);
1861#endif
1862	    xmlRegFreeState(state);
1863	    ctxt->states[statenr] = NULL;
1864	}
1865    }
1866
1867}
1868
1869/**
1870 * xmlFACompareAtoms:
1871 * @atom1:  an atom
1872 * @atom2:  an atom
1873 *
1874 * Compares two atoms to check whether they are equivalents
1875 *
1876 * Returns 1 if yes and 0 otherwise
1877 */
1878static int
1879xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
1880    int ret;
1881
1882    if (atom1 == atom2)
1883	return(1);
1884    if ((atom1 == NULL) || (atom2 == NULL))
1885	return(0);
1886
1887    if (atom1->type != atom2->type)
1888	return(0);
1889    switch (atom1->type) {
1890        case XML_REGEXP_STRING:
1891	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
1892	                                 (xmlChar *)atom2->valuep);
1893	    break;
1894        case XML_REGEXP_EPSILON:
1895	    return(1);
1896        case XML_REGEXP_CHARVAL:
1897	    ret = atom1->codepoint == atom2->codepoint;
1898	    break;
1899        case XML_REGEXP_RANGES:
1900	    TODO;
1901	    return(0);
1902	default:
1903	    return(1);
1904    }
1905    if (atom1->neg != atom2->neg)
1906        ret = !ret;
1907    return(ret);
1908}
1909
1910/**
1911 * xmlFARecurseDeterminism:
1912 * @ctxt:  a regexp parser context
1913 *
1914 * Check whether the associated regexp is determinist,
1915 * should be called after xmlFAEliminateEpsilonTransitions()
1916 *
1917 */
1918static int
1919xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1920	                 int to, xmlRegAtomPtr atom) {
1921    int ret = 1;
1922    int transnr;
1923    xmlRegTransPtr t1;
1924
1925    if (state == NULL)
1926	return(ret);
1927    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1928	t1 = &(state->trans[transnr]);
1929	/*
1930	 * check transitions conflicting with the one looked at
1931	 */
1932	if (t1->atom == NULL) {
1933	    if (t1->to == -1)
1934		continue;
1935	    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1936		                           to, atom);
1937	    if (ret == 0)
1938		return(0);
1939	    continue;
1940	}
1941	if (t1->to != to)
1942	    continue;
1943	if (xmlFACompareAtoms(t1->atom, atom))
1944	    return(0);
1945    }
1946    return(ret);
1947}
1948
1949/**
1950 * xmlFAComputesDeterminism:
1951 * @ctxt:  a regexp parser context
1952 *
1953 * Check whether the associated regexp is determinist,
1954 * should be called after xmlFAEliminateEpsilonTransitions()
1955 *
1956 */
1957static int
1958xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
1959    int statenr, transnr;
1960    xmlRegStatePtr state;
1961    xmlRegTransPtr t1, t2;
1962    int i;
1963    int ret = 1;
1964
1965#ifdef DEBUG_REGEXP_GRAPH
1966    printf("xmlFAComputesDeterminism\n");
1967    xmlRegPrintCtxt(stdout, ctxt);
1968#endif
1969    if (ctxt->determinist != -1)
1970	return(ctxt->determinist);
1971
1972    /*
1973     * Check for all states that there aren't 2 transitions
1974     * with the same atom and a different target.
1975     */
1976    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1977	state = ctxt->states[statenr];
1978	if (state == NULL)
1979	    continue;
1980	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1981	    t1 = &(state->trans[transnr]);
1982	    /*
1983	     * Determinism checks in case of counted or all transitions
1984	     * will have to be handled separately
1985	     */
1986	    if (t1->atom == NULL)
1987		continue;
1988	    if (t1->to == -1) /* eliminated */
1989		continue;
1990	    for (i = 0;i < transnr;i++) {
1991		t2 = &(state->trans[i]);
1992		if (t2->to == -1) /* eliminated */
1993		    continue;
1994		if (t2->atom != NULL) {
1995		    if (t1->to == t2->to) {
1996			if (xmlFACompareAtoms(t1->atom, t2->atom))
1997			    t2->to = -1; /* eliminated */
1998		    } else {
1999			/* not determinist ! */
2000			if (xmlFACompareAtoms(t1->atom, t2->atom))
2001			    ret = 0;
2002		    }
2003		} else if (t1->to != -1) {
2004		    /*
2005		     * do the closure in case of remaining specific
2006		     * epsilon transitions like choices or all
2007		     */
2008		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
2009						   t2->to, t2->atom);
2010		    if (ret == 0)
2011			return(0);
2012		}
2013	    }
2014	    if (ret == 0)
2015		break;
2016	}
2017	if (ret == 0)
2018	    break;
2019    }
2020    ctxt->determinist = ret;
2021    return(ret);
2022}
2023
2024/************************************************************************
2025 * 									*
2026 *	Routines to check input against transition atoms		*
2027 * 									*
2028 ************************************************************************/
2029
2030static int
2031xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
2032	                  int start, int end, const xmlChar *blockName) {
2033    int ret = 0;
2034
2035    switch (type) {
2036        case XML_REGEXP_STRING:
2037        case XML_REGEXP_SUBREG:
2038        case XML_REGEXP_RANGES:
2039        case XML_REGEXP_EPSILON:
2040	    return(-1);
2041        case XML_REGEXP_ANYCHAR:
2042	    ret = ((codepoint != '\n') && (codepoint != '\r'));
2043	    break;
2044        case XML_REGEXP_CHARVAL:
2045	    ret = ((codepoint >= start) && (codepoint <= end));
2046	    break;
2047        case XML_REGEXP_NOTSPACE:
2048	    neg = !neg;
2049        case XML_REGEXP_ANYSPACE:
2050	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
2051		   (codepoint == '\t') || (codepoint == ' '));
2052	    break;
2053        case XML_REGEXP_NOTINITNAME:
2054	    neg = !neg;
2055        case XML_REGEXP_INITNAME:
2056	    ret = (IS_LETTER(codepoint) ||
2057		   (codepoint == '_') || (codepoint == ':'));
2058	    break;
2059        case XML_REGEXP_NOTNAMECHAR:
2060	    neg = !neg;
2061        case XML_REGEXP_NAMECHAR:
2062	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
2063		   (codepoint == '.') || (codepoint == '-') ||
2064		   (codepoint == '_') || (codepoint == ':') ||
2065		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
2066	    break;
2067        case XML_REGEXP_NOTDECIMAL:
2068	    neg = !neg;
2069        case XML_REGEXP_DECIMAL:
2070	    ret = xmlUCSIsCatNd(codepoint);
2071	    break;
2072        case XML_REGEXP_REALCHAR:
2073	    neg = !neg;
2074        case XML_REGEXP_NOTREALCHAR:
2075	    ret = xmlUCSIsCatP(codepoint);
2076	    if (ret == 0)
2077		ret = xmlUCSIsCatZ(codepoint);
2078	    if (ret == 0)
2079		ret = xmlUCSIsCatC(codepoint);
2080	    break;
2081        case XML_REGEXP_LETTER:
2082	    ret = xmlUCSIsCatL(codepoint);
2083	    break;
2084        case XML_REGEXP_LETTER_UPPERCASE:
2085	    ret = xmlUCSIsCatLu(codepoint);
2086	    break;
2087        case XML_REGEXP_LETTER_LOWERCASE:
2088	    ret = xmlUCSIsCatLl(codepoint);
2089	    break;
2090        case XML_REGEXP_LETTER_TITLECASE:
2091	    ret = xmlUCSIsCatLt(codepoint);
2092	    break;
2093        case XML_REGEXP_LETTER_MODIFIER:
2094	    ret = xmlUCSIsCatLm(codepoint);
2095	    break;
2096        case XML_REGEXP_LETTER_OTHERS:
2097	    ret = xmlUCSIsCatLo(codepoint);
2098	    break;
2099        case XML_REGEXP_MARK:
2100	    ret = xmlUCSIsCatM(codepoint);
2101	    break;
2102        case XML_REGEXP_MARK_NONSPACING:
2103	    ret = xmlUCSIsCatMn(codepoint);
2104	    break;
2105        case XML_REGEXP_MARK_SPACECOMBINING:
2106	    ret = xmlUCSIsCatMc(codepoint);
2107	    break;
2108        case XML_REGEXP_MARK_ENCLOSING:
2109	    ret = xmlUCSIsCatMe(codepoint);
2110	    break;
2111        case XML_REGEXP_NUMBER:
2112	    ret = xmlUCSIsCatN(codepoint);
2113	    break;
2114        case XML_REGEXP_NUMBER_DECIMAL:
2115	    ret = xmlUCSIsCatNd(codepoint);
2116	    break;
2117        case XML_REGEXP_NUMBER_LETTER:
2118	    ret = xmlUCSIsCatNl(codepoint);
2119	    break;
2120        case XML_REGEXP_NUMBER_OTHERS:
2121	    ret = xmlUCSIsCatNo(codepoint);
2122	    break;
2123        case XML_REGEXP_PUNCT:
2124	    ret = xmlUCSIsCatP(codepoint);
2125	    break;
2126        case XML_REGEXP_PUNCT_CONNECTOR:
2127	    ret = xmlUCSIsCatPc(codepoint);
2128	    break;
2129        case XML_REGEXP_PUNCT_DASH:
2130	    ret = xmlUCSIsCatPd(codepoint);
2131	    break;
2132        case XML_REGEXP_PUNCT_OPEN:
2133	    ret = xmlUCSIsCatPs(codepoint);
2134	    break;
2135        case XML_REGEXP_PUNCT_CLOSE:
2136	    ret = xmlUCSIsCatPe(codepoint);
2137	    break;
2138        case XML_REGEXP_PUNCT_INITQUOTE:
2139	    ret = xmlUCSIsCatPi(codepoint);
2140	    break;
2141        case XML_REGEXP_PUNCT_FINQUOTE:
2142	    ret = xmlUCSIsCatPf(codepoint);
2143	    break;
2144        case XML_REGEXP_PUNCT_OTHERS:
2145	    ret = xmlUCSIsCatPo(codepoint);
2146	    break;
2147        case XML_REGEXP_SEPAR:
2148	    ret = xmlUCSIsCatZ(codepoint);
2149	    break;
2150        case XML_REGEXP_SEPAR_SPACE:
2151	    ret = xmlUCSIsCatZs(codepoint);
2152	    break;
2153        case XML_REGEXP_SEPAR_LINE:
2154	    ret = xmlUCSIsCatZl(codepoint);
2155	    break;
2156        case XML_REGEXP_SEPAR_PARA:
2157	    ret = xmlUCSIsCatZp(codepoint);
2158	    break;
2159        case XML_REGEXP_SYMBOL:
2160	    ret = xmlUCSIsCatS(codepoint);
2161	    break;
2162        case XML_REGEXP_SYMBOL_MATH:
2163	    ret = xmlUCSIsCatSm(codepoint);
2164	    break;
2165        case XML_REGEXP_SYMBOL_CURRENCY:
2166	    ret = xmlUCSIsCatSc(codepoint);
2167	    break;
2168        case XML_REGEXP_SYMBOL_MODIFIER:
2169	    ret = xmlUCSIsCatSk(codepoint);
2170	    break;
2171        case XML_REGEXP_SYMBOL_OTHERS:
2172	    ret = xmlUCSIsCatSo(codepoint);
2173	    break;
2174        case XML_REGEXP_OTHER:
2175	    ret = xmlUCSIsCatC(codepoint);
2176	    break;
2177        case XML_REGEXP_OTHER_CONTROL:
2178	    ret = xmlUCSIsCatCc(codepoint);
2179	    break;
2180        case XML_REGEXP_OTHER_FORMAT:
2181	    ret = xmlUCSIsCatCf(codepoint);
2182	    break;
2183        case XML_REGEXP_OTHER_PRIVATE:
2184	    ret = xmlUCSIsCatCo(codepoint);
2185	    break;
2186        case XML_REGEXP_OTHER_NA:
2187	    /* ret = xmlUCSIsCatCn(codepoint); */
2188	    /* Seems it doesn't exist anymore in recent Unicode releases */
2189	    ret = 0;
2190	    break;
2191        case XML_REGEXP_BLOCK_NAME:
2192	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
2193	    break;
2194    }
2195    if (neg)
2196	return(!ret);
2197    return(ret);
2198}
2199
2200static int
2201xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
2202    int i, ret = 0;
2203    xmlRegRangePtr range;
2204
2205    if ((atom == NULL) || (!IS_CHAR(codepoint)))
2206	return(-1);
2207
2208    switch (atom->type) {
2209        case XML_REGEXP_SUBREG:
2210        case XML_REGEXP_EPSILON:
2211	    return(-1);
2212        case XML_REGEXP_CHARVAL:
2213            return(codepoint == atom->codepoint);
2214        case XML_REGEXP_RANGES: {
2215	    int accept = 0;
2216
2217	    for (i = 0;i < atom->nbRanges;i++) {
2218		range = atom->ranges[i];
2219		if (range->neg == 2) {
2220		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2221						0, range->start, range->end,
2222						range->blockName);
2223		    if (ret != 0)
2224			return(0); /* excluded char */
2225		} else if (range->neg) {
2226		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2227						0, range->start, range->end,
2228						range->blockName);
2229		    if (ret == 0)
2230		        accept = 1;
2231		    else
2232		        return(0);
2233		} else {
2234		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
2235						0, range->start, range->end,
2236						range->blockName);
2237		    if (ret != 0)
2238			accept = 1; /* might still be excluded */
2239		}
2240	    }
2241	    return(accept);
2242	}
2243        case XML_REGEXP_STRING:
2244	    printf("TODO: XML_REGEXP_STRING\n");
2245	    return(-1);
2246        case XML_REGEXP_ANYCHAR:
2247        case XML_REGEXP_ANYSPACE:
2248        case XML_REGEXP_NOTSPACE:
2249        case XML_REGEXP_INITNAME:
2250        case XML_REGEXP_NOTINITNAME:
2251        case XML_REGEXP_NAMECHAR:
2252        case XML_REGEXP_NOTNAMECHAR:
2253        case XML_REGEXP_DECIMAL:
2254        case XML_REGEXP_NOTDECIMAL:
2255        case XML_REGEXP_REALCHAR:
2256        case XML_REGEXP_NOTREALCHAR:
2257        case XML_REGEXP_LETTER:
2258        case XML_REGEXP_LETTER_UPPERCASE:
2259        case XML_REGEXP_LETTER_LOWERCASE:
2260        case XML_REGEXP_LETTER_TITLECASE:
2261        case XML_REGEXP_LETTER_MODIFIER:
2262        case XML_REGEXP_LETTER_OTHERS:
2263        case XML_REGEXP_MARK:
2264        case XML_REGEXP_MARK_NONSPACING:
2265        case XML_REGEXP_MARK_SPACECOMBINING:
2266        case XML_REGEXP_MARK_ENCLOSING:
2267        case XML_REGEXP_NUMBER:
2268        case XML_REGEXP_NUMBER_DECIMAL:
2269        case XML_REGEXP_NUMBER_LETTER:
2270        case XML_REGEXP_NUMBER_OTHERS:
2271        case XML_REGEXP_PUNCT:
2272        case XML_REGEXP_PUNCT_CONNECTOR:
2273        case XML_REGEXP_PUNCT_DASH:
2274        case XML_REGEXP_PUNCT_OPEN:
2275        case XML_REGEXP_PUNCT_CLOSE:
2276        case XML_REGEXP_PUNCT_INITQUOTE:
2277        case XML_REGEXP_PUNCT_FINQUOTE:
2278        case XML_REGEXP_PUNCT_OTHERS:
2279        case XML_REGEXP_SEPAR:
2280        case XML_REGEXP_SEPAR_SPACE:
2281        case XML_REGEXP_SEPAR_LINE:
2282        case XML_REGEXP_SEPAR_PARA:
2283        case XML_REGEXP_SYMBOL:
2284        case XML_REGEXP_SYMBOL_MATH:
2285        case XML_REGEXP_SYMBOL_CURRENCY:
2286        case XML_REGEXP_SYMBOL_MODIFIER:
2287        case XML_REGEXP_SYMBOL_OTHERS:
2288        case XML_REGEXP_OTHER:
2289        case XML_REGEXP_OTHER_CONTROL:
2290        case XML_REGEXP_OTHER_FORMAT:
2291        case XML_REGEXP_OTHER_PRIVATE:
2292        case XML_REGEXP_OTHER_NA:
2293	case XML_REGEXP_BLOCK_NAME:
2294	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
2295		                            (const xmlChar *)atom->valuep);
2296	    if (atom->neg)
2297		ret = !ret;
2298	    break;
2299    }
2300    return(ret);
2301}
2302
2303/************************************************************************
2304 * 									*
2305 *	Saving and restoring state of an execution context		*
2306 * 									*
2307 ************************************************************************/
2308
2309#ifdef DEBUG_REGEXP_EXEC
2310static void
2311xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
2312    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
2313    if (exec->inputStack != NULL) {
2314	int i;
2315	printf(": ");
2316	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
2317	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
2318    } else {
2319	printf(": %s", &(exec->inputString[exec->index]));
2320    }
2321    printf("\n");
2322}
2323#endif
2324
2325static void
2326xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
2327#ifdef DEBUG_REGEXP_EXEC
2328    printf("saving ");
2329    exec->transno++;
2330    xmlFARegDebugExec(exec);
2331    exec->transno--;
2332#endif
2333
2334    if (exec->maxRollbacks == 0) {
2335	exec->maxRollbacks = 4;
2336	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
2337		                             sizeof(xmlRegExecRollback));
2338	if (exec->rollbacks == NULL) {
2339	    xmlRegexpErrMemory(NULL, "saving regexp");
2340	    exec->maxRollbacks = 0;
2341	    return;
2342	}
2343	memset(exec->rollbacks, 0,
2344	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
2345    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
2346	xmlRegExecRollback *tmp;
2347	int len = exec->maxRollbacks;
2348
2349	exec->maxRollbacks *= 2;
2350	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
2351			exec->maxRollbacks * sizeof(xmlRegExecRollback));
2352	if (tmp == NULL) {
2353	    xmlRegexpErrMemory(NULL, "saving regexp");
2354	    exec->maxRollbacks /= 2;
2355	    return;
2356	}
2357	exec->rollbacks = tmp;
2358	tmp = &exec->rollbacks[len];
2359	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
2360    }
2361    exec->rollbacks[exec->nbRollbacks].state = exec->state;
2362    exec->rollbacks[exec->nbRollbacks].index = exec->index;
2363    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
2364    if (exec->comp->nbCounters > 0) {
2365	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2366	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
2367		xmlMalloc(exec->comp->nbCounters * sizeof(int));
2368	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2369		xmlRegexpErrMemory(NULL, "saving regexp");
2370		exec->status = -5;
2371		return;
2372	    }
2373	}
2374	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
2375	       exec->comp->nbCounters * sizeof(int));
2376    }
2377    exec->nbRollbacks++;
2378}
2379
2380static void
2381xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
2382    if (exec->nbRollbacks <= 0) {
2383	exec->status = -1;
2384#ifdef DEBUG_REGEXP_EXEC
2385	printf("rollback failed on empty stack\n");
2386#endif
2387	return;
2388    }
2389    exec->nbRollbacks--;
2390    exec->state = exec->rollbacks[exec->nbRollbacks].state;
2391    exec->index = exec->rollbacks[exec->nbRollbacks].index;
2392    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
2393    if (exec->comp->nbCounters > 0) {
2394	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2395	    fprintf(stderr, "exec save: allocation failed");
2396	    exec->status = -6;
2397	    return;
2398	}
2399	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
2400	       exec->comp->nbCounters * sizeof(int));
2401    }
2402
2403#ifdef DEBUG_REGEXP_EXEC
2404    printf("restored ");
2405    xmlFARegDebugExec(exec);
2406#endif
2407}
2408
2409/************************************************************************
2410 * 									*
2411 *	Verifier, running an input against a compiled regexp		*
2412 * 									*
2413 ************************************************************************/
2414
2415static int
2416xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
2417    xmlRegExecCtxt execval;
2418    xmlRegExecCtxtPtr exec = &execval;
2419    int ret, codepoint = 0, len;
2420
2421    exec->inputString = content;
2422    exec->index = 0;
2423    exec->determinist = 1;
2424    exec->maxRollbacks = 0;
2425    exec->nbRollbacks = 0;
2426    exec->rollbacks = NULL;
2427    exec->status = 0;
2428    exec->comp = comp;
2429    exec->state = comp->states[0];
2430    exec->transno = 0;
2431    exec->transcount = 0;
2432    exec->inputStack = NULL;
2433    exec->inputStackMax = 0;
2434    if (comp->nbCounters > 0) {
2435	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2436	if (exec->counts == NULL) {
2437	    xmlRegexpErrMemory(NULL, "running regexp");
2438	    return(-1);
2439	}
2440        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2441    } else
2442	exec->counts = NULL;
2443    while ((exec->status == 0) &&
2444	   ((exec->inputString[exec->index] != 0) ||
2445	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2446	xmlRegTransPtr trans;
2447	xmlRegAtomPtr atom;
2448
2449	/*
2450	 * If end of input on non-terminal state, rollback, however we may
2451	 * still have epsilon like transition for counted transitions
2452	 * on counters, in that case don't break too early.  Additionally,
2453	 * if we are working on a range like "AB{0,2}", where B is not present,
2454	 * we don't want to break.
2455	 */
2456	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
2457	    /*
2458	     * if there is a transition, we must check if
2459	     *  atom allows minOccurs of 0
2460	     */
2461	    if (exec->transno < exec->state->nbTrans) {
2462	        trans = &exec->state->trans[exec->transno];
2463		if (trans->to >=0) {
2464		    atom = trans->atom;
2465		    if (!((atom->min == 0) && (atom->max > 0)))
2466		        goto rollback;
2467		}
2468	    } else
2469	        goto rollback;
2470	}
2471
2472	exec->transcount = 0;
2473	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2474	    trans = &exec->state->trans[exec->transno];
2475	    if (trans->to < 0)
2476		continue;
2477	    atom = trans->atom;
2478	    ret = 0;
2479	    if (trans->count >= 0) {
2480		int count;
2481		xmlRegCounterPtr counter;
2482
2483		/*
2484		 * A counted transition.
2485		 */
2486
2487		count = exec->counts[trans->count];
2488		counter = &exec->comp->counters[trans->count];
2489#ifdef DEBUG_REGEXP_EXEC
2490		printf("testing count %d: val %d, min %d, max %d\n",
2491		       trans->count, count, counter->min,  counter->max);
2492#endif
2493		ret = ((count >= counter->min) && (count <= counter->max));
2494	    } else if (atom == NULL) {
2495		fprintf(stderr, "epsilon transition left at runtime\n");
2496		exec->status = -2;
2497		break;
2498	    } else if (exec->inputString[exec->index] != 0) {
2499                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2500		ret = xmlRegCheckCharacter(atom, codepoint);
2501		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
2502		    xmlRegStatePtr to = comp->states[trans->to];
2503
2504		    /*
2505		     * this is a multiple input sequence
2506		     */
2507		    if (exec->state->nbTrans > exec->transno + 1) {
2508			xmlFARegExecSave(exec);
2509		    }
2510		    exec->transcount = 1;
2511		    do {
2512			/*
2513			 * Try to progress as much as possible on the input
2514			 */
2515			if (exec->transcount == atom->max) {
2516			    break;
2517			}
2518			exec->index += len;
2519			/*
2520			 * End of input: stop here
2521			 */
2522			if (exec->inputString[exec->index] == 0) {
2523			    exec->index -= len;
2524			    break;
2525			}
2526			if (exec->transcount >= atom->min) {
2527			    int transno = exec->transno;
2528			    xmlRegStatePtr state = exec->state;
2529
2530			    /*
2531			     * The transition is acceptable save it
2532			     */
2533			    exec->transno = -1; /* trick */
2534			    exec->state = to;
2535			    xmlFARegExecSave(exec);
2536			    exec->transno = transno;
2537			    exec->state = state;
2538			}
2539			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2540				              len);
2541			ret = xmlRegCheckCharacter(atom, codepoint);
2542			exec->transcount++;
2543		    } while (ret == 1);
2544		    if (exec->transcount < atom->min)
2545			ret = 0;
2546
2547		    /*
2548		     * If the last check failed but one transition was found
2549		     * possible, rollback
2550		     */
2551		    if (ret < 0)
2552			ret = 0;
2553		    if (ret == 0) {
2554			goto rollback;
2555		    }
2556		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
2557		    /*
2558		     * we don't match on the codepoint, but minOccurs of 0
2559		     * says that's ok.  Setting len to 0 inhibits stepping
2560		     * over the codepoint.
2561		     */
2562		    exec->transcount = 1;
2563		    len = 0;
2564		    ret = 1;
2565		}
2566	    } else if ((atom->min == 0) && (atom->max > 0)) {
2567	        /* another spot to match when minOccurs is 0 */
2568		exec->transcount = 1;
2569		len = 0;
2570		ret = 1;
2571	    }
2572	    if (ret == 1) {
2573		if (exec->state->nbTrans > exec->transno + 1) {
2574		    xmlFARegExecSave(exec);
2575		}
2576		if (trans->counter >= 0) {
2577#ifdef DEBUG_REGEXP_EXEC
2578		    printf("Increasing count %d\n", trans->counter);
2579#endif
2580		    exec->counts[trans->counter]++;
2581		}
2582#ifdef DEBUG_REGEXP_EXEC
2583		printf("entering state %d\n", trans->to);
2584#endif
2585		exec->state = comp->states[trans->to];
2586		exec->transno = 0;
2587		if (trans->atom != NULL) {
2588		    exec->index += len;
2589		}
2590		goto progress;
2591	    } else if (ret < 0) {
2592		exec->status = -4;
2593		break;
2594	    }
2595	}
2596	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2597rollback:
2598	    /*
2599	     * Failed to find a way out
2600	     */
2601	    exec->determinist = 0;
2602	    xmlFARegExecRollBack(exec);
2603	}
2604progress:
2605	continue;
2606    }
2607    if (exec->rollbacks != NULL) {
2608	if (exec->counts != NULL) {
2609	    int i;
2610
2611	    for (i = 0;i < exec->maxRollbacks;i++)
2612		if (exec->rollbacks[i].counts != NULL)
2613		    xmlFree(exec->rollbacks[i].counts);
2614	}
2615	xmlFree(exec->rollbacks);
2616    }
2617    if (exec->counts != NULL)
2618	xmlFree(exec->counts);
2619    if (exec->status == 0)
2620	return(1);
2621    if (exec->status == -1)
2622	return(0);
2623    return(exec->status);
2624}
2625
2626/************************************************************************
2627 * 									*
2628 *	Progressive interface to the verifier one atom at a time	*
2629 * 									*
2630 ************************************************************************/
2631#ifdef DEBUG_ERR
2632static void testerr(xmlRegExecCtxtPtr exec);
2633#endif
2634
2635/**
2636 * xmlRegNewExecCtxt:
2637 * @comp: a precompiled regular expression
2638 * @callback: a callback function used for handling progresses in the
2639 *            automata matching phase
2640 * @data: the context data associated to the callback in this context
2641 *
2642 * Build a context used for progressive evaluation of a regexp.
2643 *
2644 * Returns the new context
2645 */
2646xmlRegExecCtxtPtr
2647xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
2648    xmlRegExecCtxtPtr exec;
2649
2650    if (comp == NULL)
2651	return(NULL);
2652    if ((comp->compact == NULL) && (comp->states == NULL))
2653        return(NULL);
2654    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
2655    if (exec == NULL) {
2656	xmlRegexpErrMemory(NULL, "creating execution context");
2657	return(NULL);
2658    }
2659    memset(exec, 0, sizeof(xmlRegExecCtxt));
2660    exec->inputString = NULL;
2661    exec->index = 0;
2662    exec->determinist = 1;
2663    exec->maxRollbacks = 0;
2664    exec->nbRollbacks = 0;
2665    exec->rollbacks = NULL;
2666    exec->status = 0;
2667    exec->comp = comp;
2668    if (comp->compact == NULL)
2669	exec->state = comp->states[0];
2670    exec->transno = 0;
2671    exec->transcount = 0;
2672    exec->callback = callback;
2673    exec->data = data;
2674    if (comp->nbCounters > 0) {
2675        /*
2676	 * For error handling, exec->counts is allocated twice the size
2677	 * the second half is used to store the data in case of rollback
2678	 */
2679	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
2680	                                 * 2);
2681	if (exec->counts == NULL) {
2682	    xmlRegexpErrMemory(NULL, "creating execution context");
2683	    xmlFree(exec);
2684	    return(NULL);
2685	}
2686        memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
2687	exec->errCounts = &exec->counts[comp->nbCounters];
2688    } else {
2689	exec->counts = NULL;
2690	exec->errCounts = NULL;
2691    }
2692    exec->inputStackMax = 0;
2693    exec->inputStackNr = 0;
2694    exec->inputStack = NULL;
2695    exec->errStateNo = -1;
2696    exec->errString = NULL;
2697    return(exec);
2698}
2699
2700/**
2701 * xmlRegFreeExecCtxt:
2702 * @exec: a regular expression evaulation context
2703 *
2704 * Free the structures associated to a regular expression evaulation context.
2705 */
2706void
2707xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
2708    if (exec == NULL)
2709	return;
2710
2711    if (exec->rollbacks != NULL) {
2712	if (exec->counts != NULL) {
2713	    int i;
2714
2715	    for (i = 0;i < exec->maxRollbacks;i++)
2716		if (exec->rollbacks[i].counts != NULL)
2717		    xmlFree(exec->rollbacks[i].counts);
2718	}
2719	xmlFree(exec->rollbacks);
2720    }
2721    if (exec->counts != NULL)
2722	xmlFree(exec->counts);
2723    if (exec->inputStack != NULL) {
2724	int i;
2725
2726	for (i = 0;i < exec->inputStackNr;i++) {
2727	    if (exec->inputStack[i].value != NULL)
2728		xmlFree(exec->inputStack[i].value);
2729	}
2730	xmlFree(exec->inputStack);
2731    }
2732    if (exec->errString != NULL)
2733        xmlFree(exec->errString);
2734    xmlFree(exec);
2735}
2736
2737static void
2738xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2739	                    void *data) {
2740#ifdef DEBUG_PUSH
2741    printf("saving value: %d:%s\n", exec->inputStackNr, value);
2742#endif
2743    if (exec->inputStackMax == 0) {
2744	exec->inputStackMax = 4;
2745	exec->inputStack = (xmlRegInputTokenPtr)
2746	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
2747	if (exec->inputStack == NULL) {
2748	    xmlRegexpErrMemory(NULL, "pushing input string");
2749	    exec->inputStackMax = 0;
2750	    return;
2751	}
2752    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
2753	xmlRegInputTokenPtr tmp;
2754
2755	exec->inputStackMax *= 2;
2756	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
2757			exec->inputStackMax * sizeof(xmlRegInputToken));
2758	if (tmp == NULL) {
2759	    xmlRegexpErrMemory(NULL, "pushing input string");
2760	    exec->inputStackMax /= 2;
2761	    return;
2762	}
2763	exec->inputStack = tmp;
2764    }
2765    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
2766    exec->inputStack[exec->inputStackNr].data = data;
2767    exec->inputStackNr++;
2768    exec->inputStack[exec->inputStackNr].value = NULL;
2769    exec->inputStack[exec->inputStackNr].data = NULL;
2770}
2771
2772/**
2773 * xmlRegStrEqualWildcard:
2774 * @expStr:  the string to be evaluated
2775 * @valStr:  the validation string
2776 *
2777 * Checks if both strings are equal or have the same content. "*"
2778 * can be used as a wildcard in @valStr; "|" is used as a seperator of
2779 * substrings in both @expStr and @valStr.
2780 *
2781 * Returns 1 if the comparison is satisfied and the number of substrings
2782 * is equal, 0 otherwise.
2783 */
2784
2785static int
2786xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
2787    if (expStr == valStr) return(1);
2788    if (expStr == NULL) return(0);
2789    if (valStr == NULL) return(0);
2790    do {
2791	/*
2792	* Eval if we have a wildcard for the current item.
2793	*/
2794        if (*expStr != *valStr) {
2795	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
2796		do {
2797		    if (*valStr == XML_REG_STRING_SEPARATOR)
2798			break;
2799		    valStr++;
2800		} while (*valStr != 0);
2801		continue;
2802	    } else
2803		return(0);
2804	}
2805	expStr++;
2806	valStr++;
2807    } while (*valStr != 0);
2808    if (*expStr != 0)
2809	return (0);
2810    else
2811	return (1);
2812}
2813
2814/**
2815 * xmlRegCompactPushString:
2816 * @exec: a regexp execution context
2817 * @comp:  the precompiled exec with a compact table
2818 * @value: a string token input
2819 * @data: data associated to the token to reuse in callbacks
2820 *
2821 * Push one input token in the execution context
2822 *
2823 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2824 *     a negative value in case of error.
2825 */
2826static int
2827xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
2828	                xmlRegexpPtr comp,
2829	                const xmlChar *value,
2830	                void *data) {
2831    int state = exec->index;
2832    int i, target;
2833
2834    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
2835	return(-1);
2836
2837    if (value == NULL) {
2838	/*
2839	 * are we at a final state ?
2840	 */
2841	if (comp->compact[state * (comp->nbstrings + 1)] ==
2842            XML_REGEXP_FINAL_STATE)
2843	    return(1);
2844	return(0);
2845    }
2846
2847#ifdef DEBUG_PUSH
2848    printf("value pushed: %s\n", value);
2849#endif
2850
2851    /*
2852     * Examine all outside transitions from current state
2853     */
2854    for (i = 0;i < comp->nbstrings;i++) {
2855	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
2856	if ((target > 0) && (target <= comp->nbstates)) {
2857	    target--; /* to avoid 0 */
2858	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
2859		exec->index = target;
2860		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
2861		    exec->callback(exec->data, value,
2862			  comp->transdata[state * comp->nbstrings + i], data);
2863		}
2864#ifdef DEBUG_PUSH
2865		printf("entering state %d\n", target);
2866#endif
2867		if (comp->compact[target * (comp->nbstrings + 1)] ==
2868		    XML_REGEXP_SINK_STATE)
2869		    goto error;
2870
2871		if (comp->compact[target * (comp->nbstrings + 1)] ==
2872		    XML_REGEXP_FINAL_STATE)
2873		    return(1);
2874		return(0);
2875	    }
2876	}
2877    }
2878    /*
2879     * Failed to find an exit transition out from current state for the
2880     * current token
2881     */
2882#ifdef DEBUG_PUSH
2883    printf("failed to find a transition for %s on state %d\n", value, state);
2884#endif
2885error:
2886    if (exec->errString != NULL)
2887        xmlFree(exec->errString);
2888    exec->errString = xmlStrdup(value);
2889    exec->errStateNo = state;
2890    exec->status = -1;
2891#ifdef DEBUG_ERR
2892    testerr(exec);
2893#endif
2894    return(-1);
2895}
2896
2897/**
2898 * xmlRegExecPushString:
2899 * @exec: a regexp execution context or NULL to indicate the end
2900 * @value: a string token input
2901 * @data: data associated to the token to reuse in callbacks
2902 *
2903 * Push one input token in the execution context
2904 *
2905 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2906 *     a negative value in case of error.
2907 */
2908int
2909xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2910	             void *data) {
2911    xmlRegTransPtr trans;
2912    xmlRegAtomPtr atom;
2913    int ret;
2914    int final = 0;
2915    int progress = 1;
2916
2917    if (exec == NULL)
2918	return(-1);
2919    if (exec->comp == NULL)
2920	return(-1);
2921    if (exec->status != 0)
2922	return(exec->status);
2923
2924    if (exec->comp->compact != NULL)
2925	return(xmlRegCompactPushString(exec, exec->comp, value, data));
2926
2927    if (value == NULL) {
2928        if (exec->state->type == XML_REGEXP_FINAL_STATE)
2929	    return(1);
2930	final = 1;
2931    }
2932
2933#ifdef DEBUG_PUSH
2934    printf("value pushed: %s\n", value);
2935#endif
2936    /*
2937     * If we have an active rollback stack push the new value there
2938     * and get back to where we were left
2939     */
2940    if ((value != NULL) && (exec->inputStackNr > 0)) {
2941	xmlFARegExecSaveInputString(exec, value, data);
2942	value = exec->inputStack[exec->index].value;
2943	data = exec->inputStack[exec->index].data;
2944#ifdef DEBUG_PUSH
2945	printf("value loaded: %s\n", value);
2946#endif
2947    }
2948
2949    while ((exec->status == 0) &&
2950	   ((value != NULL) ||
2951	    ((final == 1) &&
2952	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
2953
2954	/*
2955	 * End of input on non-terminal state, rollback, however we may
2956	 * still have epsilon like transition for counted transitions
2957	 * on counters, in that case don't break too early.
2958	 */
2959	if ((value == NULL) && (exec->counts == NULL))
2960	    goto rollback;
2961
2962	exec->transcount = 0;
2963	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2964	    trans = &exec->state->trans[exec->transno];
2965	    if (trans->to < 0)
2966		continue;
2967	    atom = trans->atom;
2968	    ret = 0;
2969	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
2970		int i;
2971		int count;
2972		xmlRegTransPtr t;
2973		xmlRegCounterPtr counter;
2974
2975		ret = 0;
2976
2977#ifdef DEBUG_PUSH
2978		printf("testing all lax %d\n", trans->count);
2979#endif
2980		/*
2981		 * Check all counted transitions from the current state
2982		 */
2983		if ((value == NULL) && (final)) {
2984		    ret = 1;
2985		} else if (value != NULL) {
2986		    for (i = 0;i < exec->state->nbTrans;i++) {
2987			t = &exec->state->trans[i];
2988			if ((t->counter < 0) || (t == trans))
2989			    continue;
2990			counter = &exec->comp->counters[t->counter];
2991			count = exec->counts[t->counter];
2992			if ((count < counter->max) &&
2993		            (t->atom != NULL) &&
2994			    (xmlStrEqual(value, t->atom->valuep))) {
2995			    ret = 0;
2996			    break;
2997			}
2998			if ((count >= counter->min) &&
2999			    (count < counter->max) &&
3000			    (xmlStrEqual(value, t->atom->valuep))) {
3001			    ret = 1;
3002			    break;
3003			}
3004		    }
3005		}
3006	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3007		int i;
3008		int count;
3009		xmlRegTransPtr t;
3010		xmlRegCounterPtr counter;
3011
3012		ret = 1;
3013
3014#ifdef DEBUG_PUSH
3015		printf("testing all %d\n", trans->count);
3016#endif
3017		/*
3018		 * Check all counted transitions from the current state
3019		 */
3020		for (i = 0;i < exec->state->nbTrans;i++) {
3021                    t = &exec->state->trans[i];
3022		    if ((t->counter < 0) || (t == trans))
3023			continue;
3024                    counter = &exec->comp->counters[t->counter];
3025		    count = exec->counts[t->counter];
3026		    if ((count < counter->min) || (count > counter->max)) {
3027			ret = 0;
3028			break;
3029		    }
3030		}
3031	    } else if (trans->count >= 0) {
3032		int count;
3033		xmlRegCounterPtr counter;
3034
3035		/*
3036		 * A counted transition.
3037		 */
3038
3039		count = exec->counts[trans->count];
3040		counter = &exec->comp->counters[trans->count];
3041#ifdef DEBUG_PUSH
3042		printf("testing count %d: val %d, min %d, max %d\n",
3043		       trans->count, count, counter->min,  counter->max);
3044#endif
3045		ret = ((count >= counter->min) && (count <= counter->max));
3046	    } else if (atom == NULL) {
3047		fprintf(stderr, "epsilon transition left at runtime\n");
3048		exec->status = -2;
3049		break;
3050	    } else if (value != NULL) {
3051		ret = xmlRegStrEqualWildcard(atom->valuep, value);
3052		if (atom->neg)
3053		    ret = !ret;
3054		if ((ret == 1) && (trans->counter >= 0)) {
3055		    xmlRegCounterPtr counter;
3056		    int count;
3057
3058		    count = exec->counts[trans->counter];
3059		    counter = &exec->comp->counters[trans->counter];
3060		    if (count >= counter->max)
3061			ret = 0;
3062		}
3063
3064		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3065		    xmlRegStatePtr to = exec->comp->states[trans->to];
3066
3067		    /*
3068		     * this is a multiple input sequence
3069		     */
3070		    if (exec->state->nbTrans > exec->transno + 1) {
3071			if (exec->inputStackNr <= 0) {
3072			    xmlFARegExecSaveInputString(exec, value, data);
3073			}
3074			xmlFARegExecSave(exec);
3075		    }
3076		    exec->transcount = 1;
3077		    do {
3078			/*
3079			 * Try to progress as much as possible on the input
3080			 */
3081			if (exec->transcount == atom->max) {
3082			    break;
3083			}
3084			exec->index++;
3085			value = exec->inputStack[exec->index].value;
3086			data = exec->inputStack[exec->index].data;
3087#ifdef DEBUG_PUSH
3088			printf("value loaded: %s\n", value);
3089#endif
3090
3091			/*
3092			 * End of input: stop here
3093			 */
3094			if (value == NULL) {
3095			    exec->index --;
3096			    break;
3097			}
3098			if (exec->transcount >= atom->min) {
3099			    int transno = exec->transno;
3100			    xmlRegStatePtr state = exec->state;
3101
3102			    /*
3103			     * The transition is acceptable save it
3104			     */
3105			    exec->transno = -1; /* trick */
3106			    exec->state = to;
3107			    if (exec->inputStackNr <= 0) {
3108				xmlFARegExecSaveInputString(exec, value, data);
3109			    }
3110			    xmlFARegExecSave(exec);
3111			    exec->transno = transno;
3112			    exec->state = state;
3113			}
3114			ret = xmlStrEqual(value, atom->valuep);
3115			exec->transcount++;
3116		    } while (ret == 1);
3117		    if (exec->transcount < atom->min)
3118			ret = 0;
3119
3120		    /*
3121		     * If the last check failed but one transition was found
3122		     * possible, rollback
3123		     */
3124		    if (ret < 0)
3125			ret = 0;
3126		    if (ret == 0) {
3127			goto rollback;
3128		    }
3129		}
3130	    }
3131	    if (ret == 1) {
3132		if ((exec->callback != NULL) && (atom != NULL) &&
3133			(data != NULL)) {
3134		    exec->callback(exec->data, atom->valuep,
3135			           atom->data, data);
3136		}
3137		if (exec->state->nbTrans > exec->transno + 1) {
3138		    if (exec->inputStackNr <= 0) {
3139			xmlFARegExecSaveInputString(exec, value, data);
3140		    }
3141		    xmlFARegExecSave(exec);
3142		}
3143		if (trans->counter >= 0) {
3144#ifdef DEBUG_PUSH
3145		    printf("Increasing count %d\n", trans->counter);
3146#endif
3147		    exec->counts[trans->counter]++;
3148		}
3149#ifdef DEBUG_PUSH
3150		printf("entering state %d\n", trans->to);
3151#endif
3152                if ((exec->comp->states[trans->to] != NULL) &&
3153		    (exec->comp->states[trans->to]->type ==
3154		     XML_REGEXP_SINK_STATE)) {
3155		    /*
3156		     * entering a sink state, save the current state as error
3157		     * state.
3158		     */
3159		    if (exec->errString != NULL)
3160			xmlFree(exec->errString);
3161		    exec->errString = xmlStrdup(value);
3162		    exec->errState = exec->state;
3163		    memcpy(exec->errCounts, exec->counts,
3164			   exec->comp->nbCounters * sizeof(int));
3165		}
3166		exec->state = exec->comp->states[trans->to];
3167		exec->transno = 0;
3168		if (trans->atom != NULL) {
3169		    if (exec->inputStack != NULL) {
3170			exec->index++;
3171			if (exec->index < exec->inputStackNr) {
3172			    value = exec->inputStack[exec->index].value;
3173			    data = exec->inputStack[exec->index].data;
3174#ifdef DEBUG_PUSH
3175			    printf("value loaded: %s\n", value);
3176#endif
3177			} else {
3178			    value = NULL;
3179			    data = NULL;
3180#ifdef DEBUG_PUSH
3181			    printf("end of input\n");
3182#endif
3183			}
3184		    } else {
3185			value = NULL;
3186			data = NULL;
3187#ifdef DEBUG_PUSH
3188			printf("end of input\n");
3189#endif
3190		    }
3191		}
3192		goto progress;
3193	    } else if (ret < 0) {
3194		exec->status = -4;
3195		break;
3196	    }
3197	}
3198	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3199rollback:
3200            /*
3201	     * if we didn't yet rollback on the current input
3202	     * store the current state as the error state.
3203	     */
3204	    if ((progress) && (exec->state != NULL) &&
3205	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
3206	        progress = 0;
3207		if (exec->errString != NULL)
3208		    xmlFree(exec->errString);
3209		exec->errString = xmlStrdup(value);
3210		exec->errState = exec->state;
3211		memcpy(exec->errCounts, exec->counts,
3212		       exec->comp->nbCounters * sizeof(int));
3213	    }
3214
3215	    /*
3216	     * Failed to find a way out
3217	     */
3218	    exec->determinist = 0;
3219	    xmlFARegExecRollBack(exec);
3220	    if (exec->status == 0) {
3221		value = exec->inputStack[exec->index].value;
3222		data = exec->inputStack[exec->index].data;
3223#ifdef DEBUG_PUSH
3224		printf("value loaded: %s\n", value);
3225#endif
3226	    }
3227	}
3228	continue;
3229progress:
3230        progress = 1;
3231	continue;
3232    }
3233    if (exec->status == 0) {
3234        return(exec->state->type == XML_REGEXP_FINAL_STATE);
3235    }
3236#ifdef DEBUG_ERR
3237    if (exec->status < 0) {
3238	testerr(exec);
3239    }
3240#endif
3241    return(exec->status);
3242}
3243
3244/**
3245 * xmlRegExecPushString2:
3246 * @exec: a regexp execution context or NULL to indicate the end
3247 * @value: the first string token input
3248 * @value2: the second string token input
3249 * @data: data associated to the token to reuse in callbacks
3250 *
3251 * Push one input token in the execution context
3252 *
3253 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
3254 *     a negative value in case of error.
3255 */
3256int
3257xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
3258                      const xmlChar *value2, void *data) {
3259    xmlChar buf[150];
3260    int lenn, lenp, ret;
3261    xmlChar *str;
3262
3263    if (exec == NULL)
3264	return(-1);
3265    if (exec->comp == NULL)
3266	return(-1);
3267    if (exec->status != 0)
3268	return(exec->status);
3269
3270    if (value2 == NULL)
3271        return(xmlRegExecPushString(exec, value, data));
3272
3273    lenn = strlen((char *) value2);
3274    lenp = strlen((char *) value);
3275
3276    if (150 < lenn + lenp + 2) {
3277	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
3278	if (str == NULL) {
3279	    exec->status = -1;
3280	    return(-1);
3281	}
3282    } else {
3283	str = buf;
3284    }
3285    memcpy(&str[0], value, lenp);
3286    str[lenp] = XML_REG_STRING_SEPARATOR;
3287    memcpy(&str[lenp + 1], value2, lenn);
3288    str[lenn + lenp + 1] = 0;
3289
3290    if (exec->comp->compact != NULL)
3291	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
3292    else
3293        ret = xmlRegExecPushString(exec, str, data);
3294
3295    if (str != buf)
3296        xmlFree(buf);
3297    return(ret);
3298}
3299
3300/**
3301 * xmlRegExecGetValues:
3302 * @exec: a regexp execution context
3303 * @err: error extraction or normal one
3304 * @nbval: pointer to the number of accepted values IN/OUT
3305 * @nbneg: return number of negative transitions
3306 * @values: pointer to the array of acceptable values
3307 * @terminal: return value if this was a terminal state
3308 *
3309 * Extract informations from the regexp execution, internal routine to
3310 * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
3311 *
3312 * Returns: 0 in case of success or -1 in case of error.
3313 */
3314static int
3315xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
3316                    int *nbval, int *nbneg,
3317		    xmlChar **values, int *terminal) {
3318    int maxval;
3319    int nb = 0;
3320
3321    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
3322        (values == NULL) || (*nbval <= 0))
3323        return(-1);
3324
3325    maxval = *nbval;
3326    *nbval = 0;
3327    *nbneg = 0;
3328    if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
3329        xmlRegexpPtr comp;
3330	int target, i, state;
3331
3332        comp = exec->comp;
3333
3334	if (err) {
3335	    if (exec->errStateNo == -1) return(-1);
3336	    state = exec->errStateNo;
3337	} else {
3338	    state = exec->index;
3339	}
3340	if (terminal != NULL) {
3341	    if (comp->compact[state * (comp->nbstrings + 1)] ==
3342	        XML_REGEXP_FINAL_STATE)
3343		*terminal = 1;
3344	    else
3345		*terminal = 0;
3346	}
3347	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3348	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3349	    if ((target > 0) && (target <= comp->nbstates) &&
3350	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
3351		 XML_REGEXP_SINK_STATE)) {
3352	        values[nb++] = comp->stringMap[i];
3353		(*nbval)++;
3354	    }
3355	}
3356	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
3357	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
3358	    if ((target > 0) && (target <= comp->nbstates) &&
3359	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
3360		 XML_REGEXP_SINK_STATE)) {
3361	        values[nb++] = comp->stringMap[i];
3362		(*nbneg)++;
3363	    }
3364	}
3365    } else {
3366        int transno;
3367	xmlRegTransPtr trans;
3368	xmlRegAtomPtr atom;
3369	xmlRegStatePtr state;
3370
3371	if (terminal != NULL) {
3372	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
3373		*terminal = 1;
3374	    else
3375		*terminal = 0;
3376	}
3377
3378	if (err) {
3379	    if (exec->errState == NULL) return(-1);
3380	    state = exec->errState;
3381	} else {
3382	    if (exec->state == NULL) return(-1);
3383	    state = exec->state;
3384	}
3385	for (transno = 0;
3386	     (transno < state->nbTrans) && (nb < maxval);
3387	     transno++) {
3388	    trans = &state->trans[transno];
3389	    if (trans->to < 0)
3390		continue;
3391	    atom = trans->atom;
3392	    if ((atom == NULL) || (atom->valuep == NULL))
3393		continue;
3394	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3395	        /* this should not be reached but ... */
3396	        TODO;
3397	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3398	        /* this should not be reached but ... */
3399	        TODO;
3400	    } else if (trans->counter >= 0) {
3401		xmlRegCounterPtr counter;
3402		int count;
3403
3404		if (err)
3405		    count = exec->errCounts[trans->counter];
3406		else
3407		    count = exec->counts[trans->counter];
3408		counter = &exec->comp->counters[trans->counter];
3409		if (count < counter->max) {
3410		    if (atom->neg)
3411			values[nb++] = (xmlChar *) atom->valuep2;
3412		    else
3413			values[nb++] = (xmlChar *) atom->valuep;
3414		    (*nbval)++;
3415		}
3416	    } else {
3417                if ((exec->comp->states[trans->to] != NULL) &&
3418		    (exec->comp->states[trans->to]->type !=
3419		     XML_REGEXP_SINK_STATE)) {
3420		    if (atom->neg)
3421			values[nb++] = (xmlChar *) atom->valuep2;
3422		    else
3423			values[nb++] = (xmlChar *) atom->valuep;
3424		    (*nbval)++;
3425		}
3426	    }
3427	}
3428	for (transno = 0;
3429	     (transno < state->nbTrans) && (nb < maxval);
3430	     transno++) {
3431	    trans = &state->trans[transno];
3432	    if (trans->to < 0)
3433		continue;
3434	    atom = trans->atom;
3435	    if ((atom == NULL) || (atom->valuep == NULL))
3436		continue;
3437	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
3438	        continue;
3439	    } else if (trans->count == REGEXP_ALL_COUNTER) {
3440	        continue;
3441	    } else if (trans->counter >= 0) {
3442	        continue;
3443	    } else {
3444                if ((exec->comp->states[trans->to] != NULL) &&
3445		    (exec->comp->states[trans->to]->type ==
3446		     XML_REGEXP_SINK_STATE)) {
3447		    if (atom->neg)
3448			values[nb++] = (xmlChar *) atom->valuep2;
3449		    else
3450			values[nb++] = (xmlChar *) atom->valuep;
3451		    (*nbneg)++;
3452		}
3453	    }
3454	}
3455    }
3456    return(0);
3457}
3458
3459/**
3460 * xmlRegExecNextValues:
3461 * @exec: a regexp execution context
3462 * @nbval: pointer to the number of accepted values IN/OUT
3463 * @nbneg: return number of negative transitions
3464 * @values: pointer to the array of acceptable values
3465 * @terminal: return value if this was a terminal state
3466 *
3467 * Extract informations from the regexp execution,
3468 * the parameter @values must point to an array of @nbval string pointers
3469 * on return nbval will contain the number of possible strings in that
3470 * state and the @values array will be updated with them. The string values
3471 * returned will be freed with the @exec context and don't need to be
3472 * deallocated.
3473 *
3474 * Returns: 0 in case of success or -1 in case of error.
3475 */
3476int
3477xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
3478                     xmlChar **values, int *terminal) {
3479    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
3480}
3481
3482/**
3483 * xmlRegExecErrInfo:
3484 * @exec: a regexp execution context generating an error
3485 * @string: return value for the error string
3486 * @nbval: pointer to the number of accepted values IN/OUT
3487 * @nbneg: return number of negative transitions
3488 * @values: pointer to the array of acceptable values
3489 * @terminal: return value if this was a terminal state
3490 *
3491 * Extract error informations from the regexp execution, the parameter
3492 * @string will be updated with the value pushed and not accepted,
3493 * the parameter @values must point to an array of @nbval string pointers
3494 * on return nbval will contain the number of possible strings in that
3495 * state and the @values array will be updated with them. The string values
3496 * returned will be freed with the @exec context and don't need to be
3497 * deallocated.
3498 *
3499 * Returns: 0 in case of success or -1 in case of error.
3500 */
3501int
3502xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
3503                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
3504    if (exec == NULL)
3505        return(-1);
3506    if (string != NULL) {
3507        if (exec->status != 0)
3508	    *string = exec->errString;
3509	else
3510	    *string = NULL;
3511    }
3512    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
3513}
3514
3515#ifdef DEBUG_ERR
3516static void testerr(xmlRegExecCtxtPtr exec) {
3517    const xmlChar *string;
3518    xmlChar *values[5];
3519    int nb = 5;
3520    int nbneg;
3521    int terminal;
3522    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
3523}
3524#endif
3525
3526#if 0
3527static int
3528xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
3529    xmlRegTransPtr trans;
3530    xmlRegAtomPtr atom;
3531    int ret;
3532    int codepoint, len;
3533
3534    if (exec == NULL)
3535	return(-1);
3536    if (exec->status != 0)
3537	return(exec->status);
3538
3539    while ((exec->status == 0) &&
3540	   ((exec->inputString[exec->index] != 0) ||
3541	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
3542
3543	/*
3544	 * End of input on non-terminal state, rollback, however we may
3545	 * still have epsilon like transition for counted transitions
3546	 * on counters, in that case don't break too early.
3547	 */
3548	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
3549	    goto rollback;
3550
3551	exec->transcount = 0;
3552	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
3553	    trans = &exec->state->trans[exec->transno];
3554	    if (trans->to < 0)
3555		continue;
3556	    atom = trans->atom;
3557	    ret = 0;
3558	    if (trans->count >= 0) {
3559		int count;
3560		xmlRegCounterPtr counter;
3561
3562		/*
3563		 * A counted transition.
3564		 */
3565
3566		count = exec->counts[trans->count];
3567		counter = &exec->comp->counters[trans->count];
3568#ifdef DEBUG_REGEXP_EXEC
3569		printf("testing count %d: val %d, min %d, max %d\n",
3570		       trans->count, count, counter->min,  counter->max);
3571#endif
3572		ret = ((count >= counter->min) && (count <= counter->max));
3573	    } else if (atom == NULL) {
3574		fprintf(stderr, "epsilon transition left at runtime\n");
3575		exec->status = -2;
3576		break;
3577	    } else if (exec->inputString[exec->index] != 0) {
3578                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
3579		ret = xmlRegCheckCharacter(atom, codepoint);
3580		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
3581		    xmlRegStatePtr to = exec->comp->states[trans->to];
3582
3583		    /*
3584		     * this is a multiple input sequence
3585		     */
3586		    if (exec->state->nbTrans > exec->transno + 1) {
3587			xmlFARegExecSave(exec);
3588		    }
3589		    exec->transcount = 1;
3590		    do {
3591			/*
3592			 * Try to progress as much as possible on the input
3593			 */
3594			if (exec->transcount == atom->max) {
3595			    break;
3596			}
3597			exec->index += len;
3598			/*
3599			 * End of input: stop here
3600			 */
3601			if (exec->inputString[exec->index] == 0) {
3602			    exec->index -= len;
3603			    break;
3604			}
3605			if (exec->transcount >= atom->min) {
3606			    int transno = exec->transno;
3607			    xmlRegStatePtr state = exec->state;
3608
3609			    /*
3610			     * The transition is acceptable save it
3611			     */
3612			    exec->transno = -1; /* trick */
3613			    exec->state = to;
3614			    xmlFARegExecSave(exec);
3615			    exec->transno = transno;
3616			    exec->state = state;
3617			}
3618			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
3619				              len);
3620			ret = xmlRegCheckCharacter(atom, codepoint);
3621			exec->transcount++;
3622		    } while (ret == 1);
3623		    if (exec->transcount < atom->min)
3624			ret = 0;
3625
3626		    /*
3627		     * If the last check failed but one transition was found
3628		     * possible, rollback
3629		     */
3630		    if (ret < 0)
3631			ret = 0;
3632		    if (ret == 0) {
3633			goto rollback;
3634		    }
3635		}
3636	    }
3637	    if (ret == 1) {
3638		if (exec->state->nbTrans > exec->transno + 1) {
3639		    xmlFARegExecSave(exec);
3640		}
3641		if (trans->counter >= 0) {
3642#ifdef DEBUG_REGEXP_EXEC
3643		    printf("Increasing count %d\n", trans->counter);
3644#endif
3645		    exec->counts[trans->counter]++;
3646		}
3647#ifdef DEBUG_REGEXP_EXEC
3648		printf("entering state %d\n", trans->to);
3649#endif
3650		exec->state = exec->comp->states[trans->to];
3651		exec->transno = 0;
3652		if (trans->atom != NULL) {
3653		    exec->index += len;
3654		}
3655		goto progress;
3656	    } else if (ret < 0) {
3657		exec->status = -4;
3658		break;
3659	    }
3660	}
3661	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
3662rollback:
3663	    /*
3664	     * Failed to find a way out
3665	     */
3666	    exec->determinist = 0;
3667	    xmlFARegExecRollBack(exec);
3668	}
3669progress:
3670	continue;
3671    }
3672}
3673#endif
3674/************************************************************************
3675 * 									*
3676 *	Parser for the Schemas Datatype Regular Expressions		*
3677 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
3678 * 									*
3679 ************************************************************************/
3680
3681/**
3682 * xmlFAIsChar:
3683 * @ctxt:  a regexp parser context
3684 *
3685 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
3686 */
3687static int
3688xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
3689    int cur;
3690    int len;
3691
3692    cur = CUR_SCHAR(ctxt->cur, len);
3693    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
3694	(cur == '*') || (cur == '+') || (cur == '(') ||
3695	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
3696	(cur == 0x5D) || (cur == 0))
3697	return(-1);
3698    return(cur);
3699}
3700
3701/**
3702 * xmlFAParseCharProp:
3703 * @ctxt:  a regexp parser context
3704 *
3705 * [27]   charProp   ::=   IsCategory | IsBlock
3706 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
3707 *                       Separators | Symbols | Others
3708 * [29]   Letters   ::=   'L' [ultmo]?
3709 * [30]   Marks   ::=   'M' [nce]?
3710 * [31]   Numbers   ::=   'N' [dlo]?
3711 * [32]   Punctuation   ::=   'P' [cdseifo]?
3712 * [33]   Separators   ::=   'Z' [slp]?
3713 * [34]   Symbols   ::=   'S' [mcko]?
3714 * [35]   Others   ::=   'C' [cfon]?
3715 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
3716 */
3717static void
3718xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
3719    int cur;
3720    xmlRegAtomType type = (xmlRegAtomType) 0;
3721    xmlChar *blockName = NULL;
3722
3723    cur = CUR;
3724    if (cur == 'L') {
3725	NEXT;
3726	cur = CUR;
3727	if (cur == 'u') {
3728	    NEXT;
3729	    type = XML_REGEXP_LETTER_UPPERCASE;
3730	} else if (cur == 'l') {
3731	    NEXT;
3732	    type = XML_REGEXP_LETTER_LOWERCASE;
3733	} else if (cur == 't') {
3734	    NEXT;
3735	    type = XML_REGEXP_LETTER_TITLECASE;
3736	} else if (cur == 'm') {
3737	    NEXT;
3738	    type = XML_REGEXP_LETTER_MODIFIER;
3739	} else if (cur == 'o') {
3740	    NEXT;
3741	    type = XML_REGEXP_LETTER_OTHERS;
3742	} else {
3743	    type = XML_REGEXP_LETTER;
3744	}
3745    } else if (cur == 'M') {
3746	NEXT;
3747	cur = CUR;
3748	if (cur == 'n') {
3749	    NEXT;
3750	    /* nonspacing */
3751	    type = XML_REGEXP_MARK_NONSPACING;
3752	} else if (cur == 'c') {
3753	    NEXT;
3754	    /* spacing combining */
3755	    type = XML_REGEXP_MARK_SPACECOMBINING;
3756	} else if (cur == 'e') {
3757	    NEXT;
3758	    /* enclosing */
3759	    type = XML_REGEXP_MARK_ENCLOSING;
3760	} else {
3761	    /* all marks */
3762	    type = XML_REGEXP_MARK;
3763	}
3764    } else if (cur == 'N') {
3765	NEXT;
3766	cur = CUR;
3767	if (cur == 'd') {
3768	    NEXT;
3769	    /* digital */
3770	    type = XML_REGEXP_NUMBER_DECIMAL;
3771	} else if (cur == 'l') {
3772	    NEXT;
3773	    /* letter */
3774	    type = XML_REGEXP_NUMBER_LETTER;
3775	} else if (cur == 'o') {
3776	    NEXT;
3777	    /* other */
3778	    type = XML_REGEXP_NUMBER_OTHERS;
3779	} else {
3780	    /* all numbers */
3781	    type = XML_REGEXP_NUMBER;
3782	}
3783    } else if (cur == 'P') {
3784	NEXT;
3785	cur = CUR;
3786	if (cur == 'c') {
3787	    NEXT;
3788	    /* connector */
3789	    type = XML_REGEXP_PUNCT_CONNECTOR;
3790	} else if (cur == 'd') {
3791	    NEXT;
3792	    /* dash */
3793	    type = XML_REGEXP_PUNCT_DASH;
3794	} else if (cur == 's') {
3795	    NEXT;
3796	    /* open */
3797	    type = XML_REGEXP_PUNCT_OPEN;
3798	} else if (cur == 'e') {
3799	    NEXT;
3800	    /* close */
3801	    type = XML_REGEXP_PUNCT_CLOSE;
3802	} else if (cur == 'i') {
3803	    NEXT;
3804	    /* initial quote */
3805	    type = XML_REGEXP_PUNCT_INITQUOTE;
3806	} else if (cur == 'f') {
3807	    NEXT;
3808	    /* final quote */
3809	    type = XML_REGEXP_PUNCT_FINQUOTE;
3810	} else if (cur == 'o') {
3811	    NEXT;
3812	    /* other */
3813	    type = XML_REGEXP_PUNCT_OTHERS;
3814	} else {
3815	    /* all punctuation */
3816	    type = XML_REGEXP_PUNCT;
3817	}
3818    } else if (cur == 'Z') {
3819	NEXT;
3820	cur = CUR;
3821	if (cur == 's') {
3822	    NEXT;
3823	    /* space */
3824	    type = XML_REGEXP_SEPAR_SPACE;
3825	} else if (cur == 'l') {
3826	    NEXT;
3827	    /* line */
3828	    type = XML_REGEXP_SEPAR_LINE;
3829	} else if (cur == 'p') {
3830	    NEXT;
3831	    /* paragraph */
3832	    type = XML_REGEXP_SEPAR_PARA;
3833	} else {
3834	    /* all separators */
3835	    type = XML_REGEXP_SEPAR;
3836	}
3837    } else if (cur == 'S') {
3838	NEXT;
3839	cur = CUR;
3840	if (cur == 'm') {
3841	    NEXT;
3842	    type = XML_REGEXP_SYMBOL_MATH;
3843	    /* math */
3844	} else if (cur == 'c') {
3845	    NEXT;
3846	    type = XML_REGEXP_SYMBOL_CURRENCY;
3847	    /* currency */
3848	} else if (cur == 'k') {
3849	    NEXT;
3850	    type = XML_REGEXP_SYMBOL_MODIFIER;
3851	    /* modifiers */
3852	} else if (cur == 'o') {
3853	    NEXT;
3854	    type = XML_REGEXP_SYMBOL_OTHERS;
3855	    /* other */
3856	} else {
3857	    /* all symbols */
3858	    type = XML_REGEXP_SYMBOL;
3859	}
3860    } else if (cur == 'C') {
3861	NEXT;
3862	cur = CUR;
3863	if (cur == 'c') {
3864	    NEXT;
3865	    /* control */
3866	    type = XML_REGEXP_OTHER_CONTROL;
3867	} else if (cur == 'f') {
3868	    NEXT;
3869	    /* format */
3870	    type = XML_REGEXP_OTHER_FORMAT;
3871	} else if (cur == 'o') {
3872	    NEXT;
3873	    /* private use */
3874	    type = XML_REGEXP_OTHER_PRIVATE;
3875	} else if (cur == 'n') {
3876	    NEXT;
3877	    /* not assigned */
3878	    type = XML_REGEXP_OTHER_NA;
3879	} else {
3880	    /* all others */
3881	    type = XML_REGEXP_OTHER;
3882	}
3883    } else if (cur == 'I') {
3884	const xmlChar *start;
3885	NEXT;
3886	cur = CUR;
3887	if (cur != 's') {
3888	    ERROR("IsXXXX expected");
3889	    return;
3890	}
3891	NEXT;
3892	start = ctxt->cur;
3893	cur = CUR;
3894	if (((cur >= 'a') && (cur <= 'z')) ||
3895	    ((cur >= 'A') && (cur <= 'Z')) ||
3896	    ((cur >= '0') && (cur <= '9')) ||
3897	    (cur == 0x2D)) {
3898	    NEXT;
3899	    cur = CUR;
3900	    while (((cur >= 'a') && (cur <= 'z')) ||
3901		((cur >= 'A') && (cur <= 'Z')) ||
3902		((cur >= '0') && (cur <= '9')) ||
3903		(cur == 0x2D)) {
3904		NEXT;
3905		cur = CUR;
3906	    }
3907	}
3908	type = XML_REGEXP_BLOCK_NAME;
3909	blockName = xmlStrndup(start, ctxt->cur - start);
3910    } else {
3911	ERROR("Unknown char property");
3912	return;
3913    }
3914    if (ctxt->atom == NULL) {
3915	ctxt->atom = xmlRegNewAtom(ctxt, type);
3916	if (ctxt->atom != NULL)
3917	    ctxt->atom->valuep = blockName;
3918    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3919        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3920		           type, 0, 0, blockName);
3921    }
3922}
3923
3924/**
3925 * xmlFAParseCharClassEsc:
3926 * @ctxt:  a regexp parser context
3927 *
3928 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
3929 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
3930 * [25] catEsc   ::=   '\p{' charProp '}'
3931 * [26] complEsc ::=   '\P{' charProp '}'
3932 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
3933 */
3934static void
3935xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
3936    int cur;
3937
3938    if (CUR == '.') {
3939	if (ctxt->atom == NULL) {
3940	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
3941	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3942	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3943			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
3944	}
3945	NEXT;
3946	return;
3947    }
3948    if (CUR != '\\') {
3949	ERROR("Escaped sequence: expecting \\");
3950	return;
3951    }
3952    NEXT;
3953    cur = CUR;
3954    if (cur == 'p') {
3955	NEXT;
3956	if (CUR != '{') {
3957	    ERROR("Expecting '{'");
3958	    return;
3959	}
3960	NEXT;
3961	xmlFAParseCharProp(ctxt);
3962	if (CUR != '}') {
3963	    ERROR("Expecting '}'");
3964	    return;
3965	}
3966	NEXT;
3967    } else if (cur == 'P') {
3968	NEXT;
3969	if (CUR != '{') {
3970	    ERROR("Expecting '{'");
3971	    return;
3972	}
3973	NEXT;
3974	xmlFAParseCharProp(ctxt);
3975	ctxt->atom->neg = 1;
3976	if (CUR != '}') {
3977	    ERROR("Expecting '}'");
3978	    return;
3979	}
3980	NEXT;
3981    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
3982	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
3983	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
3984	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
3985	(cur == 0x5E)) {
3986	if (ctxt->atom == NULL) {
3987	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3988	    if (ctxt->atom != NULL) {
3989	        switch (cur) {
3990		    case 'n':
3991		        ctxt->atom->codepoint = '\n';
3992			break;
3993		    case 'r':
3994		        ctxt->atom->codepoint = '\r';
3995			break;
3996		    case 't':
3997		        ctxt->atom->codepoint = '\t';
3998			break;
3999		    default:
4000			ctxt->atom->codepoint = cur;
4001		}
4002	    }
4003	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4004	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4005			       XML_REGEXP_CHARVAL, cur, cur, NULL);
4006	}
4007	NEXT;
4008    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
4009	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
4010	(cur == 'w') || (cur == 'W')) {
4011	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
4012
4013	switch (cur) {
4014	    case 's':
4015		type = XML_REGEXP_ANYSPACE;
4016		break;
4017	    case 'S':
4018		type = XML_REGEXP_NOTSPACE;
4019		break;
4020	    case 'i':
4021		type = XML_REGEXP_INITNAME;
4022		break;
4023	    case 'I':
4024		type = XML_REGEXP_NOTINITNAME;
4025		break;
4026	    case 'c':
4027		type = XML_REGEXP_NAMECHAR;
4028		break;
4029	    case 'C':
4030		type = XML_REGEXP_NOTNAMECHAR;
4031		break;
4032	    case 'd':
4033		type = XML_REGEXP_DECIMAL;
4034		break;
4035	    case 'D':
4036		type = XML_REGEXP_NOTDECIMAL;
4037		break;
4038	    case 'w':
4039		type = XML_REGEXP_REALCHAR;
4040		break;
4041	    case 'W':
4042		type = XML_REGEXP_NOTREALCHAR;
4043		break;
4044	}
4045	NEXT;
4046	if (ctxt->atom == NULL) {
4047	    ctxt->atom = xmlRegNewAtom(ctxt, type);
4048	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
4049	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4050			       type, 0, 0, NULL);
4051	}
4052    }
4053}
4054
4055/**
4056 * xmlFAParseCharRef:
4057 * @ctxt:  a regexp parser context
4058 *
4059 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
4060 */
4061static int
4062xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
4063    int ret = 0, cur;
4064
4065    if ((CUR != '&') || (NXT(1) != '#'))
4066	return(-1);
4067    NEXT;
4068    NEXT;
4069    cur = CUR;
4070    if (cur == 'x') {
4071	NEXT;
4072	cur = CUR;
4073	if (((cur >= '0') && (cur <= '9')) ||
4074	    ((cur >= 'a') && (cur <= 'f')) ||
4075	    ((cur >= 'A') && (cur <= 'F'))) {
4076	    while (((cur >= '0') && (cur <= '9')) ||
4077		   ((cur >= 'A') && (cur <= 'F'))) {
4078		if ((cur >= '0') && (cur <= '9'))
4079		    ret = ret * 16 + cur - '0';
4080		else if ((cur >= 'a') && (cur <= 'f'))
4081		    ret = ret * 16 + 10 + (cur - 'a');
4082		else
4083		    ret = ret * 16 + 10 + (cur - 'A');
4084		NEXT;
4085		cur = CUR;
4086	    }
4087	} else {
4088	    ERROR("Char ref: expecting [0-9A-F]");
4089	    return(-1);
4090	}
4091    } else {
4092	if ((cur >= '0') && (cur <= '9')) {
4093	    while ((cur >= '0') && (cur <= '9')) {
4094		ret = ret * 10 + cur - '0';
4095		NEXT;
4096		cur = CUR;
4097	    }
4098	} else {
4099	    ERROR("Char ref: expecting [0-9]");
4100	    return(-1);
4101	}
4102    }
4103    if (cur != ';') {
4104	ERROR("Char ref: expecting ';'");
4105	return(-1);
4106    } else {
4107	NEXT;
4108    }
4109    return(ret);
4110}
4111
4112/**
4113 * xmlFAParseCharRange:
4114 * @ctxt:  a regexp parser context
4115 *
4116 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
4117 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
4118 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
4119 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
4120 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
4121 */
4122static void
4123xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
4124    int cur, len;
4125    int start = -1;
4126    int end = -1;
4127
4128    if ((CUR == '&') && (NXT(1) == '#')) {
4129	end = start = xmlFAParseCharRef(ctxt);
4130        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4131	                   XML_REGEXP_CHARVAL, start, end, NULL);
4132	return;
4133    }
4134    cur = CUR;
4135    if (cur == '\\') {
4136	NEXT;
4137	cur = CUR;
4138	switch (cur) {
4139	    case 'n': start = 0xA; break;
4140	    case 'r': start = 0xD; break;
4141	    case 't': start = 0x9; break;
4142	    case '\\': case '|': case '.': case '-': case '^': case '?':
4143	    case '*': case '+': case '{': case '}': case '(': case ')':
4144	    case '[': case ']':
4145		start = cur; break;
4146	    default:
4147		ERROR("Invalid escape value");
4148		return;
4149	}
4150	end = start;
4151        len = 1;
4152    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4153        end = start = CUR_SCHAR(ctxt->cur, len);
4154    } else {
4155	ERROR("Expecting a char range");
4156	return;
4157    }
4158    NEXTL(len);
4159    if (start == '-') {
4160	return;
4161    }
4162    cur = CUR;
4163    if ((cur != '-') || (NXT(1) == ']')) {
4164        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4165		              XML_REGEXP_CHARVAL, start, end, NULL);
4166	return;
4167    }
4168    NEXT;
4169    cur = CUR;
4170    if (cur == '\\') {
4171	NEXT;
4172	cur = CUR;
4173	switch (cur) {
4174	    case 'n': end = 0xA; break;
4175	    case 'r': end = 0xD; break;
4176	    case 't': end = 0x9; break;
4177	    case '\\': case '|': case '.': case '-': case '^': case '?':
4178	    case '*': case '+': case '{': case '}': case '(': case ')':
4179	    case '[': case ']':
4180		end = cur; break;
4181	    default:
4182		ERROR("Invalid escape value");
4183		return;
4184	}
4185        len = 1;
4186    } else if ((cur != 0x5B) && (cur != 0x5D)) {
4187        end = CUR_SCHAR(ctxt->cur, len);
4188    } else {
4189	ERROR("Expecting the end of a char range");
4190	return;
4191    }
4192    NEXTL(len);
4193    /* TODO check that the values are acceptable character ranges for XML */
4194    if (end < start) {
4195	ERROR("End of range is before start of range");
4196    } else {
4197        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
4198		           XML_REGEXP_CHARVAL, start, end, NULL);
4199    }
4200    return;
4201}
4202
4203/**
4204 * xmlFAParsePosCharGroup:
4205 * @ctxt:  a regexp parser context
4206 *
4207 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
4208 */
4209static void
4210xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
4211    do {
4212	if ((CUR == '\\') || (CUR == '.')) {
4213	    xmlFAParseCharClassEsc(ctxt);
4214	} else {
4215	    xmlFAParseCharRange(ctxt);
4216	}
4217    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
4218	     (ctxt->error == 0));
4219}
4220
4221/**
4222 * xmlFAParseCharGroup:
4223 * @ctxt:  a regexp parser context
4224 *
4225 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
4226 * [15]   negCharGroup ::= '^' posCharGroup
4227 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
4228 * [12]   charClassExpr ::= '[' charGroup ']'
4229 */
4230static void
4231xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
4232    int n = ctxt->neg;
4233    while ((CUR != ']') && (ctxt->error == 0)) {
4234	if (CUR == '^') {
4235	    int neg = ctxt->neg;
4236
4237	    NEXT;
4238	    ctxt->neg = !ctxt->neg;
4239	    xmlFAParsePosCharGroup(ctxt);
4240	    ctxt->neg = neg;
4241	} else if ((CUR == '-') && (NXT(1) == '[')) {
4242	    int neg = ctxt->neg;
4243	    ctxt->neg = 2;
4244	    NEXT;	/* eat the '-' */
4245	    NEXT;	/* eat the '[' */
4246	    xmlFAParseCharGroup(ctxt);
4247	    if (CUR == ']') {
4248		NEXT;
4249	    } else {
4250		ERROR("charClassExpr: ']' expected");
4251		break;
4252	    }
4253	    ctxt->neg = neg;
4254	    break;
4255	} else if (CUR != ']') {
4256	    xmlFAParsePosCharGroup(ctxt);
4257	}
4258    }
4259    ctxt->neg = n;
4260}
4261
4262/**
4263 * xmlFAParseCharClass:
4264 * @ctxt:  a regexp parser context
4265 *
4266 * [11]   charClass   ::=     charClassEsc | charClassExpr
4267 * [12]   charClassExpr   ::=   '[' charGroup ']'
4268 */
4269static void
4270xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
4271    if (CUR == '[') {
4272	NEXT;
4273	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
4274	if (ctxt->atom == NULL)
4275	    return;
4276	xmlFAParseCharGroup(ctxt);
4277	if (CUR == ']') {
4278	    NEXT;
4279	} else {
4280	    ERROR("xmlFAParseCharClass: ']' expected");
4281	}
4282    } else {
4283	xmlFAParseCharClassEsc(ctxt);
4284    }
4285}
4286
4287/**
4288 * xmlFAParseQuantExact:
4289 * @ctxt:  a regexp parser context
4290 *
4291 * [8]   QuantExact   ::=   [0-9]+
4292 *
4293 * Returns 0 if success or -1 in case of error
4294 */
4295static int
4296xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
4297    int ret = 0;
4298    int ok = 0;
4299
4300    while ((CUR >= '0') && (CUR <= '9')) {
4301	ret = ret * 10 + (CUR - '0');
4302	ok = 1;
4303	NEXT;
4304    }
4305    if (ok != 1) {
4306	return(-1);
4307    }
4308    return(ret);
4309}
4310
4311/**
4312 * xmlFAParseQuantifier:
4313 * @ctxt:  a regexp parser context
4314 *
4315 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
4316 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
4317 * [6]   quantRange   ::=   QuantExact ',' QuantExact
4318 * [7]   quantMin   ::=   QuantExact ','
4319 * [8]   QuantExact   ::=   [0-9]+
4320 */
4321static int
4322xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
4323    int cur;
4324
4325    cur = CUR;
4326    if ((cur == '?') || (cur == '*') || (cur == '+')) {
4327	if (ctxt->atom != NULL) {
4328	    if (cur == '?')
4329		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
4330	    else if (cur == '*')
4331		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
4332	    else if (cur == '+')
4333		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
4334	}
4335	NEXT;
4336	return(1);
4337    }
4338    if (cur == '{') {
4339	int min = 0, max = 0;
4340
4341	NEXT;
4342	cur = xmlFAParseQuantExact(ctxt);
4343	if (cur >= 0)
4344	    min = cur;
4345	if (CUR == ',') {
4346	    NEXT;
4347	    if (CUR == '}')
4348	        max = INT_MAX;
4349	    else {
4350	        cur = xmlFAParseQuantExact(ctxt);
4351	        if (cur >= 0)
4352		    max = cur;
4353		else {
4354		    ERROR("Improper quantifier");
4355		}
4356	    }
4357	}
4358	if (CUR == '}') {
4359	    NEXT;
4360	} else {
4361	    ERROR("Unterminated quantifier");
4362	}
4363	if (max == 0)
4364	    max = min;
4365	if (ctxt->atom != NULL) {
4366	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
4367	    ctxt->atom->min = min;
4368	    ctxt->atom->max = max;
4369	}
4370	return(1);
4371    }
4372    return(0);
4373}
4374
4375/**
4376 * xmlFAParseAtom:
4377 * @ctxt:  a regexp parser context
4378 *
4379 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
4380 */
4381static int
4382xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
4383    int codepoint, len;
4384
4385    codepoint = xmlFAIsChar(ctxt);
4386    if (codepoint > 0) {
4387	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
4388	if (ctxt->atom == NULL)
4389	    return(-1);
4390	codepoint = CUR_SCHAR(ctxt->cur, len);
4391	ctxt->atom->codepoint = codepoint;
4392	NEXTL(len);
4393	return(1);
4394    } else if (CUR == '|') {
4395	return(0);
4396    } else if (CUR == 0) {
4397	return(0);
4398    } else if (CUR == ')') {
4399	return(0);
4400    } else if (CUR == '(') {
4401	xmlRegStatePtr start, oldend;
4402
4403	NEXT;
4404	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
4405	start = ctxt->state;
4406	oldend = ctxt->end;
4407	ctxt->end = NULL;
4408	ctxt->atom = NULL;
4409	xmlFAParseRegExp(ctxt, 0);
4410	if (CUR == ')') {
4411	    NEXT;
4412	} else {
4413	    ERROR("xmlFAParseAtom: expecting ')'");
4414	}
4415	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
4416	if (ctxt->atom == NULL)
4417	    return(-1);
4418	ctxt->atom->start = start;
4419	ctxt->atom->stop = ctxt->state;
4420	ctxt->end = oldend;
4421	return(1);
4422    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
4423	xmlFAParseCharClass(ctxt);
4424	return(1);
4425    }
4426    return(0);
4427}
4428
4429/**
4430 * xmlFAParsePiece:
4431 * @ctxt:  a regexp parser context
4432 *
4433 * [3]   piece   ::=   atom quantifier?
4434 */
4435static int
4436xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
4437    int ret;
4438
4439    ctxt->atom = NULL;
4440    ret = xmlFAParseAtom(ctxt);
4441    if (ret == 0)
4442	return(0);
4443    if (ctxt->atom == NULL) {
4444	ERROR("internal: no atom generated");
4445    }
4446    xmlFAParseQuantifier(ctxt);
4447    return(1);
4448}
4449
4450/**
4451 * xmlFAParseBranch:
4452 * @ctxt:  a regexp parser context
4453 *
4454 * [2]   branch   ::=   piece*
4455 8
4456 */
4457static int
4458xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) {
4459    xmlRegStatePtr previous;
4460    int ret;
4461
4462    previous = ctxt->state;
4463    ret = xmlFAParsePiece(ctxt);
4464    if (ret != 0) {
4465	if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0)
4466	    return(-1);
4467	previous = ctxt->state;
4468	ctxt->atom = NULL;
4469    }
4470    while ((ret != 0) && (ctxt->error == 0)) {
4471	ret = xmlFAParsePiece(ctxt);
4472	if (ret != 0) {
4473	    if (xmlFAGenerateTransitions(ctxt, previous, NULL,
4474					 ctxt->atom) < 0)
4475		    return(-1);
4476	    previous = ctxt->state;
4477	    ctxt->atom = NULL;
4478	}
4479    }
4480    return(0);
4481}
4482
4483/**
4484 * xmlFAParseRegExp:
4485 * @ctxt:  a regexp parser context
4486 * @top:  is this the top-level expression ?
4487 *
4488 * [1]   regExp   ::=     branch  ( '|' branch )*
4489 */
4490static void
4491xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
4492    xmlRegStatePtr start, end;
4493
4494    /* if not top start should have been generated by an epsilon trans */
4495    start = ctxt->state;
4496    ctxt->end = NULL;
4497    xmlFAParseBranch(ctxt);
4498    if (top) {
4499#ifdef DEBUG_REGEXP_GRAPH
4500	printf("State %d is final\n", ctxt->state->no);
4501#endif
4502	ctxt->state->type = XML_REGEXP_FINAL_STATE;
4503    }
4504    if (CUR != '|') {
4505	ctxt->end = ctxt->state;
4506	return;
4507    }
4508    end = ctxt->state;
4509    while ((CUR == '|') && (ctxt->error == 0)) {
4510	NEXT;
4511	ctxt->state = start;
4512	ctxt->end = NULL;
4513	xmlFAParseBranch(ctxt);
4514	if (top) {
4515	    ctxt->state->type = XML_REGEXP_FINAL_STATE;
4516#ifdef DEBUG_REGEXP_GRAPH
4517	    printf("State %d is final\n", ctxt->state->no);
4518#endif
4519	} else {
4520	    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end);
4521	}
4522    }
4523    if (!top) {
4524	ctxt->state = end;
4525	ctxt->end = end;
4526    }
4527}
4528
4529/************************************************************************
4530 * 									*
4531 * 			The basic API					*
4532 * 									*
4533 ************************************************************************/
4534
4535/**
4536 * xmlRegexpPrint:
4537 * @output: the file for the output debug
4538 * @regexp: the compiled regexp
4539 *
4540 * Print the content of the compiled regular expression
4541 */
4542void
4543xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
4544    int i;
4545
4546    if (output == NULL)
4547        return;
4548    fprintf(output, " regexp: ");
4549    if (regexp == NULL) {
4550	fprintf(output, "NULL\n");
4551	return;
4552    }
4553    fprintf(output, "'%s' ", regexp->string);
4554    fprintf(output, "\n");
4555    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
4556    for (i = 0;i < regexp->nbAtoms; i++) {
4557	fprintf(output, " %02d ", i);
4558	xmlRegPrintAtom(output, regexp->atoms[i]);
4559    }
4560    fprintf(output, "%d states:", regexp->nbStates);
4561    fprintf(output, "\n");
4562    for (i = 0;i < regexp->nbStates; i++) {
4563	xmlRegPrintState(output, regexp->states[i]);
4564    }
4565    fprintf(output, "%d counters:\n", regexp->nbCounters);
4566    for (i = 0;i < regexp->nbCounters; i++) {
4567	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
4568		                                regexp->counters[i].max);
4569    }
4570}
4571
4572/**
4573 * xmlRegexpCompile:
4574 * @regexp:  a regular expression string
4575 *
4576 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
4577 * Appendix F and builds an automata suitable for testing strings against
4578 * that regular expression
4579 *
4580 * Returns the compiled expression or NULL in case of error
4581 */
4582xmlRegexpPtr
4583xmlRegexpCompile(const xmlChar *regexp) {
4584    xmlRegexpPtr ret;
4585    xmlRegParserCtxtPtr ctxt;
4586
4587    ctxt = xmlRegNewParserCtxt(regexp);
4588    if (ctxt == NULL)
4589	return(NULL);
4590
4591    /* initialize the parser */
4592    ctxt->end = NULL;
4593    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
4594    xmlRegStatePush(ctxt, ctxt->start);
4595
4596    /* parse the expression building an automata */
4597    xmlFAParseRegExp(ctxt, 1);
4598    if (CUR != 0) {
4599	ERROR("xmlFAParseRegExp: extra characters");
4600    }
4601    ctxt->end = ctxt->state;
4602    ctxt->start->type = XML_REGEXP_START_STATE;
4603    ctxt->end->type = XML_REGEXP_FINAL_STATE;
4604
4605    /* remove the Epsilon except for counted transitions */
4606    xmlFAEliminateEpsilonTransitions(ctxt);
4607
4608
4609    if (ctxt->error != 0) {
4610	xmlRegFreeParserCtxt(ctxt);
4611	return(NULL);
4612    }
4613    ret = xmlRegEpxFromParse(ctxt);
4614    xmlRegFreeParserCtxt(ctxt);
4615    return(ret);
4616}
4617
4618/**
4619 * xmlRegexpExec:
4620 * @comp:  the compiled regular expression
4621 * @content:  the value to check against the regular expression
4622 *
4623 * Check if the regular expression generates the value
4624 *
4625 * Returns 1 if it matches, 0 if not and a negative value in case of error
4626 */
4627int
4628xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
4629    if ((comp == NULL) || (content == NULL))
4630	return(-1);
4631    return(xmlFARegExec(comp, content));
4632}
4633
4634/**
4635 * xmlRegexpIsDeterminist:
4636 * @comp:  the compiled regular expression
4637 *
4638 * Check if the regular expression is determinist
4639 *
4640 * Returns 1 if it yes, 0 if not and a negative value in case of error
4641 */
4642int
4643xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
4644    xmlAutomataPtr am;
4645    int ret;
4646
4647    if (comp == NULL)
4648	return(-1);
4649    if (comp->determinist != -1)
4650	return(comp->determinist);
4651
4652    am = xmlNewAutomata();
4653    if (am->states != NULL) {
4654	int i;
4655
4656	for (i = 0;i < am->nbStates;i++)
4657	    xmlRegFreeState(am->states[i]);
4658	xmlFree(am->states);
4659    }
4660    am->nbAtoms = comp->nbAtoms;
4661    am->atoms = comp->atoms;
4662    am->nbStates = comp->nbStates;
4663    am->states = comp->states;
4664    am->determinist = -1;
4665    ret = xmlFAComputesDeterminism(am);
4666    am->atoms = NULL;
4667    am->states = NULL;
4668    xmlFreeAutomata(am);
4669    return(ret);
4670}
4671
4672/**
4673 * xmlRegFreeRegexp:
4674 * @regexp:  the regexp
4675 *
4676 * Free a regexp
4677 */
4678void
4679xmlRegFreeRegexp(xmlRegexpPtr regexp) {
4680    int i;
4681    if (regexp == NULL)
4682	return;
4683
4684    if (regexp->string != NULL)
4685	xmlFree(regexp->string);
4686    if (regexp->states != NULL) {
4687	for (i = 0;i < regexp->nbStates;i++)
4688	    xmlRegFreeState(regexp->states[i]);
4689	xmlFree(regexp->states);
4690    }
4691    if (regexp->atoms != NULL) {
4692	for (i = 0;i < regexp->nbAtoms;i++)
4693	    xmlRegFreeAtom(regexp->atoms[i]);
4694	xmlFree(regexp->atoms);
4695    }
4696    if (regexp->counters != NULL)
4697	xmlFree(regexp->counters);
4698    if (regexp->compact != NULL)
4699	xmlFree(regexp->compact);
4700    if (regexp->transdata != NULL)
4701	xmlFree(regexp->transdata);
4702    if (regexp->stringMap != NULL) {
4703	for (i = 0; i < regexp->nbstrings;i++)
4704	    xmlFree(regexp->stringMap[i]);
4705	xmlFree(regexp->stringMap);
4706    }
4707
4708    xmlFree(regexp);
4709}
4710
4711#ifdef LIBXML_AUTOMATA_ENABLED
4712/************************************************************************
4713 * 									*
4714 * 			The Automata interface				*
4715 * 									*
4716 ************************************************************************/
4717
4718/**
4719 * xmlNewAutomata:
4720 *
4721 * Create a new automata
4722 *
4723 * Returns the new object or NULL in case of failure
4724 */
4725xmlAutomataPtr
4726xmlNewAutomata(void) {
4727    xmlAutomataPtr ctxt;
4728
4729    ctxt = xmlRegNewParserCtxt(NULL);
4730    if (ctxt == NULL)
4731	return(NULL);
4732
4733    /* initialize the parser */
4734    ctxt->end = NULL;
4735    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
4736    ctxt->start->type = XML_REGEXP_START_STATE;
4737    if (ctxt->start == NULL) {
4738	xmlFreeAutomata(ctxt);
4739	return(NULL);
4740    }
4741    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
4742        xmlRegFreeState(ctxt->start);
4743	xmlFreeAutomata(ctxt);
4744	return(NULL);
4745    }
4746
4747    return(ctxt);
4748}
4749
4750/**
4751 * xmlFreeAutomata:
4752 * @am: an automata
4753 *
4754 * Free an automata
4755 */
4756void
4757xmlFreeAutomata(xmlAutomataPtr am) {
4758    if (am == NULL)
4759	return;
4760    xmlRegFreeParserCtxt(am);
4761}
4762
4763/**
4764 * xmlAutomataGetInitState:
4765 * @am: an automata
4766 *
4767 * Initial state lookup
4768 *
4769 * Returns the initial state of the automata
4770 */
4771xmlAutomataStatePtr
4772xmlAutomataGetInitState(xmlAutomataPtr am) {
4773    if (am == NULL)
4774	return(NULL);
4775    return(am->start);
4776}
4777
4778/**
4779 * xmlAutomataSetFinalState:
4780 * @am: an automata
4781 * @state: a state in this automata
4782 *
4783 * Makes that state a final state
4784 *
4785 * Returns 0 or -1 in case of error
4786 */
4787int
4788xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
4789    if ((am == NULL) || (state == NULL))
4790	return(-1);
4791    state->type = XML_REGEXP_FINAL_STATE;
4792    return(0);
4793}
4794
4795/**
4796 * xmlAutomataNewTransition:
4797 * @am: an automata
4798 * @from: the starting point of the transition
4799 * @to: the target point of the transition or NULL
4800 * @token: the input string associated to that transition
4801 * @data: data passed to the callback function if the transition is activated
4802 *
4803 * If @to is NULL, this creates first a new target state in the automata
4804 * and then adds a transition from the @from state to the target state
4805 * activated by the value of @token
4806 *
4807 * Returns the target state or NULL in case of error
4808 */
4809xmlAutomataStatePtr
4810xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
4811			 xmlAutomataStatePtr to, const xmlChar *token,
4812			 void *data) {
4813    xmlRegAtomPtr atom;
4814
4815    if ((am == NULL) || (from == NULL) || (token == NULL))
4816	return(NULL);
4817    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4818    if (atom == NULL)
4819        return(NULL);
4820    atom->data = data;
4821    if (atom == NULL)
4822	return(NULL);
4823    atom->valuep = xmlStrdup(token);
4824
4825    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
4826        xmlRegFreeAtom(atom);
4827	return(NULL);
4828    }
4829    if (to == NULL)
4830	return(am->state);
4831    return(to);
4832}
4833
4834/**
4835 * xmlAutomataNewTransition2:
4836 * @am: an automata
4837 * @from: the starting point of the transition
4838 * @to: the target point of the transition or NULL
4839 * @token: the first input string associated to that transition
4840 * @token2: the second input string associated to that transition
4841 * @data: data passed to the callback function if the transition is activated
4842 *
4843 * If @to is NULL, this creates first a new target state in the automata
4844 * and then adds a transition from the @from state to the target state
4845 * activated by the value of @token
4846 *
4847 * Returns the target state or NULL in case of error
4848 */
4849xmlAutomataStatePtr
4850xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
4851			  xmlAutomataStatePtr to, const xmlChar *token,
4852			  const xmlChar *token2, void *data) {
4853    xmlRegAtomPtr atom;
4854
4855    if ((am == NULL) || (from == NULL) || (token == NULL))
4856	return(NULL);
4857    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4858    atom->data = data;
4859    if (atom == NULL)
4860	return(NULL);
4861    if ((token2 == NULL) || (*token2 == 0)) {
4862	atom->valuep = xmlStrdup(token);
4863    } else {
4864	int lenn, lenp;
4865	xmlChar *str;
4866
4867	lenn = strlen((char *) token2);
4868	lenp = strlen((char *) token);
4869
4870	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4871	if (str == NULL) {
4872	    xmlRegFreeAtom(atom);
4873	    return(NULL);
4874	}
4875	memcpy(&str[0], token, lenp);
4876	str[lenp] = '|';
4877	memcpy(&str[lenp + 1], token2, lenn);
4878	str[lenn + lenp + 1] = 0;
4879
4880	atom->valuep = str;
4881    }
4882
4883    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
4884        xmlRegFreeAtom(atom);
4885	return(NULL);
4886    }
4887    if (to == NULL)
4888	return(am->state);
4889    return(to);
4890}
4891
4892/**
4893 * xmlAutomataNewNegTrans:
4894 * @am: an automata
4895 * @from: the starting point of the transition
4896 * @to: the target point of the transition or NULL
4897 * @token: the first input string associated to that transition
4898 * @token2: the second input string associated to that transition
4899 * @data: data passed to the callback function if the transition is activated
4900 *
4901 * If @to is NULL, this creates first a new target state in the automata
4902 * and then adds a transition from the @from state to the target state
4903 * activated by any value except (@token,@token2)
4904 *
4905 * Returns the target state or NULL in case of error
4906 */
4907xmlAutomataStatePtr
4908xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4909		       xmlAutomataStatePtr to, const xmlChar *token,
4910		       const xmlChar *token2, void *data) {
4911    xmlRegAtomPtr atom;
4912    xmlChar err_msg[200];
4913
4914    if ((am == NULL) || (from == NULL) || (token == NULL))
4915	return(NULL);
4916    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4917    if (atom == NULL)
4918	return(NULL);
4919    atom->data = data;
4920    atom->neg = 1;
4921    if ((token2 == NULL) || (*token2 == 0)) {
4922	atom->valuep = xmlStrdup(token);
4923    } else {
4924	int lenn, lenp;
4925	xmlChar *str;
4926
4927	lenn = strlen((char *) token2);
4928	lenp = strlen((char *) token);
4929
4930	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
4931	if (str == NULL) {
4932	    xmlRegFreeAtom(atom);
4933	    return(NULL);
4934	}
4935	memcpy(&str[0], token, lenp);
4936	str[lenp] = '|';
4937	memcpy(&str[lenp + 1], token2, lenn);
4938	str[lenn + lenp + 1] = 0;
4939
4940	atom->valuep = str;
4941    }
4942    snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
4943    err_msg[199] = 0;
4944    atom->valuep2 = xmlStrdup(err_msg);
4945
4946    if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
4947        xmlRegFreeAtom(atom);
4948	return(NULL);
4949    }
4950    if (to == NULL)
4951	return(am->state);
4952    return(to);
4953}
4954
4955/**
4956 * xmlAutomataNewCountTrans2:
4957 * @am: an automata
4958 * @from: the starting point of the transition
4959 * @to: the target point of the transition or NULL
4960 * @token: the input string associated to that transition
4961 * @token2: the second input string associated to that transition
4962 * @min:  the minimum successive occurences of token
4963 * @max:  the maximum successive occurences of token
4964 * @data:  data associated to the transition
4965 *
4966 * If @to is NULL, this creates first a new target state in the automata
4967 * and then adds a transition from the @from state to the target state
4968 * activated by a succession of input of value @token and @token2 and
4969 * whose number is between @min and @max
4970 *
4971 * Returns the target state or NULL in case of error
4972 */
4973xmlAutomataStatePtr
4974xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
4975			 xmlAutomataStatePtr to, const xmlChar *token,
4976			 const xmlChar *token2,
4977			 int min, int max, void *data) {
4978    xmlRegAtomPtr atom;
4979    int counter;
4980
4981    if ((am == NULL) || (from == NULL) || (token == NULL))
4982	return(NULL);
4983    if (min < 0)
4984	return(NULL);
4985    if ((max < min) || (max < 1))
4986	return(NULL);
4987    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4988    if (atom == NULL)
4989	return(NULL);
4990    if ((token2 == NULL) || (*token2 == 0)) {
4991	atom->valuep = xmlStrdup(token);
4992    } else {
4993	int lenn, lenp;
4994	xmlChar *str;
4995
4996	lenn = strlen((char *) token2);
4997	lenp = strlen((char *) token);
4998
4999	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5000	if (str == NULL) {
5001	    xmlRegFreeAtom(atom);
5002	    return(NULL);
5003	}
5004	memcpy(&str[0], token, lenp);
5005	str[lenp] = '|';
5006	memcpy(&str[lenp + 1], token2, lenn);
5007	str[lenn + lenp + 1] = 0;
5008
5009	atom->valuep = str;
5010    }
5011    atom->data = data;
5012    if (min == 0)
5013	atom->min = 1;
5014    else
5015	atom->min = min;
5016    atom->max = max;
5017
5018    /*
5019     * associate a counter to the transition.
5020     */
5021    counter = xmlRegGetCounter(am);
5022    am->counters[counter].min = min;
5023    am->counters[counter].max = max;
5024
5025    /* xmlFAGenerateTransitions(am, from, to, atom); */
5026    if (to == NULL) {
5027        to = xmlRegNewState(am);
5028	xmlRegStatePush(am, to);
5029    }
5030    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5031    xmlRegAtomPush(am, atom);
5032    am->state = to;
5033
5034    if (to == NULL)
5035	to = am->state;
5036    if (to == NULL)
5037	return(NULL);
5038    if (min == 0)
5039	xmlFAGenerateEpsilonTransition(am, from, to);
5040    return(to);
5041}
5042
5043/**
5044 * xmlAutomataNewCountTrans:
5045 * @am: an automata
5046 * @from: the starting point of the transition
5047 * @to: the target point of the transition or NULL
5048 * @token: the input string associated to that transition
5049 * @min:  the minimum successive occurences of token
5050 * @max:  the maximum successive occurences of token
5051 * @data:  data associated to the transition
5052 *
5053 * If @to is NULL, this creates first a new target state in the automata
5054 * and then adds a transition from the @from state to the target state
5055 * activated by a succession of input of value @token and whose number
5056 * is between @min and @max
5057 *
5058 * Returns the target state or NULL in case of error
5059 */
5060xmlAutomataStatePtr
5061xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5062			 xmlAutomataStatePtr to, const xmlChar *token,
5063			 int min, int max, void *data) {
5064    xmlRegAtomPtr atom;
5065    int counter;
5066
5067    if ((am == NULL) || (from == NULL) || (token == NULL))
5068	return(NULL);
5069    if (min < 0)
5070	return(NULL);
5071    if ((max < min) || (max < 1))
5072	return(NULL);
5073    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5074    if (atom == NULL)
5075	return(NULL);
5076    atom->valuep = xmlStrdup(token);
5077    atom->data = data;
5078    if (min == 0)
5079	atom->min = 1;
5080    else
5081	atom->min = min;
5082    atom->max = max;
5083
5084    /*
5085     * associate a counter to the transition.
5086     */
5087    counter = xmlRegGetCounter(am);
5088    am->counters[counter].min = min;
5089    am->counters[counter].max = max;
5090
5091    /* xmlFAGenerateTransitions(am, from, to, atom); */
5092    if (to == NULL) {
5093        to = xmlRegNewState(am);
5094	xmlRegStatePush(am, to);
5095    }
5096    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5097    xmlRegAtomPush(am, atom);
5098    am->state = to;
5099
5100    if (to == NULL)
5101	to = am->state;
5102    if (to == NULL)
5103	return(NULL);
5104    if (min == 0)
5105	xmlFAGenerateEpsilonTransition(am, from, to);
5106    return(to);
5107}
5108
5109/**
5110 * xmlAutomataNewOnceTrans2:
5111 * @am: an automata
5112 * @from: the starting point of the transition
5113 * @to: the target point of the transition or NULL
5114 * @token: the input string associated to that transition
5115 * @token2: the second input string associated to that transition
5116 * @min:  the minimum successive occurences of token
5117 * @max:  the maximum successive occurences of token
5118 * @data:  data associated to the transition
5119 *
5120 * If @to is NULL, this creates first a new target state in the automata
5121 * and then adds a transition from the @from state to the target state
5122 * activated by a succession of input of value @token and @token2 and whose
5123 * number is between @min and @max, moreover that transition can only be
5124 * crossed once.
5125 *
5126 * Returns the target state or NULL in case of error
5127 */
5128xmlAutomataStatePtr
5129xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
5130			 xmlAutomataStatePtr to, const xmlChar *token,
5131			 const xmlChar *token2,
5132			 int min, int max, void *data) {
5133    xmlRegAtomPtr atom;
5134    int counter;
5135
5136    if ((am == NULL) || (from == NULL) || (token == NULL))
5137	return(NULL);
5138    if (min < 1)
5139	return(NULL);
5140    if ((max < min) || (max < 1))
5141	return(NULL);
5142    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5143    if (atom == NULL)
5144	return(NULL);
5145    if ((token2 == NULL) || (*token2 == 0)) {
5146	atom->valuep = xmlStrdup(token);
5147    } else {
5148	int lenn, lenp;
5149	xmlChar *str;
5150
5151	lenn = strlen((char *) token2);
5152	lenp = strlen((char *) token);
5153
5154	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
5155	if (str == NULL) {
5156	    xmlRegFreeAtom(atom);
5157	    return(NULL);
5158	}
5159	memcpy(&str[0], token, lenp);
5160	str[lenp] = '|';
5161	memcpy(&str[lenp + 1], token2, lenn);
5162	str[lenn + lenp + 1] = 0;
5163
5164	atom->valuep = str;
5165    }
5166    atom->data = data;
5167    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5168    if (min == 0)
5169	atom->min = 1;
5170    else
5171	atom->min = min;
5172    atom->max = max;
5173    /*
5174     * associate a counter to the transition.
5175     */
5176    counter = xmlRegGetCounter(am);
5177    am->counters[counter].min = 1;
5178    am->counters[counter].max = 1;
5179
5180    /* xmlFAGenerateTransitions(am, from, to, atom); */
5181    if (to == NULL) {
5182	to = xmlRegNewState(am);
5183	xmlRegStatePush(am, to);
5184    }
5185    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5186    xmlRegAtomPush(am, atom);
5187    am->state = to;
5188    return(to);
5189}
5190
5191
5192
5193/**
5194 * xmlAutomataNewOnceTrans:
5195 * @am: an automata
5196 * @from: the starting point of the transition
5197 * @to: the target point of the transition or NULL
5198 * @token: the input string associated to that transition
5199 * @min:  the minimum successive occurences of token
5200 * @max:  the maximum successive occurences of token
5201 * @data:  data associated to the transition
5202 *
5203 * If @to is NULL, this creates first a new target state in the automata
5204 * and then adds a transition from the @from state to the target state
5205 * activated by a succession of input of value @token and whose number
5206 * is between @min and @max, moreover that transition can only be crossed
5207 * once.
5208 *
5209 * Returns the target state or NULL in case of error
5210 */
5211xmlAutomataStatePtr
5212xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5213			 xmlAutomataStatePtr to, const xmlChar *token,
5214			 int min, int max, void *data) {
5215    xmlRegAtomPtr atom;
5216    int counter;
5217
5218    if ((am == NULL) || (from == NULL) || (token == NULL))
5219	return(NULL);
5220    if (min < 1)
5221	return(NULL);
5222    if ((max < min) || (max < 1))
5223	return(NULL);
5224    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
5225    if (atom == NULL)
5226	return(NULL);
5227    atom->valuep = xmlStrdup(token);
5228    atom->data = data;
5229    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
5230    if (min == 0)
5231	atom->min = 1;
5232    else
5233	atom->min = min;
5234    atom->max = max;
5235    /*
5236     * associate a counter to the transition.
5237     */
5238    counter = xmlRegGetCounter(am);
5239    am->counters[counter].min = 1;
5240    am->counters[counter].max = 1;
5241
5242    /* xmlFAGenerateTransitions(am, from, to, atom); */
5243    if (to == NULL) {
5244	to = xmlRegNewState(am);
5245	xmlRegStatePush(am, to);
5246    }
5247    xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
5248    xmlRegAtomPush(am, atom);
5249    am->state = to;
5250    return(to);
5251}
5252
5253/**
5254 * xmlAutomataNewState:
5255 * @am: an automata
5256 *
5257 * Create a new disconnected state in the automata
5258 *
5259 * Returns the new state or NULL in case of error
5260 */
5261xmlAutomataStatePtr
5262xmlAutomataNewState(xmlAutomataPtr am) {
5263    xmlAutomataStatePtr to;
5264
5265    if (am == NULL)
5266	return(NULL);
5267    to = xmlRegNewState(am);
5268    xmlRegStatePush(am, to);
5269    return(to);
5270}
5271
5272/**
5273 * xmlAutomataNewEpsilon:
5274 * @am: an automata
5275 * @from: the starting point of the transition
5276 * @to: the target point of the transition or NULL
5277 *
5278 * If @to is NULL, this creates first a new target state in the automata
5279 * and then adds an epsilon transition from the @from state to the
5280 * target state
5281 *
5282 * Returns the target state or NULL in case of error
5283 */
5284xmlAutomataStatePtr
5285xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
5286		      xmlAutomataStatePtr to) {
5287    if ((am == NULL) || (from == NULL))
5288	return(NULL);
5289    xmlFAGenerateEpsilonTransition(am, from, to);
5290    if (to == NULL)
5291	return(am->state);
5292    return(to);
5293}
5294
5295/**
5296 * xmlAutomataNewAllTrans:
5297 * @am: an automata
5298 * @from: the starting point of the transition
5299 * @to: the target point of the transition or NULL
5300 * @lax: allow to transition if not all all transitions have been activated
5301 *
5302 * If @to is NULL, this creates first a new target state in the automata
5303 * and then adds a an ALL transition from the @from state to the
5304 * target state. That transition is an epsilon transition allowed only when
5305 * all transitions from the @from node have been activated.
5306 *
5307 * Returns the target state or NULL in case of error
5308 */
5309xmlAutomataStatePtr
5310xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5311		       xmlAutomataStatePtr to, int lax) {
5312    if ((am == NULL) || (from == NULL))
5313	return(NULL);
5314    xmlFAGenerateAllTransition(am, from, to, lax);
5315    if (to == NULL)
5316	return(am->state);
5317    return(to);
5318}
5319
5320/**
5321 * xmlAutomataNewCounter:
5322 * @am: an automata
5323 * @min:  the minimal value on the counter
5324 * @max:  the maximal value on the counter
5325 *
5326 * Create a new counter
5327 *
5328 * Returns the counter number or -1 in case of error
5329 */
5330int
5331xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
5332    int ret;
5333
5334    if (am == NULL)
5335	return(-1);
5336
5337    ret = xmlRegGetCounter(am);
5338    if (ret < 0)
5339	return(-1);
5340    am->counters[ret].min = min;
5341    am->counters[ret].max = max;
5342    return(ret);
5343}
5344
5345/**
5346 * xmlAutomataNewCountedTrans:
5347 * @am: an automata
5348 * @from: the starting point of the transition
5349 * @to: the target point of the transition or NULL
5350 * @counter: the counter associated to that transition
5351 *
5352 * If @to is NULL, this creates first a new target state in the automata
5353 * and then adds an epsilon transition from the @from state to the target state
5354 * which will increment the counter provided
5355 *
5356 * Returns the target state or NULL in case of error
5357 */
5358xmlAutomataStatePtr
5359xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5360		xmlAutomataStatePtr to, int counter) {
5361    if ((am == NULL) || (from == NULL) || (counter < 0))
5362	return(NULL);
5363    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
5364    if (to == NULL)
5365	return(am->state);
5366    return(to);
5367}
5368
5369/**
5370 * xmlAutomataNewCounterTrans:
5371 * @am: an automata
5372 * @from: the starting point of the transition
5373 * @to: the target point of the transition or NULL
5374 * @counter: the counter associated to that transition
5375 *
5376 * If @to is NULL, this creates first a new target state in the automata
5377 * and then adds an epsilon transition from the @from state to the target state
5378 * which will be allowed only if the counter is within the right range.
5379 *
5380 * Returns the target state or NULL in case of error
5381 */
5382xmlAutomataStatePtr
5383xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
5384		xmlAutomataStatePtr to, int counter) {
5385    if ((am == NULL) || (from == NULL) || (counter < 0))
5386	return(NULL);
5387    xmlFAGenerateCountedTransition(am, from, to, counter);
5388    if (to == NULL)
5389	return(am->state);
5390    return(to);
5391}
5392
5393/**
5394 * xmlAutomataCompile:
5395 * @am: an automata
5396 *
5397 * Compile the automata into a Reg Exp ready for being executed.
5398 * The automata should be free after this point.
5399 *
5400 * Returns the compiled regexp or NULL in case of error
5401 */
5402xmlRegexpPtr
5403xmlAutomataCompile(xmlAutomataPtr am) {
5404    xmlRegexpPtr ret;
5405
5406    if ((am == NULL) || (am->error != 0)) return(NULL);
5407    xmlFAEliminateEpsilonTransitions(am);
5408    /* xmlFAComputesDeterminism(am); */
5409    ret = xmlRegEpxFromParse(am);
5410
5411    return(ret);
5412}
5413
5414/**
5415 * xmlAutomataIsDeterminist:
5416 * @am: an automata
5417 *
5418 * Checks if an automata is determinist.
5419 *
5420 * Returns 1 if true, 0 if not, and -1 in case of error
5421 */
5422int
5423xmlAutomataIsDeterminist(xmlAutomataPtr am) {
5424    int ret;
5425
5426    if (am == NULL)
5427	return(-1);
5428
5429    ret = xmlFAComputesDeterminism(am);
5430    return(ret);
5431}
5432#endif /* LIBXML_AUTOMATA_ENABLED */
5433#define bottom_xmlregexp
5434#include "elfgcchack.h"
5435#endif /* LIBXML_REGEXP_ENABLED */
5436