xmlregexp.c revision 23e73571f8f6918e4ea7be3506ee5bd24ee86c52
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications thise includes:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22#include <stdio.h>
23#include <string.h>
24#include <libxml/tree.h>
25#include <libxml/parserInternals.h>
26#include <libxml/xmlregexp.h>
27#include <libxml/xmlautomata.h>
28#include <libxml/xmlunicode.h>
29
30/* #define DEBUG_REGEXP_GRAPH  */
31/* #define DEBUG_REGEXP_EXEC */
32/* #define DEBUG_PUSH */
33/* #define DEBUG_COMPACTION */
34
35#define ERROR(str) ctxt->error = 1;					\
36    xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
37#define NEXT ctxt->cur++
38#define CUR (*(ctxt->cur))
39#define NXT(index) (ctxt->cur[index])
40
41#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
42#define NEXTL(l) ctxt->cur += l;
43
44/**
45 * TODO:
46 *
47 * macro to flag unimplemented blocks
48 */
49#define TODO 								\
50    xmlGenericError(xmlGenericErrorContext,				\
51	    "Unimplemented block at %s:%d\n",				\
52            __FILE__, __LINE__);
53
54
55/************************************************************************
56 * 									*
57 * 			Datatypes and structures			*
58 * 									*
59 ************************************************************************/
60
61typedef enum {
62    XML_REGEXP_EPSILON = 1,
63    XML_REGEXP_CHARVAL,
64    XML_REGEXP_RANGES,
65    XML_REGEXP_SUBREG,
66    XML_REGEXP_STRING,
67    XML_REGEXP_ANYCHAR, /* . */
68    XML_REGEXP_ANYSPACE, /* \s */
69    XML_REGEXP_NOTSPACE, /* \S */
70    XML_REGEXP_INITNAME, /* \l */
71    XML_REGEXP_NOTINITNAME, /* \l */
72    XML_REGEXP_NAMECHAR, /* \c */
73    XML_REGEXP_NOTNAMECHAR, /* \C */
74    XML_REGEXP_DECIMAL, /* \d */
75    XML_REGEXP_NOTDECIMAL, /* \d */
76    XML_REGEXP_REALCHAR, /* \w */
77    XML_REGEXP_NOTREALCHAR, /* \w */
78    XML_REGEXP_LETTER,
79    XML_REGEXP_LETTER_UPPERCASE,
80    XML_REGEXP_LETTER_LOWERCASE,
81    XML_REGEXP_LETTER_TITLECASE,
82    XML_REGEXP_LETTER_MODIFIER,
83    XML_REGEXP_LETTER_OTHERS,
84    XML_REGEXP_MARK,
85    XML_REGEXP_MARK_NONSPACING,
86    XML_REGEXP_MARK_SPACECOMBINING,
87    XML_REGEXP_MARK_ENCLOSING,
88    XML_REGEXP_NUMBER,
89    XML_REGEXP_NUMBER_DECIMAL,
90    XML_REGEXP_NUMBER_LETTER,
91    XML_REGEXP_NUMBER_OTHERS,
92    XML_REGEXP_PUNCT,
93    XML_REGEXP_PUNCT_CONNECTOR,
94    XML_REGEXP_PUNCT_DASH,
95    XML_REGEXP_PUNCT_OPEN,
96    XML_REGEXP_PUNCT_CLOSE,
97    XML_REGEXP_PUNCT_INITQUOTE,
98    XML_REGEXP_PUNCT_FINQUOTE,
99    XML_REGEXP_PUNCT_OTHERS,
100    XML_REGEXP_SEPAR,
101    XML_REGEXP_SEPAR_SPACE,
102    XML_REGEXP_SEPAR_LINE,
103    XML_REGEXP_SEPAR_PARA,
104    XML_REGEXP_SYMBOL,
105    XML_REGEXP_SYMBOL_MATH,
106    XML_REGEXP_SYMBOL_CURRENCY,
107    XML_REGEXP_SYMBOL_MODIFIER,
108    XML_REGEXP_SYMBOL_OTHERS,
109    XML_REGEXP_OTHER,
110    XML_REGEXP_OTHER_CONTROL,
111    XML_REGEXP_OTHER_FORMAT,
112    XML_REGEXP_OTHER_PRIVATE,
113    XML_REGEXP_OTHER_NA,
114    XML_REGEXP_BLOCK_NAME
115} xmlRegAtomType;
116
117typedef enum {
118    XML_REGEXP_QUANT_EPSILON = 1,
119    XML_REGEXP_QUANT_ONCE,
120    XML_REGEXP_QUANT_OPT,
121    XML_REGEXP_QUANT_MULT,
122    XML_REGEXP_QUANT_PLUS,
123    XML_REGEXP_QUANT_ONCEONLY,
124    XML_REGEXP_QUANT_ALL,
125    XML_REGEXP_QUANT_RANGE
126} xmlRegQuantType;
127
128typedef enum {
129    XML_REGEXP_START_STATE = 1,
130    XML_REGEXP_FINAL_STATE,
131    XML_REGEXP_TRANS_STATE
132} xmlRegStateType;
133
134typedef enum {
135    XML_REGEXP_MARK_NORMAL = 0,
136    XML_REGEXP_MARK_START,
137    XML_REGEXP_MARK_VISITED
138} xmlRegMarkedType;
139
140typedef struct _xmlRegRange xmlRegRange;
141typedef xmlRegRange *xmlRegRangePtr;
142
143struct _xmlRegRange {
144    int neg;
145    xmlRegAtomType type;
146    int start;
147    int end;
148    xmlChar *blockName;
149};
150
151typedef struct _xmlRegAtom xmlRegAtom;
152typedef xmlRegAtom *xmlRegAtomPtr;
153
154typedef struct _xmlAutomataState xmlRegState;
155typedef xmlRegState *xmlRegStatePtr;
156
157struct _xmlRegAtom {
158    int no;
159    xmlRegAtomType type;
160    xmlRegQuantType quant;
161    int min;
162    int max;
163
164    void *valuep;
165    void *valuep2;
166    int neg;
167    int codepoint;
168    xmlRegStatePtr start;
169    xmlRegStatePtr stop;
170    int maxRanges;
171    int nbRanges;
172    xmlRegRangePtr *ranges;
173    void *data;
174};
175
176typedef struct _xmlRegCounter xmlRegCounter;
177typedef xmlRegCounter *xmlRegCounterPtr;
178
179struct _xmlRegCounter {
180    int min;
181    int max;
182};
183
184typedef struct _xmlRegTrans xmlRegTrans;
185typedef xmlRegTrans *xmlRegTransPtr;
186
187struct _xmlRegTrans {
188    xmlRegAtomPtr atom;
189    int to;
190    int counter;
191    int count;
192};
193
194struct _xmlAutomataState {
195    xmlRegStateType type;
196    xmlRegMarkedType mark;
197    xmlRegMarkedType reached;
198    int no;
199
200    int maxTrans;
201    int nbTrans;
202    xmlRegTrans *trans;
203};
204
205typedef struct _xmlAutomata xmlRegParserCtxt;
206typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
207
208struct _xmlAutomata {
209    xmlChar *string;
210    xmlChar *cur;
211
212    int error;
213    int neg;
214
215    xmlRegStatePtr start;
216    xmlRegStatePtr end;
217    xmlRegStatePtr state;
218
219    xmlRegAtomPtr atom;
220
221    int maxAtoms;
222    int nbAtoms;
223    xmlRegAtomPtr *atoms;
224
225    int maxStates;
226    int nbStates;
227    xmlRegStatePtr *states;
228
229    int maxCounters;
230    int nbCounters;
231    xmlRegCounter *counters;
232
233    int determinist;
234};
235
236struct _xmlRegexp {
237    xmlChar *string;
238    int nbStates;
239    xmlRegStatePtr *states;
240    int nbAtoms;
241    xmlRegAtomPtr *atoms;
242    int nbCounters;
243    xmlRegCounter *counters;
244    int determinist;
245    /*
246     * That's the compact form for determinists automatas
247     */
248    int nbstates;
249    int *compact;
250    int nbstrings;
251    xmlChar **stringMap;
252};
253
254typedef struct _xmlRegExecRollback xmlRegExecRollback;
255typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
256
257struct _xmlRegExecRollback {
258    xmlRegStatePtr state;/* the current state */
259    int index;		/* the index in the input stack */
260    int nextbranch;	/* the next transition to explore in that state */
261    int *counts;	/* save the automate state if it has some */
262};
263
264typedef struct _xmlRegInputToken xmlRegInputToken;
265typedef xmlRegInputToken *xmlRegInputTokenPtr;
266
267struct _xmlRegInputToken {
268    xmlChar *value;
269    void *data;
270};
271
272struct _xmlRegExecCtxt {
273    int status;		/* execution status != 0 indicate an error */
274    int determinist;	/* did we found an inderterministic behaviour */
275    xmlRegexpPtr comp;	/* the compiled regexp */
276    xmlRegExecCallbacks callback;
277    void *data;
278
279    xmlRegStatePtr state;/* the current state */
280    int transno;	/* the current transition on that state */
281    int transcount;	/* the number of char in char counted transitions */
282
283    /*
284     * A stack of rollback states
285     */
286    int maxRollbacks;
287    int nbRollbacks;
288    xmlRegExecRollback *rollbacks;
289
290    /*
291     * The state of the automata if any
292     */
293    int *counts;
294
295    /*
296     * The input stack
297     */
298    int inputStackMax;
299    int inputStackNr;
300    int index;
301    int *charStack;
302    const xmlChar *inputString; /* when operating on characters */
303    xmlRegInputTokenPtr inputStack;/* when operating on strings */
304
305};
306
307#define REGEXP_ALL_COUNTER	0x123456
308#define REGEXP_ALL_LAX_COUNTER	0x123457
309
310static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
311static void xmlRegFreeState(xmlRegStatePtr state);
312static void xmlRegFreeAtom(xmlRegAtomPtr atom);
313
314/************************************************************************
315 * 									*
316 * 			Allocation/Deallocation				*
317 * 									*
318 ************************************************************************/
319
320static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
321/**
322 * xmlRegEpxFromParse:
323 * @ctxt:  the parser context used to build it
324 *
325 * Allocate a new regexp and fill it with the reult from the parser
326 *
327 * Returns the new regexp or NULL in case of error
328 */
329static xmlRegexpPtr
330xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
331    xmlRegexpPtr ret;
332
333    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
334    if (ret == NULL)
335	return(NULL);
336    memset(ret, 0, sizeof(xmlRegexp));
337    ret->string = ctxt->string;
338    ctxt->string = NULL;
339    ret->nbStates = ctxt->nbStates;
340    ctxt->nbStates = 0;
341    ret->states = ctxt->states;
342    ctxt->states = NULL;
343    ret->nbAtoms = ctxt->nbAtoms;
344    ctxt->nbAtoms = 0;
345    ret->atoms = ctxt->atoms;
346    ctxt->atoms = NULL;
347    ret->nbCounters = ctxt->nbCounters;
348    ctxt->nbCounters = 0;
349    ret->counters = ctxt->counters;
350    ctxt->counters = NULL;
351    ret->determinist = ctxt->determinist;
352
353    if ((ret->determinist != 0) &&
354	(ret->nbCounters == 0) &&
355	(ret->atoms[0] != NULL) &&
356	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
357	int i, j, nbstates = 0, nbatoms = 0;
358	int *stateRemap;
359	int *stringRemap;
360	int *transitions;
361	xmlChar **stringMap;
362        xmlChar *value;
363
364	/*
365	 * Switch to a compact representation
366	 * 1/ counting the effective number of states left
367	 * 2/ conting the unique number of atoms, and check that
368	 *    they are all of the string type
369	 * 3/ build a table state x atom for the transitions
370	 */
371
372	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
373	for (i = 0;i < ret->nbStates;i++) {
374	    if (ret->states[i] != NULL) {
375		stateRemap[i] = nbstates;
376		nbstates++;
377	    } else {
378		stateRemap[i] = -1;
379	    }
380	}
381#ifdef DEBUG_COMPACTION
382	printf("Final: %d states\n", nbstates);
383#endif
384	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
385	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
386	for (i = 0;i < ret->nbAtoms;i++) {
387	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
388		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
389		value = ret->atoms[i]->valuep;
390                for (j = 0;j < nbatoms;j++) {
391		    if (xmlStrEqual(stringMap[j], value)) {
392			stringRemap[i] = j;
393			break;
394		    }
395		}
396		if (j >= nbatoms) {
397		    stringRemap[i] = nbatoms;
398		    stringMap[nbatoms] = xmlStrdup(value);
399		    nbatoms++;
400		}
401	    } else {
402		xmlFree(stateRemap);
403		xmlFree(stringRemap);
404		for (i = 0;i < nbatoms;i++)
405		    xmlFree(stringMap[i]);
406		xmlFree(stringMap);
407		goto fail_compact;
408	    }
409	}
410#ifdef DEBUG_COMPACTION
411	printf("Final: %d atoms\n", nbatoms);
412#endif
413
414	/*
415	 * Allocate the transition table. The first entry for each
416	 * state correspond to the state type.
417	 */
418	transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int));
419	memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int));
420
421	for (i = 0;i < ret->nbStates;i++) {
422	    int stateno, atomno, targetno, prev;
423	    xmlRegStatePtr state;
424	    xmlRegTransPtr trans;
425
426	    stateno = stateRemap[i];
427	    if (stateno == -1)
428		continue;
429	    state = ret->states[i];
430
431	    transitions[stateno * (nbatoms + 1)] = state->type;
432
433	    for (j = 0;j < state->nbTrans;j++) {
434		trans = &(state->trans[j]);
435		if ((trans->to == -1) || (trans->atom == NULL))
436		    continue;
437                atomno = stringRemap[trans->atom->no];
438		targetno = stateRemap[trans->to];
439		/*
440		 * if the same atome can generate transition to 2 different
441		 * states then it means the automata is not determinist and
442		 * the compact form can't be used !
443		 */
444		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
445		if (prev != 0) {
446		    if (prev != targetno + 1) {
447			printf("not determinist\n");
448			ret->determinist = 0;
449#ifdef DEBUG_COMPACTION
450			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
451			       i, j, trans->atom->no, trans->to, atomno, targetno);
452			printf("       previous to is %d\n", prev);
453#endif
454			ret->determinist = 0;
455			xmlFree(transitions);
456			xmlFree(stateRemap);
457			xmlFree(stringRemap);
458			for (i = 0;i < nbatoms;i++)
459			    xmlFree(stringMap[i]);
460			xmlFree(stringMap);
461			goto fail_compact;
462		    }
463		} else {
464#if 0
465		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
466			   i, j, trans->atom->no, trans->to, atomno, targetno);
467#endif
468		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
469			targetno + 1;; /* to avoid 0 */
470		}
471	    }
472	}
473	ret->determinist = 1;
474#ifdef DEBUG_COMPACTION
475	/*
476	 * Debug
477	 */
478	for (i = 0;i < nbstates;i++) {
479	    for (j = 0;j < nbatoms + 1;j++) {
480                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
481	    }
482	    printf("\n");
483	}
484	printf("\n");
485#endif
486	/*
487	 * Cleanup of the old data
488	 */
489	if (ret->states != NULL) {
490	    for (i = 0;i < ret->nbStates;i++)
491		xmlRegFreeState(ret->states[i]);
492	    xmlFree(ret->states);
493	}
494	ret->states = NULL;
495	ret->nbStates = 0;
496	if (ret->atoms != NULL) {
497	    for (i = 0;i < ret->nbAtoms;i++)
498		xmlRegFreeAtom(ret->atoms[i]);
499	    xmlFree(ret->atoms);
500	}
501	ret->atoms = NULL;
502	ret->nbAtoms = 0;
503
504	ret->compact = transitions;
505	ret->stringMap = stringMap;
506	ret->nbstrings = nbatoms;
507	ret->nbstates = nbstates;
508	xmlFree(stateRemap);
509	xmlFree(stringRemap);
510    }
511fail_compact:
512    return(ret);
513}
514
515/**
516 * xmlRegNewParserCtxt:
517 * @string:  the string to parse
518 *
519 * Allocate a new regexp parser context
520 *
521 * Returns the new context or NULL in case of error
522 */
523static xmlRegParserCtxtPtr
524xmlRegNewParserCtxt(const xmlChar *string) {
525    xmlRegParserCtxtPtr ret;
526
527    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
528    if (ret == NULL)
529	return(NULL);
530    memset(ret, 0, sizeof(xmlRegParserCtxt));
531    if (string != NULL)
532	ret->string = xmlStrdup(string);
533    ret->cur = ret->string;
534    ret->neg = 0;
535    ret->error = 0;
536    ret->determinist = -1;
537    return(ret);
538}
539
540/**
541 * xmlRegNewRange:
542 * @ctxt:  the regexp parser context
543 * @neg:  is that negative
544 * @type:  the type of range
545 * @start:  the start codepoint
546 * @end:  the end codepoint
547 *
548 * Allocate a new regexp range
549 *
550 * Returns the new range or NULL in case of error
551 */
552static xmlRegRangePtr
553xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
554	       int neg, xmlRegAtomType type, int start, int end) {
555    xmlRegRangePtr ret;
556
557    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
558    if (ret == NULL) {
559	ERROR("failed to allocate regexp range");
560	return(NULL);
561    }
562    ret->neg = neg;
563    ret->type = type;
564    ret->start = start;
565    ret->end = end;
566    return(ret);
567}
568
569/**
570 * xmlRegFreeRange:
571 * @range:  the regexp range
572 *
573 * Free a regexp range
574 */
575static void
576xmlRegFreeRange(xmlRegRangePtr range) {
577    if (range == NULL)
578	return;
579
580    if (range->blockName != NULL)
581	xmlFree(range->blockName);
582    xmlFree(range);
583}
584
585/**
586 * xmlRegNewAtom:
587 * @ctxt:  the regexp parser context
588 * @type:  the type of atom
589 *
590 * Allocate a new regexp range
591 *
592 * Returns the new atom or NULL in case of error
593 */
594static xmlRegAtomPtr
595xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
596    xmlRegAtomPtr ret;
597
598    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
599    if (ret == NULL) {
600	ERROR("failed to allocate regexp atom");
601	return(NULL);
602    }
603    memset(ret, 0, sizeof(xmlRegAtom));
604    ret->type = type;
605    ret->quant = XML_REGEXP_QUANT_ONCE;
606    ret->min = 0;
607    ret->max = 0;
608    return(ret);
609}
610
611/**
612 * xmlRegFreeAtom:
613 * @atom:  the regexp atom
614 *
615 * Free a regexp atom
616 */
617static void
618xmlRegFreeAtom(xmlRegAtomPtr atom) {
619    int i;
620
621    if (atom == NULL)
622	return;
623
624    for (i = 0;i < atom->nbRanges;i++)
625	xmlRegFreeRange(atom->ranges[i]);
626    if (atom->ranges != NULL)
627	xmlFree(atom->ranges);
628    if (atom->type == XML_REGEXP_STRING)
629	xmlFree(atom->valuep);
630    xmlFree(atom);
631}
632
633static xmlRegStatePtr
634xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
635    xmlRegStatePtr ret;
636
637    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
638    if (ret == NULL) {
639	ERROR("failed to allocate regexp state");
640	return(NULL);
641    }
642    memset(ret, 0, sizeof(xmlRegState));
643    ret->type = XML_REGEXP_TRANS_STATE;
644    ret->mark = XML_REGEXP_MARK_NORMAL;
645    return(ret);
646}
647
648/**
649 * xmlRegFreeState:
650 * @state:  the regexp state
651 *
652 * Free a regexp state
653 */
654static void
655xmlRegFreeState(xmlRegStatePtr state) {
656    if (state == NULL)
657	return;
658
659    if (state->trans != NULL)
660	xmlFree(state->trans);
661    xmlFree(state);
662}
663
664/**
665 * xmlRegFreeParserCtxt:
666 * @ctxt:  the regexp parser context
667 *
668 * Free a regexp parser context
669 */
670static void
671xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
672    int i;
673    if (ctxt == NULL)
674	return;
675
676    if (ctxt->string != NULL)
677	xmlFree(ctxt->string);
678    if (ctxt->states != NULL) {
679	for (i = 0;i < ctxt->nbStates;i++)
680	    xmlRegFreeState(ctxt->states[i]);
681	xmlFree(ctxt->states);
682    }
683    if (ctxt->atoms != NULL) {
684	for (i = 0;i < ctxt->nbAtoms;i++)
685	    xmlRegFreeAtom(ctxt->atoms[i]);
686	xmlFree(ctxt->atoms);
687    }
688    if (ctxt->counters != NULL)
689	xmlFree(ctxt->counters);
690    xmlFree(ctxt);
691}
692
693/************************************************************************
694 * 									*
695 * 			Display of Data structures			*
696 * 									*
697 ************************************************************************/
698
699static void
700xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
701    switch (type) {
702        case XML_REGEXP_EPSILON:
703	    fprintf(output, "epsilon "); break;
704        case XML_REGEXP_CHARVAL:
705	    fprintf(output, "charval "); break;
706        case XML_REGEXP_RANGES:
707	    fprintf(output, "ranges "); break;
708        case XML_REGEXP_SUBREG:
709	    fprintf(output, "subexpr "); break;
710        case XML_REGEXP_STRING:
711	    fprintf(output, "string "); break;
712        case XML_REGEXP_ANYCHAR:
713	    fprintf(output, "anychar "); break;
714        case XML_REGEXP_ANYSPACE:
715	    fprintf(output, "anyspace "); break;
716        case XML_REGEXP_NOTSPACE:
717	    fprintf(output, "notspace "); break;
718        case XML_REGEXP_INITNAME:
719	    fprintf(output, "initname "); break;
720        case XML_REGEXP_NOTINITNAME:
721	    fprintf(output, "notinitname "); break;
722        case XML_REGEXP_NAMECHAR:
723	    fprintf(output, "namechar "); break;
724        case XML_REGEXP_NOTNAMECHAR:
725	    fprintf(output, "notnamechar "); break;
726        case XML_REGEXP_DECIMAL:
727	    fprintf(output, "decimal "); break;
728        case XML_REGEXP_NOTDECIMAL:
729	    fprintf(output, "notdecimal "); break;
730        case XML_REGEXP_REALCHAR:
731	    fprintf(output, "realchar "); break;
732        case XML_REGEXP_NOTREALCHAR:
733	    fprintf(output, "notrealchar "); break;
734        case XML_REGEXP_LETTER:
735            fprintf(output, "LETTER "); break;
736        case XML_REGEXP_LETTER_UPPERCASE:
737            fprintf(output, "LETTER_UPPERCASE "); break;
738        case XML_REGEXP_LETTER_LOWERCASE:
739            fprintf(output, "LETTER_LOWERCASE "); break;
740        case XML_REGEXP_LETTER_TITLECASE:
741            fprintf(output, "LETTER_TITLECASE "); break;
742        case XML_REGEXP_LETTER_MODIFIER:
743            fprintf(output, "LETTER_MODIFIER "); break;
744        case XML_REGEXP_LETTER_OTHERS:
745            fprintf(output, "LETTER_OTHERS "); break;
746        case XML_REGEXP_MARK:
747            fprintf(output, "MARK "); break;
748        case XML_REGEXP_MARK_NONSPACING:
749            fprintf(output, "MARK_NONSPACING "); break;
750        case XML_REGEXP_MARK_SPACECOMBINING:
751            fprintf(output, "MARK_SPACECOMBINING "); break;
752        case XML_REGEXP_MARK_ENCLOSING:
753            fprintf(output, "MARK_ENCLOSING "); break;
754        case XML_REGEXP_NUMBER:
755            fprintf(output, "NUMBER "); break;
756        case XML_REGEXP_NUMBER_DECIMAL:
757            fprintf(output, "NUMBER_DECIMAL "); break;
758        case XML_REGEXP_NUMBER_LETTER:
759            fprintf(output, "NUMBER_LETTER "); break;
760        case XML_REGEXP_NUMBER_OTHERS:
761            fprintf(output, "NUMBER_OTHERS "); break;
762        case XML_REGEXP_PUNCT:
763            fprintf(output, "PUNCT "); break;
764        case XML_REGEXP_PUNCT_CONNECTOR:
765            fprintf(output, "PUNCT_CONNECTOR "); break;
766        case XML_REGEXP_PUNCT_DASH:
767            fprintf(output, "PUNCT_DASH "); break;
768        case XML_REGEXP_PUNCT_OPEN:
769            fprintf(output, "PUNCT_OPEN "); break;
770        case XML_REGEXP_PUNCT_CLOSE:
771            fprintf(output, "PUNCT_CLOSE "); break;
772        case XML_REGEXP_PUNCT_INITQUOTE:
773            fprintf(output, "PUNCT_INITQUOTE "); break;
774        case XML_REGEXP_PUNCT_FINQUOTE:
775            fprintf(output, "PUNCT_FINQUOTE "); break;
776        case XML_REGEXP_PUNCT_OTHERS:
777            fprintf(output, "PUNCT_OTHERS "); break;
778        case XML_REGEXP_SEPAR:
779            fprintf(output, "SEPAR "); break;
780        case XML_REGEXP_SEPAR_SPACE:
781            fprintf(output, "SEPAR_SPACE "); break;
782        case XML_REGEXP_SEPAR_LINE:
783            fprintf(output, "SEPAR_LINE "); break;
784        case XML_REGEXP_SEPAR_PARA:
785            fprintf(output, "SEPAR_PARA "); break;
786        case XML_REGEXP_SYMBOL:
787            fprintf(output, "SYMBOL "); break;
788        case XML_REGEXP_SYMBOL_MATH:
789            fprintf(output, "SYMBOL_MATH "); break;
790        case XML_REGEXP_SYMBOL_CURRENCY:
791            fprintf(output, "SYMBOL_CURRENCY "); break;
792        case XML_REGEXP_SYMBOL_MODIFIER:
793            fprintf(output, "SYMBOL_MODIFIER "); break;
794        case XML_REGEXP_SYMBOL_OTHERS:
795            fprintf(output, "SYMBOL_OTHERS "); break;
796        case XML_REGEXP_OTHER:
797            fprintf(output, "OTHER "); break;
798        case XML_REGEXP_OTHER_CONTROL:
799            fprintf(output, "OTHER_CONTROL "); break;
800        case XML_REGEXP_OTHER_FORMAT:
801            fprintf(output, "OTHER_FORMAT "); break;
802        case XML_REGEXP_OTHER_PRIVATE:
803            fprintf(output, "OTHER_PRIVATE "); break;
804        case XML_REGEXP_OTHER_NA:
805            fprintf(output, "OTHER_NA "); break;
806        case XML_REGEXP_BLOCK_NAME:
807	    fprintf(output, "BLOCK "); break;
808    }
809}
810
811static void
812xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
813    switch (type) {
814        case XML_REGEXP_QUANT_EPSILON:
815	    fprintf(output, "epsilon "); break;
816        case XML_REGEXP_QUANT_ONCE:
817	    fprintf(output, "once "); break;
818        case XML_REGEXP_QUANT_OPT:
819	    fprintf(output, "? "); break;
820        case XML_REGEXP_QUANT_MULT:
821	    fprintf(output, "* "); break;
822        case XML_REGEXP_QUANT_PLUS:
823	    fprintf(output, "+ "); break;
824	case XML_REGEXP_QUANT_RANGE:
825	    fprintf(output, "range "); break;
826	case XML_REGEXP_QUANT_ONCEONLY:
827	    fprintf(output, "onceonly "); break;
828	case XML_REGEXP_QUANT_ALL:
829	    fprintf(output, "all "); break;
830    }
831}
832static void
833xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
834    fprintf(output, "  range: ");
835    if (range->neg)
836	fprintf(output, "negative ");
837    xmlRegPrintAtomType(output, range->type);
838    fprintf(output, "%c - %c\n", range->start, range->end);
839}
840
841static void
842xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
843    fprintf(output, " atom: ");
844    if (atom == NULL) {
845	fprintf(output, "NULL\n");
846	return;
847    }
848    xmlRegPrintAtomType(output, atom->type);
849    xmlRegPrintQuantType(output, atom->quant);
850    if (atom->quant == XML_REGEXP_QUANT_RANGE)
851	fprintf(output, "%d-%d ", atom->min, atom->max);
852    if (atom->type == XML_REGEXP_STRING)
853	fprintf(output, "'%s' ", (char *) atom->valuep);
854    if (atom->type == XML_REGEXP_CHARVAL)
855	fprintf(output, "char %c\n", atom->codepoint);
856    else if (atom->type == XML_REGEXP_RANGES) {
857	int i;
858	fprintf(output, "%d entries\n", atom->nbRanges);
859	for (i = 0; i < atom->nbRanges;i++)
860	    xmlRegPrintRange(output, atom->ranges[i]);
861    } else if (atom->type == XML_REGEXP_SUBREG) {
862	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
863    } else {
864	fprintf(output, "\n");
865    }
866}
867
868static void
869xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
870    fprintf(output, "  trans: ");
871    if (trans == NULL) {
872	fprintf(output, "NULL\n");
873	return;
874    }
875    if (trans->to < 0) {
876	fprintf(output, "removed\n");
877	return;
878    }
879    if (trans->counter >= 0) {
880	fprintf(output, "counted %d, ", trans->counter);
881    }
882    if (trans->count == REGEXP_ALL_COUNTER) {
883	fprintf(output, "all transition, ");
884    } else if (trans->count >= 0) {
885	fprintf(output, "count based %d, ", trans->count);
886    }
887    if (trans->atom == NULL) {
888	fprintf(output, "epsilon to %d\n", trans->to);
889	return;
890    }
891    if (trans->atom->type == XML_REGEXP_CHARVAL)
892	fprintf(output, "char %c ", trans->atom->codepoint);
893    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
894}
895
896static void
897xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
898    int i;
899
900    fprintf(output, " state: ");
901    if (state == NULL) {
902	fprintf(output, "NULL\n");
903	return;
904    }
905    if (state->type == XML_REGEXP_START_STATE)
906	fprintf(output, "START ");
907    if (state->type == XML_REGEXP_FINAL_STATE)
908	fprintf(output, "FINAL ");
909
910    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
911    for (i = 0;i < state->nbTrans; i++) {
912	xmlRegPrintTrans(output, &(state->trans[i]));
913    }
914}
915
916#ifdef DEBUG_REGEXP_GRAPH
917static void
918xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
919    int i;
920
921    fprintf(output, " ctxt: ");
922    if (ctxt == NULL) {
923	fprintf(output, "NULL\n");
924	return;
925    }
926    fprintf(output, "'%s' ", ctxt->string);
927    if (ctxt->error)
928	fprintf(output, "error ");
929    if (ctxt->neg)
930	fprintf(output, "neg ");
931    fprintf(output, "\n");
932    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
933    for (i = 0;i < ctxt->nbAtoms; i++) {
934	fprintf(output, " %02d ", i);
935	xmlRegPrintAtom(output, ctxt->atoms[i]);
936    }
937    if (ctxt->atom != NULL) {
938	fprintf(output, "current atom:\n");
939	xmlRegPrintAtom(output, ctxt->atom);
940    }
941    fprintf(output, "%d states:", ctxt->nbStates);
942    if (ctxt->start != NULL)
943	fprintf(output, " start: %d", ctxt->start->no);
944    if (ctxt->end != NULL)
945	fprintf(output, " end: %d", ctxt->end->no);
946    fprintf(output, "\n");
947    for (i = 0;i < ctxt->nbStates; i++) {
948	xmlRegPrintState(output, ctxt->states[i]);
949    }
950    fprintf(output, "%d counters:\n", ctxt->nbCounters);
951    for (i = 0;i < ctxt->nbCounters; i++) {
952	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
953		                                ctxt->counters[i].max);
954    }
955}
956#endif
957
958/************************************************************************
959 * 									*
960 *		 Finite Automata structures manipulations		*
961 * 									*
962 ************************************************************************/
963
964static void
965xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
966	           int neg, xmlRegAtomType type, int start, int end,
967		   xmlChar *blockName) {
968    xmlRegRangePtr range;
969
970    if (atom == NULL) {
971	ERROR("add range: atom is NULL");
972	return;
973    }
974    if (atom->type != XML_REGEXP_RANGES) {
975	ERROR("add range: atom is not ranges");
976	return;
977    }
978    if (atom->maxRanges == 0) {
979	atom->maxRanges = 4;
980	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
981		                             sizeof(xmlRegRangePtr));
982	if (atom->ranges == NULL) {
983	    ERROR("add range: allocation failed");
984	    atom->maxRanges = 0;
985	    return;
986	}
987    } else if (atom->nbRanges >= atom->maxRanges) {
988	xmlRegRangePtr *tmp;
989	atom->maxRanges *= 2;
990	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
991		                             sizeof(xmlRegRangePtr));
992	if (tmp == NULL) {
993	    ERROR("add range: allocation failed");
994	    atom->maxRanges /= 2;
995	    return;
996	}
997	atom->ranges = tmp;
998    }
999    range = xmlRegNewRange(ctxt, neg, type, start, end);
1000    if (range == NULL)
1001	return;
1002    range->blockName = blockName;
1003    atom->ranges[atom->nbRanges++] = range;
1004
1005}
1006
1007static int
1008xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1009    if (ctxt->maxCounters == 0) {
1010	ctxt->maxCounters = 4;
1011	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1012		                             sizeof(xmlRegCounter));
1013	if (ctxt->counters == NULL) {
1014	    ERROR("reg counter: allocation failed");
1015	    ctxt->maxCounters = 0;
1016	    return(-1);
1017	}
1018    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1019	xmlRegCounter *tmp;
1020	ctxt->maxCounters *= 2;
1021	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1022		                           sizeof(xmlRegCounter));
1023	if (tmp == NULL) {
1024	    ERROR("reg counter: allocation failed");
1025	    ctxt->maxCounters /= 2;
1026	    return(-1);
1027	}
1028	ctxt->counters = tmp;
1029    }
1030    ctxt->counters[ctxt->nbCounters].min = -1;
1031    ctxt->counters[ctxt->nbCounters].max = -1;
1032    return(ctxt->nbCounters++);
1033}
1034
1035static void
1036xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1037    if (atom == NULL) {
1038	ERROR("atom push: atom is NULL");
1039	return;
1040    }
1041    if (ctxt->maxAtoms == 0) {
1042	ctxt->maxAtoms = 4;
1043	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1044		                             sizeof(xmlRegAtomPtr));
1045	if (ctxt->atoms == NULL) {
1046	    ERROR("atom push: allocation failed");
1047	    ctxt->maxAtoms = 0;
1048	    return;
1049	}
1050    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1051	xmlRegAtomPtr *tmp;
1052	ctxt->maxAtoms *= 2;
1053	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1054		                             sizeof(xmlRegAtomPtr));
1055	if (tmp == NULL) {
1056	    ERROR("atom push: allocation failed");
1057	    ctxt->maxAtoms /= 2;
1058	    return;
1059	}
1060	ctxt->atoms = tmp;
1061    }
1062    atom->no = ctxt->nbAtoms;
1063    ctxt->atoms[ctxt->nbAtoms++] = atom;
1064}
1065
1066static void
1067xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1068	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1069		    int counter, int count) {
1070    if (state == NULL) {
1071	ERROR("add state: state is NULL");
1072	return;
1073    }
1074    if (target == NULL) {
1075	ERROR("add state: target is NULL");
1076	return;
1077    }
1078    if (state->maxTrans == 0) {
1079	state->maxTrans = 4;
1080	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1081		                             sizeof(xmlRegTrans));
1082	if (state->trans == NULL) {
1083	    ERROR("add range: allocation failed");
1084	    state->maxTrans = 0;
1085	    return;
1086	}
1087    } else if (state->nbTrans >= state->maxTrans) {
1088	xmlRegTrans *tmp;
1089	state->maxTrans *= 2;
1090	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1091		                             sizeof(xmlRegTrans));
1092	if (tmp == NULL) {
1093	    ERROR("add range: allocation failed");
1094	    state->maxTrans /= 2;
1095	    return;
1096	}
1097	state->trans = tmp;
1098    }
1099#ifdef DEBUG_REGEXP_GRAPH
1100    printf("Add trans from %d to %d ", state->no, target->no);
1101    if (count == REGEXP_ALL_COUNTER)
1102	printf("all transition");
1103    else if (count >= 0)
1104	printf("count based %d", count);
1105    else if (counter >= 0)
1106	printf("counted %d", counter);
1107    else if (atom == NULL)
1108	printf("epsilon transition");
1109    printf("\n");
1110#endif
1111
1112    state->trans[state->nbTrans].atom = atom;
1113    state->trans[state->nbTrans].to = target->no;
1114    state->trans[state->nbTrans].counter = counter;
1115    state->trans[state->nbTrans].count = count;
1116    state->nbTrans++;
1117}
1118
1119static void
1120xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1121    if (ctxt->maxStates == 0) {
1122	ctxt->maxStates = 4;
1123	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1124		                             sizeof(xmlRegStatePtr));
1125	if (ctxt->states == NULL) {
1126	    ERROR("add range: allocation failed");
1127	    ctxt->maxStates = 0;
1128	    return;
1129	}
1130    } else if (ctxt->nbStates >= ctxt->maxStates) {
1131	xmlRegStatePtr *tmp;
1132	ctxt->maxStates *= 2;
1133	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1134		                             sizeof(xmlRegStatePtr));
1135	if (tmp == NULL) {
1136	    ERROR("add range: allocation failed");
1137	    ctxt->maxStates /= 2;
1138	    return;
1139	}
1140	ctxt->states = tmp;
1141    }
1142    state->no = ctxt->nbStates;
1143    ctxt->states[ctxt->nbStates++] = state;
1144}
1145
1146/**
1147 * xmlFAGenerateAllTransition:
1148 * @ctxt:  a regexp parser context
1149 * @from:  the from state
1150 * @to:  the target state or NULL for building a new one
1151 * @lax:
1152 *
1153 */
1154static void
1155xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1156			   xmlRegStatePtr from, xmlRegStatePtr to,
1157			   int lax) {
1158    if (to == NULL) {
1159	to = xmlRegNewState(ctxt);
1160	xmlRegStatePush(ctxt, to);
1161	ctxt->state = to;
1162    }
1163    if (lax)
1164	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1165    else
1166	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1167}
1168
1169/**
1170 * xmlFAGenerateEpsilonTransition:
1171 * @ctxt:  a regexp parser context
1172 * @from:  the from state
1173 * @to:  the target state or NULL for building a new one
1174 *
1175 */
1176static void
1177xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1178			       xmlRegStatePtr from, xmlRegStatePtr to) {
1179    if (to == NULL) {
1180	to = xmlRegNewState(ctxt);
1181	xmlRegStatePush(ctxt, to);
1182	ctxt->state = to;
1183    }
1184    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1185}
1186
1187/**
1188 * xmlFAGenerateCountedEpsilonTransition:
1189 * @ctxt:  a regexp parser context
1190 * @from:  the from state
1191 * @to:  the target state or NULL for building a new one
1192 * counter:  the counter for that transition
1193 *
1194 */
1195static void
1196xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1197	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1198    if (to == NULL) {
1199	to = xmlRegNewState(ctxt);
1200	xmlRegStatePush(ctxt, to);
1201	ctxt->state = to;
1202    }
1203    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1204}
1205
1206/**
1207 * xmlFAGenerateCountedTransition:
1208 * @ctxt:  a regexp parser context
1209 * @from:  the from state
1210 * @to:  the target state or NULL for building a new one
1211 * counter:  the counter for that transition
1212 *
1213 */
1214static void
1215xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1216	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1217    if (to == NULL) {
1218	to = xmlRegNewState(ctxt);
1219	xmlRegStatePush(ctxt, to);
1220	ctxt->state = to;
1221    }
1222    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1223}
1224
1225/**
1226 * xmlFAGenerateTransitions:
1227 * @ctxt:  a regexp parser context
1228 * @from:  the from state
1229 * @to:  the target state or NULL for building a new one
1230 * @atom:  the atom generating the transition
1231 *
1232 */
1233static void
1234xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1235	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1236    if (atom == NULL) {
1237	ERROR("genrate transition: atom == NULL");
1238	return;
1239    }
1240    if (atom->type == XML_REGEXP_SUBREG) {
1241	/*
1242	 * this is a subexpression handling one should not need to
1243	 * create a new node excep for XML_REGEXP_QUANT_RANGE.
1244	 */
1245	xmlRegAtomPush(ctxt, atom);
1246	if ((to != NULL) && (atom->stop != to) &&
1247	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1248	    /*
1249	     * Generate an epsilon transition to link to the target
1250	     */
1251	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1252	}
1253	switch (atom->quant) {
1254	    case XML_REGEXP_QUANT_OPT:
1255		atom->quant = XML_REGEXP_QUANT_ONCE;
1256		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1257		break;
1258	    case XML_REGEXP_QUANT_MULT:
1259		atom->quant = XML_REGEXP_QUANT_ONCE;
1260		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1261		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1262		break;
1263	    case XML_REGEXP_QUANT_PLUS:
1264		atom->quant = XML_REGEXP_QUANT_ONCE;
1265		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1266		break;
1267	    case XML_REGEXP_QUANT_RANGE: {
1268		int counter;
1269		xmlRegStatePtr newstate;
1270
1271		/*
1272		 * This one is nasty:
1273		 *   1/ register a new counter
1274		 *   2/ register an epsilon transition associated to
1275		 *      this counter going from atom->stop to atom->start
1276		 *   3/ create a new state
1277		 *   4/ generate a counted transition from atom->stop to
1278		 *      that state
1279		 */
1280		counter = xmlRegGetCounter(ctxt);
1281		ctxt->counters[counter].min = atom->min - 1;
1282		ctxt->counters[counter].max = atom->max - 1;
1283		atom->min = 0;
1284		atom->max = 0;
1285		atom->quant = XML_REGEXP_QUANT_ONCE;
1286		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1287			                              atom->start, counter);
1288		if (to != NULL) {
1289		    newstate = to;
1290		} else {
1291		    newstate = xmlRegNewState(ctxt);
1292		    xmlRegStatePush(ctxt, newstate);
1293		    ctxt->state = newstate;
1294		}
1295		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1296			                       newstate, counter);
1297	    }
1298	    default:
1299		break;
1300	}
1301	return;
1302    } else {
1303	if (to == NULL) {
1304	    to = xmlRegNewState(ctxt);
1305	    xmlRegStatePush(ctxt, to);
1306	}
1307	xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1308	xmlRegAtomPush(ctxt, atom);
1309	ctxt->state = to;
1310    }
1311    switch (atom->quant) {
1312	case XML_REGEXP_QUANT_OPT:
1313	    atom->quant = XML_REGEXP_QUANT_ONCE;
1314	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1315	    break;
1316	case XML_REGEXP_QUANT_MULT:
1317	    atom->quant = XML_REGEXP_QUANT_ONCE;
1318	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1319	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1320	    break;
1321	case XML_REGEXP_QUANT_PLUS:
1322	    atom->quant = XML_REGEXP_QUANT_ONCE;
1323	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1324	    break;
1325	default:
1326	    break;
1327    }
1328}
1329
1330/**
1331 * xmlFAReduceEpsilonTransitions:
1332 * @ctxt:  a regexp parser context
1333 * @fromnr:  the from state
1334 * @tonr:  the to state
1335 * @cpunter:  should that transition be associted to a counted
1336 *
1337 */
1338static void
1339xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1340	                      int tonr, int counter) {
1341    int transnr;
1342    xmlRegStatePtr from;
1343    xmlRegStatePtr to;
1344
1345#ifdef DEBUG_REGEXP_GRAPH
1346    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1347#endif
1348    from = ctxt->states[fromnr];
1349    if (from == NULL)
1350	return;
1351    to = ctxt->states[tonr];
1352    if (to == NULL)
1353	return;
1354    if ((to->mark == XML_REGEXP_MARK_START) ||
1355	(to->mark == XML_REGEXP_MARK_VISITED))
1356	return;
1357
1358    to->mark = XML_REGEXP_MARK_VISITED;
1359    if (to->type == XML_REGEXP_FINAL_STATE) {
1360#ifdef DEBUG_REGEXP_GRAPH
1361	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1362#endif
1363	from->type = XML_REGEXP_FINAL_STATE;
1364    }
1365    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1366	if (to->trans[transnr].atom == NULL) {
1367	    /*
1368	     * Don't remove counted transitions
1369	     * Don't loop either
1370	     */
1371	    if (to->trans[transnr].to != fromnr) {
1372		if (to->trans[transnr].count >= 0) {
1373		    int newto = to->trans[transnr].to;
1374
1375		    xmlRegStateAddTrans(ctxt, from, NULL,
1376					ctxt->states[newto],
1377					-1, to->trans[transnr].count);
1378		} else {
1379#ifdef DEBUG_REGEXP_GRAPH
1380		    printf("Found epsilon trans %d from %d to %d\n",
1381			   transnr, tonr, to->trans[transnr].to);
1382#endif
1383		    if (to->trans[transnr].counter >= 0) {
1384			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1385					      to->trans[transnr].to,
1386					      to->trans[transnr].counter);
1387		    } else {
1388			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1389					      to->trans[transnr].to,
1390					      counter);
1391		    }
1392		}
1393	    }
1394	} else {
1395	    int newto = to->trans[transnr].to;
1396
1397	    if (to->trans[transnr].counter >= 0) {
1398		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1399				    ctxt->states[newto],
1400				    to->trans[transnr].counter, -1);
1401	    } else {
1402		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1403				    ctxt->states[newto], counter, -1);
1404	    }
1405	}
1406    }
1407    to->mark = XML_REGEXP_MARK_NORMAL;
1408}
1409
1410/**
1411 * xmlFAEliminateEpsilonTransitions:
1412 * @ctxt:  a regexp parser context
1413 *
1414 */
1415static void
1416xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1417    int statenr, transnr;
1418    xmlRegStatePtr state;
1419
1420    /*
1421     * build the completed transitions bypassing the epsilons
1422     * Use a marking algorithm to avoid loops
1423     */
1424    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1425	state = ctxt->states[statenr];
1426	if (state == NULL)
1427	    continue;
1428	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1429	    if ((state->trans[transnr].atom == NULL) &&
1430		(state->trans[transnr].to >= 0)) {
1431		if (state->trans[transnr].to == statenr) {
1432		    state->trans[transnr].to = -1;
1433#ifdef DEBUG_REGEXP_GRAPH
1434		    printf("Removed loopback epsilon trans %d on %d\n",
1435			   transnr, statenr);
1436#endif
1437		} else if (state->trans[transnr].count < 0) {
1438		    int newto = state->trans[transnr].to;
1439
1440#ifdef DEBUG_REGEXP_GRAPH
1441		    printf("Found epsilon trans %d from %d to %d\n",
1442			   transnr, statenr, newto);
1443#endif
1444		    state->mark = XML_REGEXP_MARK_START;
1445		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1446				      newto, state->trans[transnr].counter);
1447		    state->mark = XML_REGEXP_MARK_NORMAL;
1448#ifdef DEBUG_REGEXP_GRAPH
1449		} else {
1450		    printf("Found counted transition %d on %d\n",
1451			   transnr, statenr);
1452#endif
1453	        }
1454	    }
1455	}
1456    }
1457    /*
1458     * Eliminate the epsilon transitions
1459     */
1460    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1461	state = ctxt->states[statenr];
1462	if (state == NULL)
1463	    continue;
1464	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1465	    if ((state->trans[transnr].atom == NULL) &&
1466		(state->trans[transnr].count < 0) &&
1467		(state->trans[transnr].to >= 0)) {
1468		state->trans[transnr].to = -1;
1469	    }
1470	}
1471    }
1472
1473    /*
1474     * Use this pass to detect unreachable states too
1475     */
1476    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1477	state = ctxt->states[statenr];
1478	if (state != NULL)
1479	    state->reached = 0;
1480    }
1481    state = ctxt->states[0];
1482    if (state != NULL)
1483	state->reached = 1;
1484    while (state != NULL) {
1485	xmlRegStatePtr target = NULL;
1486	state->reached = 2;
1487	/*
1488	 * Mark all state reachable from the current reachable state
1489	 */
1490	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1491	    if ((state->trans[transnr].to >= 0) &&
1492		((state->trans[transnr].atom != NULL) ||
1493		 (state->trans[transnr].count >= 0))) {
1494		int newto = state->trans[transnr].to;
1495
1496		if (ctxt->states[newto] == NULL)
1497		    continue;
1498		if (ctxt->states[newto]->reached == 0) {
1499		    ctxt->states[newto]->reached = 1;
1500		    target = ctxt->states[newto];
1501		}
1502	    }
1503	}
1504	/*
1505	 * find the next accessible state not explored
1506	 */
1507	if (target == NULL) {
1508	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1509		state = ctxt->states[statenr];
1510		if ((state != NULL) && (state->reached == 1)) {
1511		    target = state;
1512		    break;
1513		}
1514	    }
1515	}
1516	state = target;
1517    }
1518    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1519	state = ctxt->states[statenr];
1520	if ((state != NULL) && (state->reached == 0)) {
1521#ifdef DEBUG_REGEXP_GRAPH
1522	    printf("Removed unreachable state %d\n", statenr);
1523#endif
1524	    xmlRegFreeState(state);
1525	    ctxt->states[statenr] = NULL;
1526	}
1527    }
1528
1529}
1530
1531/**
1532 * xmlFACompareAtoms:
1533 * @atom1:  an atom
1534 * @atom2:  an atom
1535 *
1536 * Compares two atoms to check whether they are equivatents
1537 *
1538 * Returns 1 if yes and 0 otherwise
1539 */
1540static int
1541xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
1542    if (atom1 == atom2)
1543	return(1);
1544    if ((atom1 == NULL) || (atom2 == NULL))
1545	return(0);
1546
1547    if (atom1->type != atom2->type)
1548	return(0);
1549    switch (atom1->type) {
1550        case XML_REGEXP_STRING:
1551	    return(xmlStrEqual((xmlChar *)atom1->valuep,
1552			       (xmlChar *)atom2->valuep));
1553        case XML_REGEXP_EPSILON:
1554	    return(1);
1555        case XML_REGEXP_CHARVAL:
1556	    return(atom1->codepoint == atom2->codepoint);
1557        case XML_REGEXP_RANGES:
1558	    TODO;
1559	    return(0);
1560	default:
1561	    break;
1562    }
1563    return(1);
1564}
1565
1566/**
1567 * xmlFARecurseDeterminism:
1568 * @ctxt:  a regexp parser context
1569 *
1570 * Check whether the associated regexp is determinist,
1571 * should be called after xmlFAEliminateEpsilonTransitions()
1572 *
1573 */
1574static int
1575xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1576	                 int to, xmlRegAtomPtr atom) {
1577    int ret = 1;
1578    int transnr;
1579    xmlRegTransPtr t1;
1580
1581    if (state == NULL)
1582	return(ret);
1583    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1584	t1 = &(state->trans[transnr]);
1585	/*
1586	 * check transitions conflicting with the one looked at
1587	 */
1588	if (t1->atom == NULL) {
1589	    if (t1->to == -1)
1590		continue;
1591	    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1592		                           to, atom);
1593	    if (ret == 0)
1594		return(0);
1595	    continue;
1596	}
1597	if (t1->to != to)
1598	    continue;
1599	if (xmlFACompareAtoms(t1->atom, atom))
1600	    return(0);
1601    }
1602    return(ret);
1603}
1604
1605/**
1606 * xmlFAComputesDeterminism:
1607 * @ctxt:  a regexp parser context
1608 *
1609 * Check whether the associated regexp is determinist,
1610 * should be called after xmlFAEliminateEpsilonTransitions()
1611 *
1612 */
1613static int
1614xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
1615    int statenr, transnr;
1616    xmlRegStatePtr state;
1617    xmlRegTransPtr t1, t2;
1618    int i;
1619    int ret = 1;
1620
1621#ifdef DEBUG_REGEXP_GRAPH
1622    printf("xmlFAComputesDeterminism\n");
1623    xmlRegPrintCtxt(stdout, ctxt);
1624#endif
1625    if (ctxt->determinist != -1)
1626	return(ctxt->determinist);
1627
1628    /*
1629     * Check for all states that there isn't 2 transitions
1630     * with the same atom and a different target.
1631     */
1632    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1633	state = ctxt->states[statenr];
1634	if (state == NULL)
1635	    continue;
1636	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1637	    t1 = &(state->trans[transnr]);
1638	    /*
1639	     * Determinism checks in case of counted or all transitions
1640	     * will have to be handled separately
1641	     */
1642	    if (t1->atom == NULL)
1643		continue;
1644	    if (t1->to == -1) /* eliminated */
1645		continue;
1646	    for (i = 0;i < transnr;i++) {
1647		t2 = &(state->trans[i]);
1648		if (t2->to == -1) /* eliminated */
1649		    continue;
1650		if (t2->atom != NULL) {
1651		    if (t1->to == t2->to) {
1652			if (xmlFACompareAtoms(t1->atom, t2->atom))
1653			    t2->to = -1; /* eliminate */
1654		    } else {
1655			/* not determinist ! */
1656			if (xmlFACompareAtoms(t1->atom, t2->atom))
1657			    ret = 0;
1658		    }
1659		} else if (t1->to != -1) {
1660		    /*
1661		     * do the closure in case of remaining specific
1662		     * epsilon transitions like choices or all
1663		     */
1664		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1665						   t2->to, t2->atom);
1666		    if (ret == 0)
1667			return(0);
1668		}
1669	    }
1670	    if (ret == 0)
1671		break;
1672	}
1673	if (ret == 0)
1674	    break;
1675    }
1676    ctxt->determinist = ret;
1677    return(ret);
1678}
1679
1680/************************************************************************
1681 * 									*
1682 *	Routines to check input against transition atoms		*
1683 * 									*
1684 ************************************************************************/
1685
1686static int
1687xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
1688	                  int start, int end, const xmlChar *blockName) {
1689    int ret = 0;
1690
1691    switch (type) {
1692        case XML_REGEXP_STRING:
1693        case XML_REGEXP_SUBREG:
1694        case XML_REGEXP_RANGES:
1695        case XML_REGEXP_EPSILON:
1696	    return(-1);
1697        case XML_REGEXP_ANYCHAR:
1698	    ret = ((codepoint != '\n') && (codepoint != '\r'));
1699	    break;
1700        case XML_REGEXP_CHARVAL:
1701	    ret = ((codepoint >= start) && (codepoint <= end));
1702	    break;
1703        case XML_REGEXP_NOTSPACE:
1704	    neg = !neg;
1705        case XML_REGEXP_ANYSPACE:
1706	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
1707		   (codepoint == '\t') || (codepoint == ' '));
1708	    break;
1709        case XML_REGEXP_NOTINITNAME:
1710	    neg = !neg;
1711        case XML_REGEXP_INITNAME:
1712	    ret = (xmlIsLetter(codepoint) ||
1713		   (codepoint == '_') || (codepoint == ':'));
1714	    break;
1715        case XML_REGEXP_NOTNAMECHAR:
1716	    neg = !neg;
1717        case XML_REGEXP_NAMECHAR:
1718	    ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
1719		   (codepoint == '.') || (codepoint == '-') ||
1720		   (codepoint == '_') || (codepoint == ':') ||
1721		   xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
1722	    break;
1723        case XML_REGEXP_NOTDECIMAL:
1724	    neg = !neg;
1725        case XML_REGEXP_DECIMAL:
1726	    ret = xmlUCSIsCatNd(codepoint);
1727	    break;
1728        case XML_REGEXP_REALCHAR:
1729	    neg = !neg;
1730        case XML_REGEXP_NOTREALCHAR:
1731	    ret = xmlUCSIsCatP(codepoint);
1732	    if (ret == 0)
1733		ret = xmlUCSIsCatZ(codepoint);
1734	    if (ret == 0)
1735		ret = xmlUCSIsCatC(codepoint);
1736	    break;
1737        case XML_REGEXP_LETTER:
1738	    ret = xmlUCSIsCatL(codepoint);
1739	    break;
1740        case XML_REGEXP_LETTER_UPPERCASE:
1741	    ret = xmlUCSIsCatLu(codepoint);
1742	    break;
1743        case XML_REGEXP_LETTER_LOWERCASE:
1744	    ret = xmlUCSIsCatLl(codepoint);
1745	    break;
1746        case XML_REGEXP_LETTER_TITLECASE:
1747	    ret = xmlUCSIsCatLt(codepoint);
1748	    break;
1749        case XML_REGEXP_LETTER_MODIFIER:
1750	    ret = xmlUCSIsCatLm(codepoint);
1751	    break;
1752        case XML_REGEXP_LETTER_OTHERS:
1753	    ret = xmlUCSIsCatLo(codepoint);
1754	    break;
1755        case XML_REGEXP_MARK:
1756	    ret = xmlUCSIsCatM(codepoint);
1757	    break;
1758        case XML_REGEXP_MARK_NONSPACING:
1759	    ret = xmlUCSIsCatMn(codepoint);
1760	    break;
1761        case XML_REGEXP_MARK_SPACECOMBINING:
1762	    ret = xmlUCSIsCatMc(codepoint);
1763	    break;
1764        case XML_REGEXP_MARK_ENCLOSING:
1765	    ret = xmlUCSIsCatMe(codepoint);
1766	    break;
1767        case XML_REGEXP_NUMBER:
1768	    ret = xmlUCSIsCatN(codepoint);
1769	    break;
1770        case XML_REGEXP_NUMBER_DECIMAL:
1771	    ret = xmlUCSIsCatNd(codepoint);
1772	    break;
1773        case XML_REGEXP_NUMBER_LETTER:
1774	    ret = xmlUCSIsCatNl(codepoint);
1775	    break;
1776        case XML_REGEXP_NUMBER_OTHERS:
1777	    ret = xmlUCSIsCatNo(codepoint);
1778	    break;
1779        case XML_REGEXP_PUNCT:
1780	    ret = xmlUCSIsCatP(codepoint);
1781	    break;
1782        case XML_REGEXP_PUNCT_CONNECTOR:
1783	    ret = xmlUCSIsCatPc(codepoint);
1784	    break;
1785        case XML_REGEXP_PUNCT_DASH:
1786	    ret = xmlUCSIsCatPd(codepoint);
1787	    break;
1788        case XML_REGEXP_PUNCT_OPEN:
1789	    ret = xmlUCSIsCatPs(codepoint);
1790	    break;
1791        case XML_REGEXP_PUNCT_CLOSE:
1792	    ret = xmlUCSIsCatPe(codepoint);
1793	    break;
1794        case XML_REGEXP_PUNCT_INITQUOTE:
1795	    ret = xmlUCSIsCatPi(codepoint);
1796	    break;
1797        case XML_REGEXP_PUNCT_FINQUOTE:
1798	    ret = xmlUCSIsCatPf(codepoint);
1799	    break;
1800        case XML_REGEXP_PUNCT_OTHERS:
1801	    ret = xmlUCSIsCatPo(codepoint);
1802	    break;
1803        case XML_REGEXP_SEPAR:
1804	    ret = xmlUCSIsCatZ(codepoint);
1805	    break;
1806        case XML_REGEXP_SEPAR_SPACE:
1807	    ret = xmlUCSIsCatZs(codepoint);
1808	    break;
1809        case XML_REGEXP_SEPAR_LINE:
1810	    ret = xmlUCSIsCatZl(codepoint);
1811	    break;
1812        case XML_REGEXP_SEPAR_PARA:
1813	    ret = xmlUCSIsCatZp(codepoint);
1814	    break;
1815        case XML_REGEXP_SYMBOL:
1816	    ret = xmlUCSIsCatS(codepoint);
1817	    break;
1818        case XML_REGEXP_SYMBOL_MATH:
1819	    ret = xmlUCSIsCatSm(codepoint);
1820	    break;
1821        case XML_REGEXP_SYMBOL_CURRENCY:
1822	    ret = xmlUCSIsCatSc(codepoint);
1823	    break;
1824        case XML_REGEXP_SYMBOL_MODIFIER:
1825	    ret = xmlUCSIsCatSk(codepoint);
1826	    break;
1827        case XML_REGEXP_SYMBOL_OTHERS:
1828	    ret = xmlUCSIsCatSo(codepoint);
1829	    break;
1830        case XML_REGEXP_OTHER:
1831	    ret = xmlUCSIsCatC(codepoint);
1832	    break;
1833        case XML_REGEXP_OTHER_CONTROL:
1834	    ret = xmlUCSIsCatCc(codepoint);
1835	    break;
1836        case XML_REGEXP_OTHER_FORMAT:
1837	    ret = xmlUCSIsCatCf(codepoint);
1838	    break;
1839        case XML_REGEXP_OTHER_PRIVATE:
1840	    ret = xmlUCSIsCatCo(codepoint);
1841	    break;
1842        case XML_REGEXP_OTHER_NA:
1843	    /* ret = xmlUCSIsCatCn(codepoint); */
1844	    /* Seems it doesn't exist anymore in recent Unicode releases */
1845	    ret = 0;
1846	    break;
1847        case XML_REGEXP_BLOCK_NAME:
1848	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
1849	    break;
1850    }
1851    if (neg)
1852	return(!ret);
1853    return(ret);
1854}
1855
1856static int
1857xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
1858    int i, ret = 0;
1859    xmlRegRangePtr range;
1860
1861    if ((atom == NULL) || (!xmlIsChar(codepoint)))
1862	return(-1);
1863
1864    switch (atom->type) {
1865        case XML_REGEXP_SUBREG:
1866        case XML_REGEXP_EPSILON:
1867	    return(-1);
1868        case XML_REGEXP_CHARVAL:
1869            return(codepoint == atom->codepoint);
1870        case XML_REGEXP_RANGES: {
1871	    int accept = 0;
1872	    for (i = 0;i < atom->nbRanges;i++) {
1873		range = atom->ranges[i];
1874		if (range->neg) {
1875		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1876						0, range->start, range->end,
1877						range->blockName);
1878		    if (ret != 0)
1879			return(0); /* excluded char */
1880		} else {
1881		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1882						0, range->start, range->end,
1883						range->blockName);
1884		    if (ret != 0)
1885			accept = 1; /* might still be excluded */
1886		}
1887	    }
1888	    return(accept);
1889	}
1890        case XML_REGEXP_STRING:
1891	    printf("TODO: XML_REGEXP_STRING\n");
1892	    return(-1);
1893        case XML_REGEXP_ANYCHAR:
1894        case XML_REGEXP_ANYSPACE:
1895        case XML_REGEXP_NOTSPACE:
1896        case XML_REGEXP_INITNAME:
1897        case XML_REGEXP_NOTINITNAME:
1898        case XML_REGEXP_NAMECHAR:
1899        case XML_REGEXP_NOTNAMECHAR:
1900        case XML_REGEXP_DECIMAL:
1901        case XML_REGEXP_NOTDECIMAL:
1902        case XML_REGEXP_REALCHAR:
1903        case XML_REGEXP_NOTREALCHAR:
1904        case XML_REGEXP_LETTER:
1905        case XML_REGEXP_LETTER_UPPERCASE:
1906        case XML_REGEXP_LETTER_LOWERCASE:
1907        case XML_REGEXP_LETTER_TITLECASE:
1908        case XML_REGEXP_LETTER_MODIFIER:
1909        case XML_REGEXP_LETTER_OTHERS:
1910        case XML_REGEXP_MARK:
1911        case XML_REGEXP_MARK_NONSPACING:
1912        case XML_REGEXP_MARK_SPACECOMBINING:
1913        case XML_REGEXP_MARK_ENCLOSING:
1914        case XML_REGEXP_NUMBER:
1915        case XML_REGEXP_NUMBER_DECIMAL:
1916        case XML_REGEXP_NUMBER_LETTER:
1917        case XML_REGEXP_NUMBER_OTHERS:
1918        case XML_REGEXP_PUNCT:
1919        case XML_REGEXP_PUNCT_CONNECTOR:
1920        case XML_REGEXP_PUNCT_DASH:
1921        case XML_REGEXP_PUNCT_OPEN:
1922        case XML_REGEXP_PUNCT_CLOSE:
1923        case XML_REGEXP_PUNCT_INITQUOTE:
1924        case XML_REGEXP_PUNCT_FINQUOTE:
1925        case XML_REGEXP_PUNCT_OTHERS:
1926        case XML_REGEXP_SEPAR:
1927        case XML_REGEXP_SEPAR_SPACE:
1928        case XML_REGEXP_SEPAR_LINE:
1929        case XML_REGEXP_SEPAR_PARA:
1930        case XML_REGEXP_SYMBOL:
1931        case XML_REGEXP_SYMBOL_MATH:
1932        case XML_REGEXP_SYMBOL_CURRENCY:
1933        case XML_REGEXP_SYMBOL_MODIFIER:
1934        case XML_REGEXP_SYMBOL_OTHERS:
1935        case XML_REGEXP_OTHER:
1936        case XML_REGEXP_OTHER_CONTROL:
1937        case XML_REGEXP_OTHER_FORMAT:
1938        case XML_REGEXP_OTHER_PRIVATE:
1939        case XML_REGEXP_OTHER_NA:
1940	case XML_REGEXP_BLOCK_NAME:
1941	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
1942		                            (const xmlChar *)atom->valuep);
1943	    if (atom->neg)
1944		ret = !ret;
1945	    break;
1946    }
1947    return(ret);
1948}
1949
1950/************************************************************************
1951 * 									*
1952 *	Saving an restoring state of an execution context		*
1953 * 									*
1954 ************************************************************************/
1955
1956#ifdef DEBUG_REGEXP_EXEC
1957static void
1958xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
1959    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
1960    if (exec->inputStack != NULL) {
1961	int i;
1962	printf(": ");
1963	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
1964	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
1965    } else {
1966	printf(": %s", &(exec->inputString[exec->index]));
1967    }
1968    printf("\n");
1969}
1970#endif
1971
1972static void
1973xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
1974#ifdef DEBUG_REGEXP_EXEC
1975    printf("saving ");
1976    exec->transno++;
1977    xmlFARegDebugExec(exec);
1978    exec->transno--;
1979#endif
1980
1981    if (exec->maxRollbacks == 0) {
1982	exec->maxRollbacks = 4;
1983	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
1984		                             sizeof(xmlRegExecRollback));
1985	if (exec->rollbacks == NULL) {
1986	    fprintf(stderr, "exec save: allocation failed");
1987	    exec->maxRollbacks = 0;
1988	    return;
1989	}
1990	memset(exec->rollbacks, 0,
1991	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
1992    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
1993	xmlRegExecRollback *tmp;
1994	int len = exec->maxRollbacks;
1995
1996	exec->maxRollbacks *= 2;
1997	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
1998			exec->maxRollbacks * sizeof(xmlRegExecRollback));
1999	if (tmp == NULL) {
2000	    fprintf(stderr, "exec save: allocation failed");
2001	    exec->maxRollbacks /= 2;
2002	    return;
2003	}
2004	exec->rollbacks = tmp;
2005	tmp = &exec->rollbacks[len];
2006	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
2007    }
2008    exec->rollbacks[exec->nbRollbacks].state = exec->state;
2009    exec->rollbacks[exec->nbRollbacks].index = exec->index;
2010    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
2011    if (exec->comp->nbCounters > 0) {
2012	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2013	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
2014		xmlMalloc(exec->comp->nbCounters * sizeof(int));
2015	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2016		fprintf(stderr, "exec save: allocation failed");
2017		exec->status = -5;
2018		return;
2019	    }
2020	}
2021	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
2022	       exec->comp->nbCounters * sizeof(int));
2023    }
2024    exec->nbRollbacks++;
2025}
2026
2027static void
2028xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
2029    if (exec->nbRollbacks <= 0) {
2030	exec->status = -1;
2031#ifdef DEBUG_REGEXP_EXEC
2032	printf("rollback failed on empty stack\n");
2033#endif
2034	return;
2035    }
2036    exec->nbRollbacks--;
2037    exec->state = exec->rollbacks[exec->nbRollbacks].state;
2038    exec->index = exec->rollbacks[exec->nbRollbacks].index;
2039    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
2040    if (exec->comp->nbCounters > 0) {
2041	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2042	    fprintf(stderr, "exec save: allocation failed");
2043	    exec->status = -6;
2044	    return;
2045	}
2046	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
2047	       exec->comp->nbCounters * sizeof(int));
2048    }
2049
2050#ifdef DEBUG_REGEXP_EXEC
2051    printf("restored ");
2052    xmlFARegDebugExec(exec);
2053#endif
2054}
2055
2056/************************************************************************
2057 * 									*
2058 *	Verifyer, running an input against a compiled regexp		*
2059 * 									*
2060 ************************************************************************/
2061
2062static int
2063xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
2064    xmlRegExecCtxt execval;
2065    xmlRegExecCtxtPtr exec = &execval;
2066    int ret, codepoint, len;
2067
2068    exec->inputString = content;
2069    exec->index = 0;
2070    exec->determinist = 1;
2071    exec->maxRollbacks = 0;
2072    exec->nbRollbacks = 0;
2073    exec->rollbacks = NULL;
2074    exec->status = 0;
2075    exec->comp = comp;
2076    exec->state = comp->states[0];
2077    exec->transno = 0;
2078    exec->transcount = 0;
2079    if (comp->nbCounters > 0) {
2080	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2081	if (exec->counts == NULL)
2082	    return(-1);
2083        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2084    } else
2085	exec->counts = NULL;
2086    while ((exec->status == 0) &&
2087	   ((exec->inputString[exec->index] != 0) ||
2088	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2089	xmlRegTransPtr trans;
2090	xmlRegAtomPtr atom;
2091
2092	/*
2093	 * End of input on non-terminal state, rollback, however we may
2094	 * still have epsilon like transition for counted transitions
2095	 * on counters, in that case don't break too early.
2096	 */
2097	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
2098	    goto rollback;
2099
2100	exec->transcount = 0;
2101	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2102	    trans = &exec->state->trans[exec->transno];
2103	    if (trans->to < 0)
2104		continue;
2105	    atom = trans->atom;
2106	    ret = 0;
2107	    if (trans->count >= 0) {
2108		int count;
2109		xmlRegCounterPtr counter;
2110
2111		/*
2112		 * A counted transition.
2113		 */
2114
2115		count = exec->counts[trans->count];
2116		counter = &exec->comp->counters[trans->count];
2117#ifdef DEBUG_REGEXP_EXEC
2118		printf("testing count %d: val %d, min %d, max %d\n",
2119		       trans->count, count, counter->min,  counter->max);
2120#endif
2121		ret = ((count >= counter->min) && (count <= counter->max));
2122	    } else if (atom == NULL) {
2123		fprintf(stderr, "epsilon transition left at runtime\n");
2124		exec->status = -2;
2125		break;
2126	    } else if (exec->inputString[exec->index] != 0) {
2127                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2128		ret = xmlRegCheckCharacter(atom, codepoint);
2129		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2130		    xmlRegStatePtr to = comp->states[trans->to];
2131
2132		    /*
2133		     * this is a multiple input sequence
2134		     */
2135		    if (exec->state->nbTrans > exec->transno + 1) {
2136			xmlFARegExecSave(exec);
2137		    }
2138		    exec->transcount = 1;
2139		    do {
2140			/*
2141			 * Try to progress as much as possible on the input
2142			 */
2143			if (exec->transcount == atom->max) {
2144			    break;
2145			}
2146			exec->index += len;
2147			/*
2148			 * End of input: stop here
2149			 */
2150			if (exec->inputString[exec->index] == 0) {
2151			    exec->index -= len;
2152			    break;
2153			}
2154			if (exec->transcount >= atom->min) {
2155			    int transno = exec->transno;
2156			    xmlRegStatePtr state = exec->state;
2157
2158			    /*
2159			     * The transition is acceptable save it
2160			     */
2161			    exec->transno = -1; /* trick */
2162			    exec->state = to;
2163			    xmlFARegExecSave(exec);
2164			    exec->transno = transno;
2165			    exec->state = state;
2166			}
2167			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2168				              len);
2169			ret = xmlRegCheckCharacter(atom, codepoint);
2170			exec->transcount++;
2171		    } while (ret == 1);
2172		    if (exec->transcount < atom->min)
2173			ret = 0;
2174
2175		    /*
2176		     * If the last check failed but one transition was found
2177		     * possible, rollback
2178		     */
2179		    if (ret < 0)
2180			ret = 0;
2181		    if (ret == 0) {
2182			goto rollback;
2183		    }
2184		}
2185	    }
2186	    if (ret == 1) {
2187		if (exec->state->nbTrans > exec->transno + 1) {
2188		    xmlFARegExecSave(exec);
2189		}
2190		if (trans->counter >= 0) {
2191#ifdef DEBUG_REGEXP_EXEC
2192		    printf("Increasing count %d\n", trans->counter);
2193#endif
2194		    exec->counts[trans->counter]++;
2195		}
2196#ifdef DEBUG_REGEXP_EXEC
2197		printf("entering state %d\n", trans->to);
2198#endif
2199		exec->state = comp->states[trans->to];
2200		exec->transno = 0;
2201		if (trans->atom != NULL) {
2202		    exec->index += len;
2203		}
2204		goto progress;
2205	    } else if (ret < 0) {
2206		exec->status = -4;
2207		break;
2208	    }
2209	}
2210	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2211rollback:
2212	    /*
2213	     * Failed to find a way out
2214	     */
2215	    exec->determinist = 0;
2216	    xmlFARegExecRollBack(exec);
2217	}
2218progress:
2219	continue;
2220    }
2221    if (exec->rollbacks != NULL) {
2222	if (exec->counts != NULL) {
2223	    int i;
2224
2225	    for (i = 0;i < exec->maxRollbacks;i++)
2226		if (exec->rollbacks[i].counts != NULL)
2227		    xmlFree(exec->rollbacks[i].counts);
2228	}
2229	xmlFree(exec->rollbacks);
2230    }
2231    if (exec->counts != NULL)
2232	xmlFree(exec->counts);
2233    if (exec->status == 0)
2234	return(1);
2235    if (exec->status == -1)
2236	return(0);
2237    return(exec->status);
2238}
2239
2240/************************************************************************
2241 * 									*
2242 *	Progressive interface to the verifyer one atom at a time	*
2243 * 									*
2244 ************************************************************************/
2245
2246/**
2247 * xmlRegExecCtxtPtr:
2248 * @comp: a precompiled regular expression
2249 * @callback: a callback function used for handling progresses in the
2250 *            automata matching phase
2251 * @data: the context data associated to the callback in this context
2252 *
2253 * Build a context used for progressive evaluation of a regexp.
2254 */
2255xmlRegExecCtxtPtr
2256xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
2257    xmlRegExecCtxtPtr exec;
2258
2259    if (comp == NULL)
2260	return(NULL);
2261    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
2262    if (exec == NULL) {
2263	return(NULL);
2264    }
2265    memset(exec, 0, sizeof(xmlRegExecCtxt));
2266    exec->inputString = NULL;
2267    exec->index = 0;
2268    exec->determinist = 1;
2269    exec->maxRollbacks = 0;
2270    exec->nbRollbacks = 0;
2271    exec->rollbacks = NULL;
2272    exec->status = 0;
2273    exec->comp = comp;
2274    if (comp->compact == NULL)
2275	exec->state = comp->states[0];
2276    exec->transno = 0;
2277    exec->transcount = 0;
2278    exec->callback = callback;
2279    exec->data = data;
2280    if (comp->nbCounters > 0) {
2281	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2282	if (exec->counts == NULL) {
2283	    xmlFree(exec);
2284	    return(NULL);
2285	}
2286        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2287    } else
2288	exec->counts = NULL;
2289    exec->inputStackMax = 0;
2290    exec->inputStackNr = 0;
2291    exec->inputStack = NULL;
2292    return(exec);
2293}
2294
2295/**
2296 * xmlRegFreeExecCtxt:
2297 * @exec: a regular expression evaulation context
2298 *
2299 * Free the structures associated to a regular expression evaulation context.
2300 */
2301void
2302xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
2303    if (exec == NULL)
2304	return;
2305
2306    if (exec->rollbacks != NULL) {
2307	if (exec->counts != NULL) {
2308	    int i;
2309
2310	    for (i = 0;i < exec->maxRollbacks;i++)
2311		if (exec->rollbacks[i].counts != NULL)
2312		    xmlFree(exec->rollbacks[i].counts);
2313	}
2314	xmlFree(exec->rollbacks);
2315    }
2316    if (exec->counts != NULL)
2317	xmlFree(exec->counts);
2318    if (exec->inputStack != NULL) {
2319	int i;
2320
2321	for (i = 0;i < exec->inputStackNr;i++)
2322	    xmlFree(exec->inputStack[i].value);
2323	xmlFree(exec->inputStack);
2324    }
2325    xmlFree(exec);
2326}
2327
2328static void
2329xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2330	                    void *data) {
2331#ifdef DEBUG_PUSH
2332    printf("saving value: %d:%s\n", exec->inputStackNr, value);
2333#endif
2334    if (exec->inputStackMax == 0) {
2335	exec->inputStackMax = 4;
2336	exec->inputStack = (xmlRegInputTokenPtr)
2337	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
2338	if (exec->inputStack == NULL) {
2339	    fprintf(stderr, "push input: allocation failed");
2340	    exec->inputStackMax = 0;
2341	    return;
2342	}
2343    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
2344	xmlRegInputTokenPtr tmp;
2345
2346	exec->inputStackMax *= 2;
2347	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
2348			exec->inputStackMax * sizeof(xmlRegInputToken));
2349	if (tmp == NULL) {
2350	    fprintf(stderr, "push input: allocation failed");
2351	    exec->inputStackMax /= 2;
2352	    return;
2353	}
2354	exec->inputStack = tmp;
2355    }
2356    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
2357    exec->inputStack[exec->inputStackNr].data = data;
2358    exec->inputStackNr++;
2359    exec->inputStack[exec->inputStackNr].value = NULL;
2360    exec->inputStack[exec->inputStackNr].data = NULL;
2361}
2362
2363
2364/**
2365 * xmlRegCompactPushString:
2366 * @exec: a regexp execution context
2367 * @comp:  the precompiled exec with a compact table
2368 * @value: a string token input
2369 * @data: data associated to the token to reuse in callbacks
2370 *
2371 * Push one input token in the execution context
2372 *
2373 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2374 *     a negative value in case of error.
2375 */
2376static int
2377xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
2378	                xmlRegexpPtr comp,
2379	                const xmlChar *value,
2380	                void *data) {
2381    int state = exec->index;
2382    int i, target;
2383
2384    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
2385	return(-1);
2386
2387    if (value == NULL) {
2388	/*
2389	 * are we at a final state ?
2390	 */
2391	if (comp->compact[state * (comp->nbstrings + 1)] ==
2392            XML_REGEXP_FINAL_STATE)
2393	    return(1);
2394	return(0);
2395    }
2396
2397#ifdef DEBUG_PUSH
2398    printf("value pushed: %s\n", value);
2399#endif
2400
2401    /*
2402     * Examine all outside transition from current state
2403     */
2404    for (i = 0;i < comp->nbstrings;i++) {
2405	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
2406	if ((target > 0) && (target <= comp->nbstates)) {
2407	    target--; /* to avoid 0 */
2408	    if (xmlStrEqual(comp->stringMap[i], value)) {
2409		exec->index = target;
2410#ifdef DEBUG_PUSH
2411		printf("entering state %d\n", target);
2412#endif
2413		if (comp->compact[target * (comp->nbstrings + 1)] ==
2414		    XML_REGEXP_FINAL_STATE)
2415		    return(1);
2416		return(0);
2417	    }
2418	}
2419    }
2420    /*
2421     * Failed to find an exit transition out from current state for the
2422     * current token
2423     */
2424#ifdef DEBUG_PUSH
2425    printf("failed to find a transition for %s on state %d\n", value, state);
2426#endif
2427    exec->status = -1;
2428    return(-1);
2429}
2430
2431/**
2432 * xmlRegExecPushString:
2433 * @exec: a regexp execution context
2434 * @value: a string token input
2435 * @data: data associated to the token to reuse in callbacks
2436 *
2437 * Push one input token in the execution context
2438 *
2439 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2440 *     a negative value in case of error.
2441 */
2442int
2443xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2444	             void *data) {
2445    xmlRegTransPtr trans;
2446    xmlRegAtomPtr atom;
2447    int ret;
2448    int final = 0;
2449
2450    if (exec == NULL)
2451	return(-1);
2452    if (exec->comp == NULL)
2453	return(-1);
2454    if (exec->status != 0)
2455	return(exec->status);
2456
2457    if (exec->comp->compact != NULL)
2458	return(xmlRegCompactPushString(exec, exec->comp, value, data));
2459
2460    if (value == NULL) {
2461        if (exec->state->type == XML_REGEXP_FINAL_STATE)
2462	    return(1);
2463	final = 1;
2464    }
2465
2466#ifdef DEBUG_PUSH
2467    printf("value pushed: %s\n", value);
2468#endif
2469    /*
2470     * If we have an active rollback stack push the new value there
2471     * and get back to where we were left
2472     */
2473    if ((value != NULL) && (exec->inputStackNr > 0)) {
2474	xmlFARegExecSaveInputString(exec, value, data);
2475	value = exec->inputStack[exec->index].value;
2476	data = exec->inputStack[exec->index].data;
2477#ifdef DEBUG_PUSH
2478	printf("value loaded: %s\n", value);
2479#endif
2480    }
2481
2482    while ((exec->status == 0) &&
2483	   ((value != NULL) ||
2484	    ((final == 1) &&
2485	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
2486
2487	/*
2488	 * End of input on non-terminal state, rollback, however we may
2489	 * still have epsilon like transition for counted transitions
2490	 * on counters, in that case don't break too early.
2491	 */
2492	if ((value == NULL) && (exec->counts == NULL))
2493	    goto rollback;
2494
2495	exec->transcount = 0;
2496	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2497	    trans = &exec->state->trans[exec->transno];
2498	    if (trans->to < 0)
2499		continue;
2500	    atom = trans->atom;
2501	    ret = 0;
2502	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
2503		int i;
2504		int count;
2505		xmlRegTransPtr t;
2506		xmlRegCounterPtr counter;
2507
2508		ret = 0;
2509
2510#ifdef DEBUG_PUSH
2511		printf("testing all lax %d\n", trans->count);
2512#endif
2513		/*
2514		 * Check all counted transitions from the current state
2515		 */
2516		if ((value == NULL) && (final)) {
2517		    ret = 1;
2518		} else if (value != NULL) {
2519		    for (i = 0;i < exec->state->nbTrans;i++) {
2520			t = &exec->state->trans[i];
2521			if ((t->counter < 0) || (t == trans))
2522			    continue;
2523			counter = &exec->comp->counters[t->counter];
2524			count = exec->counts[t->counter];
2525			if ((count < counter->max) &&
2526		            (t->atom != NULL) &&
2527			    (xmlStrEqual(value, t->atom->valuep))) {
2528			    ret = 0;
2529			    break;
2530			}
2531			if ((count >= counter->min) &&
2532			    (count < counter->max) &&
2533			    (xmlStrEqual(value, t->atom->valuep))) {
2534			    ret = 1;
2535			    break;
2536			}
2537		    }
2538		}
2539	    } else if (trans->count == REGEXP_ALL_COUNTER) {
2540		int i;
2541		int count;
2542		xmlRegTransPtr t;
2543		xmlRegCounterPtr counter;
2544
2545		ret = 1;
2546
2547#ifdef DEBUG_PUSH
2548		printf("testing all %d\n", trans->count);
2549#endif
2550		/*
2551		 * Check all counted transitions from the current state
2552		 */
2553		for (i = 0;i < exec->state->nbTrans;i++) {
2554                    t = &exec->state->trans[i];
2555		    if ((t->counter < 0) || (t == trans))
2556			continue;
2557                    counter = &exec->comp->counters[t->counter];
2558		    count = exec->counts[t->counter];
2559		    if ((count < counter->min) || (count > counter->max)) {
2560			ret = 0;
2561			break;
2562		    }
2563		}
2564	    } else if (trans->count >= 0) {
2565		int count;
2566		xmlRegCounterPtr counter;
2567
2568		/*
2569		 * A counted transition.
2570		 */
2571
2572		count = exec->counts[trans->count];
2573		counter = &exec->comp->counters[trans->count];
2574#ifdef DEBUG_PUSH
2575		printf("testing count %d: val %d, min %d, max %d\n",
2576		       trans->count, count, counter->min,  counter->max);
2577#endif
2578		ret = ((count >= counter->min) && (count <= counter->max));
2579	    } else if (atom == NULL) {
2580		fprintf(stderr, "epsilon transition left at runtime\n");
2581		exec->status = -2;
2582		break;
2583	    } else if (value != NULL) {
2584		ret = xmlStrEqual(value, atom->valuep);
2585		if ((ret == 1) && (trans->counter >= 0)) {
2586		    xmlRegCounterPtr counter;
2587		    int count;
2588
2589		    count = exec->counts[trans->counter];
2590		    counter = &exec->comp->counters[trans->counter];
2591		    if (count >= counter->max)
2592			ret = 0;
2593		}
2594
2595		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2596		    xmlRegStatePtr to = exec->comp->states[trans->to];
2597
2598		    /*
2599		     * this is a multiple input sequence
2600		     */
2601		    if (exec->state->nbTrans > exec->transno + 1) {
2602			if (exec->inputStackNr <= 0) {
2603			    xmlFARegExecSaveInputString(exec, value, data);
2604			}
2605			xmlFARegExecSave(exec);
2606		    }
2607		    exec->transcount = 1;
2608		    do {
2609			/*
2610			 * Try to progress as much as possible on the input
2611			 */
2612			if (exec->transcount == atom->max) {
2613			    break;
2614			}
2615			exec->index++;
2616			value = exec->inputStack[exec->index].value;
2617			data = exec->inputStack[exec->index].data;
2618#ifdef DEBUG_PUSH
2619			printf("value loaded: %s\n", value);
2620#endif
2621
2622			/*
2623			 * End of input: stop here
2624			 */
2625			if (value == NULL) {
2626			    exec->index --;
2627			    break;
2628			}
2629			if (exec->transcount >= atom->min) {
2630			    int transno = exec->transno;
2631			    xmlRegStatePtr state = exec->state;
2632
2633			    /*
2634			     * The transition is acceptable save it
2635			     */
2636			    exec->transno = -1; /* trick */
2637			    exec->state = to;
2638			    if (exec->inputStackNr <= 0) {
2639				xmlFARegExecSaveInputString(exec, value, data);
2640			    }
2641			    xmlFARegExecSave(exec);
2642			    exec->transno = transno;
2643			    exec->state = state;
2644			}
2645			ret = xmlStrEqual(value, atom->valuep);
2646			exec->transcount++;
2647		    } while (ret == 1);
2648		    if (exec->transcount < atom->min)
2649			ret = 0;
2650
2651		    /*
2652		     * If the last check failed but one transition was found
2653		     * possible, rollback
2654		     */
2655		    if (ret < 0)
2656			ret = 0;
2657		    if (ret == 0) {
2658			goto rollback;
2659		    }
2660		}
2661	    }
2662	    if (ret == 1) {
2663		if ((exec->callback != NULL) && (atom != NULL)) {
2664		    exec->callback(exec->data, atom->valuep,
2665			           atom->data, data);
2666		}
2667		if (exec->state->nbTrans > exec->transno + 1) {
2668		    if (exec->inputStackNr <= 0) {
2669			xmlFARegExecSaveInputString(exec, value, data);
2670		    }
2671		    xmlFARegExecSave(exec);
2672		}
2673		if (trans->counter >= 0) {
2674#ifdef DEBUG_PUSH
2675		    printf("Increasing count %d\n", trans->counter);
2676#endif
2677		    exec->counts[trans->counter]++;
2678		}
2679#ifdef DEBUG_PUSH
2680		printf("entering state %d\n", trans->to);
2681#endif
2682		exec->state = exec->comp->states[trans->to];
2683		exec->transno = 0;
2684		if (trans->atom != NULL) {
2685		    if (exec->inputStack != NULL) {
2686			exec->index++;
2687			if (exec->index < exec->inputStackNr) {
2688			    value = exec->inputStack[exec->index].value;
2689			    data = exec->inputStack[exec->index].data;
2690#ifdef DEBUG_PUSH
2691			    printf("value loaded: %s\n", value);
2692#endif
2693			} else {
2694			    value = NULL;
2695			    data = NULL;
2696#ifdef DEBUG_PUSH
2697			    printf("end of input\n");
2698#endif
2699			}
2700		    } else {
2701			value = NULL;
2702			data = NULL;
2703#ifdef DEBUG_PUSH
2704			printf("end of input\n");
2705#endif
2706		    }
2707		}
2708		goto progress;
2709	    } else if (ret < 0) {
2710		exec->status = -4;
2711		break;
2712	    }
2713	}
2714	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2715rollback:
2716	    /*
2717	     * Failed to find a way out
2718	     */
2719	    exec->determinist = 0;
2720	    xmlFARegExecRollBack(exec);
2721	    if (exec->status == 0) {
2722		value = exec->inputStack[exec->index].value;
2723		data = exec->inputStack[exec->index].data;
2724#ifdef DEBUG_PUSH
2725		printf("value loaded: %s\n", value);
2726#endif
2727	    }
2728	}
2729progress:
2730	continue;
2731    }
2732    if (exec->status == 0) {
2733        return(exec->state->type == XML_REGEXP_FINAL_STATE);
2734    }
2735    return(exec->status);
2736}
2737
2738#if 0
2739static int
2740xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
2741    xmlRegTransPtr trans;
2742    xmlRegAtomPtr atom;
2743    int ret;
2744    int codepoint, len;
2745
2746    if (exec == NULL)
2747	return(-1);
2748    if (exec->status != 0)
2749	return(exec->status);
2750
2751    while ((exec->status == 0) &&
2752	   ((exec->inputString[exec->index] != 0) ||
2753	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2754
2755	/*
2756	 * End of input on non-terminal state, rollback, however we may
2757	 * still have epsilon like transition for counted transitions
2758	 * on counters, in that case don't break too early.
2759	 */
2760	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
2761	    goto rollback;
2762
2763	exec->transcount = 0;
2764	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2765	    trans = &exec->state->trans[exec->transno];
2766	    if (trans->to < 0)
2767		continue;
2768	    atom = trans->atom;
2769	    ret = 0;
2770	    if (trans->count >= 0) {
2771		int count;
2772		xmlRegCounterPtr counter;
2773
2774		/*
2775		 * A counted transition.
2776		 */
2777
2778		count = exec->counts[trans->count];
2779		counter = &exec->comp->counters[trans->count];
2780#ifdef DEBUG_REGEXP_EXEC
2781		printf("testing count %d: val %d, min %d, max %d\n",
2782		       trans->count, count, counter->min,  counter->max);
2783#endif
2784		ret = ((count >= counter->min) && (count <= counter->max));
2785	    } else if (atom == NULL) {
2786		fprintf(stderr, "epsilon transition left at runtime\n");
2787		exec->status = -2;
2788		break;
2789	    } else if (exec->inputString[exec->index] != 0) {
2790                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2791		ret = xmlRegCheckCharacter(atom, codepoint);
2792		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2793		    xmlRegStatePtr to = exec->comp->states[trans->to];
2794
2795		    /*
2796		     * this is a multiple input sequence
2797		     */
2798		    if (exec->state->nbTrans > exec->transno + 1) {
2799			xmlFARegExecSave(exec);
2800		    }
2801		    exec->transcount = 1;
2802		    do {
2803			/*
2804			 * Try to progress as much as possible on the input
2805			 */
2806			if (exec->transcount == atom->max) {
2807			    break;
2808			}
2809			exec->index += len;
2810			/*
2811			 * End of input: stop here
2812			 */
2813			if (exec->inputString[exec->index] == 0) {
2814			    exec->index -= len;
2815			    break;
2816			}
2817			if (exec->transcount >= atom->min) {
2818			    int transno = exec->transno;
2819			    xmlRegStatePtr state = exec->state;
2820
2821			    /*
2822			     * The transition is acceptable save it
2823			     */
2824			    exec->transno = -1; /* trick */
2825			    exec->state = to;
2826			    xmlFARegExecSave(exec);
2827			    exec->transno = transno;
2828			    exec->state = state;
2829			}
2830			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2831				              len);
2832			ret = xmlRegCheckCharacter(atom, codepoint);
2833			exec->transcount++;
2834		    } while (ret == 1);
2835		    if (exec->transcount < atom->min)
2836			ret = 0;
2837
2838		    /*
2839		     * If the last check failed but one transition was found
2840		     * possible, rollback
2841		     */
2842		    if (ret < 0)
2843			ret = 0;
2844		    if (ret == 0) {
2845			goto rollback;
2846		    }
2847		}
2848	    }
2849	    if (ret == 1) {
2850		if (exec->state->nbTrans > exec->transno + 1) {
2851		    xmlFARegExecSave(exec);
2852		}
2853		if (trans->counter >= 0) {
2854#ifdef DEBUG_REGEXP_EXEC
2855		    printf("Increasing count %d\n", trans->counter);
2856#endif
2857		    exec->counts[trans->counter]++;
2858		}
2859#ifdef DEBUG_REGEXP_EXEC
2860		printf("entering state %d\n", trans->to);
2861#endif
2862		exec->state = exec->comp->states[trans->to];
2863		exec->transno = 0;
2864		if (trans->atom != NULL) {
2865		    exec->index += len;
2866		}
2867		goto progress;
2868	    } else if (ret < 0) {
2869		exec->status = -4;
2870		break;
2871	    }
2872	}
2873	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2874rollback:
2875	    /*
2876	     * Failed to find a way out
2877	     */
2878	    exec->determinist = 0;
2879	    xmlFARegExecRollBack(exec);
2880	}
2881progress:
2882	continue;
2883    }
2884}
2885#endif
2886/************************************************************************
2887 * 									*
2888 *	Parser for the Shemas Datatype Regular Expressions		*
2889 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
2890 * 									*
2891 ************************************************************************/
2892
2893/**
2894 * xmlFAIsChar:
2895 * @ctxt:  a regexp parser context
2896 *
2897 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
2898 */
2899static int
2900xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
2901    int cur;
2902    int len;
2903
2904    cur = CUR_SCHAR(ctxt->cur, len);
2905    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
2906	(cur == '*') || (cur == '+') || (cur == '(') ||
2907	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
2908	(cur == 0x5D) || (cur == 0))
2909	return(-1);
2910    return(cur);
2911}
2912
2913/**
2914 * xmlFAParseCharProp:
2915 * @ctxt:  a regexp parser context
2916 *
2917 * [27]   charProp   ::=   IsCategory | IsBlock
2918 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
2919 *                       Separators | Symbols | Others
2920 * [29]   Letters   ::=   'L' [ultmo]?
2921 * [30]   Marks   ::=   'M' [nce]?
2922 * [31]   Numbers   ::=   'N' [dlo]?
2923 * [32]   Punctuation   ::=   'P' [cdseifo]?
2924 * [33]   Separators   ::=   'Z' [slp]?
2925 * [34]   Symbols   ::=   'S' [mcko]?
2926 * [35]   Others   ::=   'C' [cfon]?
2927 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
2928 */
2929static void
2930xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
2931    int cur;
2932    xmlRegAtomType type = 0;
2933    xmlChar *blockName = NULL;
2934
2935    cur = CUR;
2936    if (cur == 'L') {
2937	NEXT;
2938	cur = CUR;
2939	if (cur == 'u') {
2940	    NEXT;
2941	    type = XML_REGEXP_LETTER_UPPERCASE;
2942	} else if (cur == 'l') {
2943	    NEXT;
2944	    type = XML_REGEXP_LETTER_LOWERCASE;
2945	} else if (cur == 't') {
2946	    NEXT;
2947	    type = XML_REGEXP_LETTER_TITLECASE;
2948	} else if (cur == 'm') {
2949	    NEXT;
2950	    type = XML_REGEXP_LETTER_MODIFIER;
2951	} else if (cur == 'o') {
2952	    NEXT;
2953	    type = XML_REGEXP_LETTER_OTHERS;
2954	} else {
2955	    type = XML_REGEXP_LETTER;
2956	}
2957    } else if (cur == 'M') {
2958	NEXT;
2959	cur = CUR;
2960	if (cur == 'n') {
2961	    NEXT;
2962	    /* nonspacing */
2963	    type = XML_REGEXP_MARK_NONSPACING;
2964	} else if (cur == 'c') {
2965	    NEXT;
2966	    /* spacing combining */
2967	    type = XML_REGEXP_MARK_SPACECOMBINING;
2968	} else if (cur == 'e') {
2969	    NEXT;
2970	    /* enclosing */
2971	    type = XML_REGEXP_MARK_ENCLOSING;
2972	} else {
2973	    /* all marks */
2974	    type = XML_REGEXP_MARK;
2975	}
2976    } else if (cur == 'N') {
2977	NEXT;
2978	cur = CUR;
2979	if (cur == 'd') {
2980	    NEXT;
2981	    /* digital */
2982	    type = XML_REGEXP_NUMBER_DECIMAL;
2983	} else if (cur == 'l') {
2984	    NEXT;
2985	    /* letter */
2986	    type = XML_REGEXP_NUMBER_LETTER;
2987	} else if (cur == 'o') {
2988	    NEXT;
2989	    /* other */
2990	    type = XML_REGEXP_NUMBER_OTHERS;
2991	} else {
2992	    /* all numbers */
2993	    type = XML_REGEXP_NUMBER;
2994	}
2995    } else if (cur == 'P') {
2996	NEXT;
2997	cur = CUR;
2998	if (cur == 'c') {
2999	    NEXT;
3000	    /* connector */
3001	    type = XML_REGEXP_PUNCT_CONNECTOR;
3002	} else if (cur == 'd') {
3003	    NEXT;
3004	    /* dash */
3005	    type = XML_REGEXP_PUNCT_DASH;
3006	} else if (cur == 's') {
3007	    NEXT;
3008	    /* open */
3009	    type = XML_REGEXP_PUNCT_OPEN;
3010	} else if (cur == 'e') {
3011	    NEXT;
3012	    /* close */
3013	    type = XML_REGEXP_PUNCT_CLOSE;
3014	} else if (cur == 'i') {
3015	    NEXT;
3016	    /* initial quote */
3017	    type = XML_REGEXP_PUNCT_INITQUOTE;
3018	} else if (cur == 'f') {
3019	    NEXT;
3020	    /* final quote */
3021	    type = XML_REGEXP_PUNCT_FINQUOTE;
3022	} else if (cur == 'o') {
3023	    NEXT;
3024	    /* other */
3025	    type = XML_REGEXP_PUNCT_OTHERS;
3026	} else {
3027	    /* all punctuation */
3028	    type = XML_REGEXP_PUNCT;
3029	}
3030    } else if (cur == 'Z') {
3031	NEXT;
3032	cur = CUR;
3033	if (cur == 's') {
3034	    NEXT;
3035	    /* space */
3036	    type = XML_REGEXP_SEPAR_SPACE;
3037	} else if (cur == 'l') {
3038	    NEXT;
3039	    /* line */
3040	    type = XML_REGEXP_SEPAR_LINE;
3041	} else if (cur == 'p') {
3042	    NEXT;
3043	    /* paragraph */
3044	    type = XML_REGEXP_SEPAR_PARA;
3045	} else {
3046	    /* all separators */
3047	    type = XML_REGEXP_SEPAR;
3048	}
3049    } else if (cur == 'S') {
3050	NEXT;
3051	cur = CUR;
3052	if (cur == 'm') {
3053	    NEXT;
3054	    type = XML_REGEXP_SYMBOL_MATH;
3055	    /* math */
3056	} else if (cur == 'c') {
3057	    NEXT;
3058	    type = XML_REGEXP_SYMBOL_CURRENCY;
3059	    /* currency */
3060	} else if (cur == 'k') {
3061	    NEXT;
3062	    type = XML_REGEXP_SYMBOL_MODIFIER;
3063	    /* modifiers */
3064	} else if (cur == 'o') {
3065	    NEXT;
3066	    type = XML_REGEXP_SYMBOL_OTHERS;
3067	    /* other */
3068	} else {
3069	    /* all symbols */
3070	    type = XML_REGEXP_SYMBOL;
3071	}
3072    } else if (cur == 'C') {
3073	NEXT;
3074	cur = CUR;
3075	if (cur == 'c') {
3076	    NEXT;
3077	    /* control */
3078	    type = XML_REGEXP_OTHER_CONTROL;
3079	} else if (cur == 'f') {
3080	    NEXT;
3081	    /* format */
3082	    type = XML_REGEXP_OTHER_FORMAT;
3083	} else if (cur == 'o') {
3084	    NEXT;
3085	    /* private use */
3086	    type = XML_REGEXP_OTHER_PRIVATE;
3087	} else if (cur == 'n') {
3088	    NEXT;
3089	    /* not assigned */
3090	    type = XML_REGEXP_OTHER_NA;
3091	} else {
3092	    /* all others */
3093	    type = XML_REGEXP_OTHER;
3094	}
3095    } else if (cur == 'I') {
3096	const xmlChar *start;
3097	NEXT;
3098	cur = CUR;
3099	if (cur != 's') {
3100	    ERROR("IsXXXX expected");
3101	    return;
3102	}
3103	NEXT;
3104	start = ctxt->cur;
3105	cur = CUR;
3106	if (((cur >= 'a') && (cur <= 'z')) ||
3107	    ((cur >= 'A') && (cur <= 'Z')) ||
3108	    ((cur >= '0') && (cur <= '9')) ||
3109	    (cur == 0x2D)) {
3110	    NEXT;
3111	    cur = CUR;
3112	    while (((cur >= 'a') && (cur <= 'z')) ||
3113		((cur >= 'A') && (cur <= 'Z')) ||
3114		((cur >= '0') && (cur <= '9')) ||
3115		(cur == 0x2D)) {
3116		NEXT;
3117		cur = CUR;
3118	    }
3119	}
3120	type = XML_REGEXP_BLOCK_NAME;
3121	blockName = xmlStrndup(start, ctxt->cur - start);
3122    } else {
3123	ERROR("Unknown char property");
3124	return;
3125    }
3126    if (ctxt->atom == NULL) {
3127	ctxt->atom = xmlRegNewAtom(ctxt, type);
3128	if (ctxt->atom != NULL)
3129	    ctxt->atom->valuep = blockName;
3130    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3131        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3132		           type, 0, 0, blockName);
3133    }
3134}
3135
3136/**
3137 * xmlFAParseCharClassEsc:
3138 * @ctxt:  a regexp parser context
3139 *
3140 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
3141 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
3142 * [25] catEsc   ::=   '\p{' charProp '}'
3143 * [26] complEsc ::=   '\P{' charProp '}'
3144 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
3145 */
3146static void
3147xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
3148    int cur;
3149
3150    if (CUR == '.') {
3151	if (ctxt->atom == NULL) {
3152	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
3153	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3154	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3155			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
3156	}
3157	NEXT;
3158	return;
3159    }
3160    if (CUR != '\\') {
3161	ERROR("Escaped sequence: expecting \\");
3162	return;
3163    }
3164    NEXT;
3165    cur = CUR;
3166    if (cur == 'p') {
3167	NEXT;
3168	if (CUR != '{') {
3169	    ERROR("Expecting '{'");
3170	    return;
3171	}
3172	NEXT;
3173	xmlFAParseCharProp(ctxt);
3174	if (CUR != '}') {
3175	    ERROR("Expecting '}'");
3176	    return;
3177	}
3178	NEXT;
3179    } else if (cur == 'P') {
3180	NEXT;
3181	if (CUR != '{') {
3182	    ERROR("Expecting '{'");
3183	    return;
3184	}
3185	NEXT;
3186	xmlFAParseCharProp(ctxt);
3187	ctxt->atom->neg = 1;
3188	if (CUR != '}') {
3189	    ERROR("Expecting '}'");
3190	    return;
3191	}
3192	NEXT;
3193    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
3194	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
3195	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
3196	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
3197	(cur == 0x5E)) {
3198	if (ctxt->atom == NULL) {
3199	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3200	    if (ctxt->atom != NULL)
3201		ctxt->atom->codepoint = cur;
3202	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3203	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3204			       XML_REGEXP_CHARVAL, cur, cur, NULL);
3205	}
3206	NEXT;
3207    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
3208	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
3209	(cur == 'w') || (cur == 'W')) {
3210	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
3211
3212	switch (cur) {
3213	    case 's':
3214		type = XML_REGEXP_ANYSPACE;
3215		break;
3216	    case 'S':
3217		type = XML_REGEXP_NOTSPACE;
3218		break;
3219	    case 'i':
3220		type = XML_REGEXP_INITNAME;
3221		break;
3222	    case 'I':
3223		type = XML_REGEXP_NOTINITNAME;
3224		break;
3225	    case 'c':
3226		type = XML_REGEXP_NAMECHAR;
3227		break;
3228	    case 'C':
3229		type = XML_REGEXP_NOTNAMECHAR;
3230		break;
3231	    case 'd':
3232		type = XML_REGEXP_DECIMAL;
3233		break;
3234	    case 'D':
3235		type = XML_REGEXP_NOTDECIMAL;
3236		break;
3237	    case 'w':
3238		type = XML_REGEXP_REALCHAR;
3239		break;
3240	    case 'W':
3241		type = XML_REGEXP_NOTREALCHAR;
3242		break;
3243	}
3244	NEXT;
3245	if (ctxt->atom == NULL) {
3246	    ctxt->atom = xmlRegNewAtom(ctxt, type);
3247	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3248	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3249			       type, 0, 0, NULL);
3250	}
3251    }
3252}
3253
3254/**
3255 * xmlFAParseCharRef:
3256 * @ctxt:  a regexp parser context
3257 *
3258 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
3259 */
3260static int
3261xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
3262    int ret = 0, cur;
3263
3264    if ((CUR != '&') || (NXT(1) != '#'))
3265	return(-1);
3266    NEXT;
3267    NEXT;
3268    cur = CUR;
3269    if (cur == 'x') {
3270	NEXT;
3271	cur = CUR;
3272	if (((cur >= '0') && (cur <= '9')) ||
3273	    ((cur >= 'a') && (cur <= 'f')) ||
3274	    ((cur >= 'A') && (cur <= 'F'))) {
3275	    while (((cur >= '0') && (cur <= '9')) ||
3276		   ((cur >= 'A') && (cur <= 'F'))) {
3277		if ((cur >= '0') && (cur <= '9'))
3278		    ret = ret * 16 + cur - '0';
3279		else if ((cur >= 'a') && (cur <= 'f'))
3280		    ret = ret * 16 + 10 + (cur - 'a');
3281		else
3282		    ret = ret * 16 + 10 + (cur - 'A');
3283		NEXT;
3284		cur = CUR;
3285	    }
3286	} else {
3287	    ERROR("Char ref: expecting [0-9A-F]");
3288	    return(-1);
3289	}
3290    } else {
3291	if ((cur >= '0') && (cur <= '9')) {
3292	    while ((cur >= '0') && (cur <= '9')) {
3293		ret = ret * 10 + cur - '0';
3294		NEXT;
3295		cur = CUR;
3296	    }
3297	} else {
3298	    ERROR("Char ref: expecting [0-9]");
3299	    return(-1);
3300	}
3301    }
3302    if (cur != ';') {
3303	ERROR("Char ref: expecting ';'");
3304	return(-1);
3305    } else {
3306	NEXT;
3307    }
3308    return(ret);
3309}
3310
3311/**
3312 * xmlFAParseCharRange:
3313 * @ctxt:  a regexp parser context
3314 *
3315 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
3316 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
3317 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
3318 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
3319 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
3320 */
3321static void
3322xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
3323    int cur;
3324    int start = -1;
3325    int end = -1;
3326
3327    if ((CUR == '&') && (NXT(1) == '#')) {
3328	end = start = xmlFAParseCharRef(ctxt);
3329        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3330	                   XML_REGEXP_CHARVAL, start, end, NULL);
3331	return;
3332    }
3333    cur = CUR;
3334    if (cur == '\\') {
3335	NEXT;
3336	cur = CUR;
3337	switch (cur) {
3338	    case 'n': start = 0xA; break;
3339	    case 'r': start = 0xD; break;
3340	    case 't': start = 0x9; break;
3341	    case '\\': case '|': case '.': case '-': case '^': case '?':
3342	    case '*': case '+': case '{': case '}': case '(': case ')':
3343	    case '[': case ']':
3344		start = cur; break;
3345	    default:
3346		ERROR("Invalid escape value");
3347		return;
3348	}
3349	end = start;
3350    } else if ((cur != 0x5B) && (cur != 0x5D)) {
3351	end = start = cur;
3352    } else {
3353	ERROR("Expecting a char range");
3354	return;
3355    }
3356    NEXT;
3357    if (start == '-') {
3358	return;
3359    }
3360    cur = CUR;
3361    if (cur != '-') {
3362        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3363		              XML_REGEXP_CHARVAL, start, end, NULL);
3364	return;
3365    }
3366    NEXT;
3367    cur = CUR;
3368    if (cur == '\\') {
3369	NEXT;
3370	cur = CUR;
3371	switch (cur) {
3372	    case 'n': end = 0xA; break;
3373	    case 'r': end = 0xD; break;
3374	    case 't': end = 0x9; break;
3375	    case '\\': case '|': case '.': case '-': case '^': case '?':
3376	    case '*': case '+': case '{': case '}': case '(': case ')':
3377	    case '[': case ']':
3378		end = cur; break;
3379	    default:
3380		ERROR("Invalid escape value");
3381		return;
3382	}
3383    } else if ((cur != 0x5B) && (cur != 0x5D)) {
3384	end = cur;
3385    } else {
3386	ERROR("Expecting the end of a char range");
3387	return;
3388    }
3389    NEXT;
3390    /* TODO check that the values are acceptable character ranges for XML */
3391    if (end < start) {
3392	ERROR("End of range is before start of range");
3393    } else {
3394        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3395		           XML_REGEXP_CHARVAL, start, end, NULL);
3396    }
3397    return;
3398}
3399
3400/**
3401 * xmlFAParsePosCharGroup:
3402 * @ctxt:  a regexp parser context
3403 *
3404 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
3405 */
3406static void
3407xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
3408    do {
3409	if ((CUR == '\\') || (CUR == '.')) {
3410	    xmlFAParseCharClassEsc(ctxt);
3411	} else {
3412	    xmlFAParseCharRange(ctxt);
3413	}
3414    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
3415	     (ctxt->error == 0));
3416}
3417
3418/**
3419 * xmlFAParseCharGroup:
3420 * @ctxt:  a regexp parser context
3421 *
3422 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
3423 * [15]   negCharGroup ::= '^' posCharGroup
3424 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
3425 * [12]   charClassExpr ::= '[' charGroup ']'
3426 */
3427static void
3428xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
3429    int n = ctxt->neg;
3430    while ((CUR != ']') && (ctxt->error == 0)) {
3431	if (CUR == '^') {
3432	    int neg = ctxt->neg;
3433
3434	    NEXT;
3435	    ctxt->neg = !ctxt->neg;
3436	    xmlFAParsePosCharGroup(ctxt);
3437	    ctxt->neg = neg;
3438	} else if (CUR == '-') {
3439	    NEXT;
3440	    ctxt->neg = !ctxt->neg;
3441	    if (CUR != '[') {
3442		ERROR("charClassExpr: '[' expected");
3443		break;
3444	    }
3445	    NEXT;
3446	    xmlFAParseCharGroup(ctxt);
3447	    if (CUR == ']') {
3448		NEXT;
3449	    } else {
3450		ERROR("charClassExpr: ']' expected");
3451		break;
3452	    }
3453	    break;
3454	} else if (CUR != ']') {
3455	    xmlFAParsePosCharGroup(ctxt);
3456	}
3457    }
3458    ctxt->neg = n;
3459}
3460
3461/**
3462 * xmlFAParseCharClass:
3463 * @ctxt:  a regexp parser context
3464 *
3465 * [11]   charClass   ::=     charClassEsc | charClassExpr
3466 * [12]   charClassExpr   ::=   '[' charGroup ']'
3467 */
3468static void
3469xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
3470    if (CUR == '[') {
3471	NEXT;
3472	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
3473	if (ctxt->atom == NULL)
3474	    return;
3475	xmlFAParseCharGroup(ctxt);
3476	if (CUR == ']') {
3477	    NEXT;
3478	} else {
3479	    ERROR("xmlFAParseCharClass: ']' expected");
3480	}
3481    } else {
3482	xmlFAParseCharClassEsc(ctxt);
3483    }
3484}
3485
3486/**
3487 * xmlFAParseQuantExact:
3488 * @ctxt:  a regexp parser context
3489 *
3490 * [8]   QuantExact   ::=   [0-9]+
3491 */
3492static int
3493xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
3494    int ret = 0;
3495    int ok = 0;
3496
3497    while ((CUR >= '0') && (CUR <= '9')) {
3498	ret = ret * 10 + (CUR - '0');
3499	ok = 1;
3500	NEXT;
3501    }
3502    if (ok != 1) {
3503	return(-1);
3504    }
3505    return(ret);
3506}
3507
3508/**
3509 * xmlFAParseQuantifier:
3510 * @ctxt:  a regexp parser context
3511 *
3512 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
3513 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
3514 * [6]   quantRange   ::=   QuantExact ',' QuantExact
3515 * [7]   quantMin   ::=   QuantExact ','
3516 * [8]   QuantExact   ::=   [0-9]+
3517 */
3518static int
3519xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
3520    int cur;
3521
3522    cur = CUR;
3523    if ((cur == '?') || (cur == '*') || (cur == '+')) {
3524	if (ctxt->atom != NULL) {
3525	    if (cur == '?')
3526		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
3527	    else if (cur == '*')
3528		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
3529	    else if (cur == '+')
3530		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
3531	}
3532	NEXT;
3533	return(1);
3534    }
3535    if (cur == '{') {
3536	int min = 0, max = 0;
3537
3538	NEXT;
3539	cur = xmlFAParseQuantExact(ctxt);
3540	if (cur >= 0)
3541	    min = cur;
3542	if (CUR == ',') {
3543	    NEXT;
3544	    cur = xmlFAParseQuantExact(ctxt);
3545	    if (cur >= 0)
3546		max = cur;
3547	}
3548	if (CUR == '}') {
3549	    NEXT;
3550	} else {
3551	    ERROR("Unterminated quantifier");
3552	}
3553	if (max == 0)
3554	    max = min;
3555	if (ctxt->atom != NULL) {
3556	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
3557	    ctxt->atom->min = min;
3558	    ctxt->atom->max = max;
3559	}
3560	return(1);
3561    }
3562    return(0);
3563}
3564
3565/**
3566 * xmlFAParseAtom:
3567 * @ctxt:  a regexp parser context
3568 *
3569 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
3570 */
3571static int
3572xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
3573    int codepoint, len;
3574
3575    codepoint = xmlFAIsChar(ctxt);
3576    if (codepoint > 0) {
3577	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3578	if (ctxt->atom == NULL)
3579	    return(-1);
3580	codepoint = CUR_SCHAR(ctxt->cur, len);
3581	ctxt->atom->codepoint = codepoint;
3582	NEXTL(len);
3583	return(1);
3584    } else if (CUR == '|') {
3585	return(0);
3586    } else if (CUR == 0) {
3587	return(0);
3588    } else if (CUR == ')') {
3589	return(0);
3590    } else if (CUR == '(') {
3591	xmlRegStatePtr start, oldend;
3592
3593	NEXT;
3594	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
3595	start = ctxt->state;
3596	oldend = ctxt->end;
3597	ctxt->end = NULL;
3598	ctxt->atom = NULL;
3599	xmlFAParseRegExp(ctxt, 0);
3600	if (CUR == ')') {
3601	    NEXT;
3602	} else {
3603	    ERROR("xmlFAParseAtom: expecting ')'");
3604	}
3605	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
3606	if (ctxt->atom == NULL)
3607	    return(-1);
3608	ctxt->atom->start = start;
3609	ctxt->atom->stop = ctxt->state;
3610	ctxt->end = oldend;
3611	return(1);
3612    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
3613	xmlFAParseCharClass(ctxt);
3614	return(1);
3615    }
3616    return(0);
3617}
3618
3619/**
3620 * xmlFAParsePiece:
3621 * @ctxt:  a regexp parser context
3622 *
3623 * [3]   piece   ::=   atom quantifier?
3624 */
3625static int
3626xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
3627    int ret;
3628
3629    ctxt->atom = NULL;
3630    ret = xmlFAParseAtom(ctxt);
3631    if (ret == 0)
3632	return(0);
3633    if (ctxt->atom == NULL) {
3634	ERROR("internal: no atom generated");
3635    }
3636    xmlFAParseQuantifier(ctxt);
3637    return(1);
3638}
3639
3640/**
3641 * xmlFAParseBranch:
3642 * @ctxt:  a regexp parser context
3643 * @first:  is taht the first
3644 *
3645 * [2]   branch   ::=   piece*
3646 */
3647static void
3648xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) {
3649    xmlRegStatePtr previous;
3650    xmlRegAtomPtr prevatom = NULL;
3651    int ret;
3652
3653    previous = ctxt->state;
3654    ret = xmlFAParsePiece(ctxt);
3655    if (ret != 0) {
3656	if (first) {
3657	    xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3658	    previous = ctxt->state;
3659	} else {
3660	    prevatom = ctxt->atom;
3661	}
3662	ctxt->atom = NULL;
3663    }
3664    while ((ret != 0) && (ctxt->error == 0)) {
3665	ret = xmlFAParsePiece(ctxt);
3666	if (ret != 0) {
3667	    if (first) {
3668		xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3669	    } else {
3670		xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom);
3671		prevatom = ctxt->atom;
3672	    }
3673	    previous = ctxt->state;
3674	    ctxt->atom = NULL;
3675	}
3676    }
3677    if (!first) {
3678	xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom);
3679    }
3680}
3681
3682/**
3683 * xmlFAParseRegExp:
3684 * @ctxt:  a regexp parser context
3685 * @top:  is that the top-level expressions ?
3686 *
3687 * [1]   regExp   ::=     branch  ( '|' branch )*
3688 */
3689static void
3690xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
3691    xmlRegStatePtr start, end, oldend;
3692
3693    oldend = ctxt->end;
3694
3695    start = ctxt->state;
3696    xmlFAParseBranch(ctxt, (ctxt->end == NULL));
3697    if (CUR != '|') {
3698	ctxt->end = ctxt->state;
3699	return;
3700    }
3701    end = ctxt->state;
3702    while ((CUR == '|') && (ctxt->error == 0)) {
3703	NEXT;
3704	ctxt->state = start;
3705	ctxt->end = end;
3706	xmlFAParseBranch(ctxt, 0);
3707    }
3708    if (!top)
3709	ctxt->end = oldend;
3710}
3711
3712/************************************************************************
3713 * 									*
3714 * 			The basic API					*
3715 * 									*
3716 ************************************************************************/
3717
3718/**
3719 * xmlRegexpPrint:
3720 * @output: the file for the output debug
3721 * @regexp: the compiled regexp
3722 *
3723 * Print the content of the compiled regular expression
3724 */
3725void
3726xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
3727    int i;
3728
3729    fprintf(output, " regexp: ");
3730    if (regexp == NULL) {
3731	fprintf(output, "NULL\n");
3732	return;
3733    }
3734    fprintf(output, "'%s' ", regexp->string);
3735    fprintf(output, "\n");
3736    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
3737    for (i = 0;i < regexp->nbAtoms; i++) {
3738	fprintf(output, " %02d ", i);
3739	xmlRegPrintAtom(output, regexp->atoms[i]);
3740    }
3741    fprintf(output, "%d states:", regexp->nbStates);
3742    fprintf(output, "\n");
3743    for (i = 0;i < regexp->nbStates; i++) {
3744	xmlRegPrintState(output, regexp->states[i]);
3745    }
3746    fprintf(output, "%d counters:\n", regexp->nbCounters);
3747    for (i = 0;i < regexp->nbCounters; i++) {
3748	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
3749		                                regexp->counters[i].max);
3750    }
3751}
3752
3753/**
3754 * xmlRegexpCompile:
3755 * @regexp:  a regular expression string
3756 *
3757 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
3758 * Appendix F and build an automata suitable for testing strings against
3759 * that regular expression
3760 *
3761 * Returns the compiled expression or NULL in case of error
3762 */
3763xmlRegexpPtr
3764xmlRegexpCompile(const xmlChar *regexp) {
3765    xmlRegexpPtr ret;
3766    xmlRegParserCtxtPtr ctxt;
3767
3768    ctxt = xmlRegNewParserCtxt(regexp);
3769    if (ctxt == NULL)
3770	return(NULL);
3771
3772    /* initialize the parser */
3773    ctxt->end = NULL;
3774    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3775    xmlRegStatePush(ctxt, ctxt->start);
3776
3777    /* parse the expression building an automata */
3778    xmlFAParseRegExp(ctxt, 1);
3779    if (CUR != 0) {
3780	ERROR("xmlFAParseRegExp: extra characters");
3781    }
3782    ctxt->end = ctxt->state;
3783    ctxt->start->type = XML_REGEXP_START_STATE;
3784    ctxt->end->type = XML_REGEXP_FINAL_STATE;
3785
3786    /* remove the Epsilon except for counted transitions */
3787    xmlFAEliminateEpsilonTransitions(ctxt);
3788
3789
3790    if (ctxt->error != 0) {
3791	xmlRegFreeParserCtxt(ctxt);
3792	return(NULL);
3793    }
3794    ret = xmlRegEpxFromParse(ctxt);
3795    xmlRegFreeParserCtxt(ctxt);
3796    return(ret);
3797}
3798
3799/**
3800 * xmlRegexpExec:
3801 * @comp:  the compiled regular expression
3802 * @content:  the value to check against the regular expression
3803 *
3804 * Check if the regular expression generate the value
3805 *
3806 * Returns 1 if it matches, 0 if not and a negativa value in case of error
3807 */
3808int
3809xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
3810    if ((comp == NULL) || (content == NULL))
3811	return(-1);
3812    return(xmlFARegExec(comp, content));
3813}
3814
3815/**
3816 * xmlRegexpIsDeterminist:
3817 * @comp:  the compiled regular expression
3818 *
3819 * Check if the regular expression is determinist
3820 *
3821 * Returns 1 if it yes, 0 if not and a negativa value in case of error
3822 */
3823int
3824xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
3825    xmlAutomataPtr am;
3826    int ret;
3827
3828    if (comp == NULL)
3829	return(-1);
3830    if (comp->determinist != -1)
3831	return(comp->determinist);
3832
3833    am = xmlNewAutomata();
3834    am->nbAtoms = comp->nbAtoms;
3835    am->atoms = comp->atoms;
3836    am->nbStates = comp->nbStates;
3837    am->states = comp->states;
3838    am->determinist = -1;
3839    ret = xmlFAComputesDeterminism(am);
3840    am->atoms = NULL;
3841    am->states = NULL;
3842    xmlFreeAutomata(am);
3843    return(ret);
3844}
3845
3846/**
3847 * xmlRegFreeRegexp:
3848 * @regexp:  the regexp
3849 *
3850 * Free a regexp
3851 */
3852void
3853xmlRegFreeRegexp(xmlRegexpPtr regexp) {
3854    int i;
3855    if (regexp == NULL)
3856	return;
3857
3858    if (regexp->string != NULL)
3859	xmlFree(regexp->string);
3860    if (regexp->states != NULL) {
3861	for (i = 0;i < regexp->nbStates;i++)
3862	    xmlRegFreeState(regexp->states[i]);
3863	xmlFree(regexp->states);
3864    }
3865    if (regexp->atoms != NULL) {
3866	for (i = 0;i < regexp->nbAtoms;i++)
3867	    xmlRegFreeAtom(regexp->atoms[i]);
3868	xmlFree(regexp->atoms);
3869    }
3870    if (regexp->counters != NULL)
3871	xmlFree(regexp->counters);
3872    if (regexp->compact != NULL)
3873	xmlFree(regexp->compact);
3874    if (regexp->stringMap != NULL) {
3875	for (i = 0; i < regexp->nbstrings;i++)
3876	    xmlFree(regexp->stringMap[i]);
3877	xmlFree(regexp->stringMap);
3878    }
3879
3880    xmlFree(regexp);
3881}
3882
3883#ifdef LIBXML_AUTOMATA_ENABLED
3884/************************************************************************
3885 * 									*
3886 * 			The Automata interface				*
3887 * 									*
3888 ************************************************************************/
3889
3890/**
3891 * xmlNewAutomata:
3892 *
3893 * Create a new automata
3894 *
3895 * Returns the new object or NULL in case of failure
3896 */
3897xmlAutomataPtr
3898xmlNewAutomata(void) {
3899    xmlAutomataPtr ctxt;
3900
3901    ctxt = xmlRegNewParserCtxt(NULL);
3902    if (ctxt == NULL)
3903	return(NULL);
3904
3905    /* initialize the parser */
3906    ctxt->end = NULL;
3907    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3908    xmlRegStatePush(ctxt, ctxt->start);
3909
3910    return(ctxt);
3911}
3912
3913/**
3914 * xmlFreeAutomata:
3915 * @am: an automata
3916 *
3917 * Free an automata
3918 */
3919void
3920xmlFreeAutomata(xmlAutomataPtr am) {
3921    if (am == NULL)
3922	return;
3923    xmlRegFreeParserCtxt(am);
3924}
3925
3926/**
3927 * xmlAutomataGetInitState:
3928 * @am: an automata
3929 *
3930 * Returns the initial state of the automata
3931 */
3932xmlAutomataStatePtr
3933xmlAutomataGetInitState(xmlAutomataPtr am) {
3934    if (am == NULL)
3935	return(NULL);
3936    return(am->start);
3937}
3938
3939/**
3940 * xmlAutomataSetFinalState:
3941 * @am: an automata
3942 * @state: a state in this automata
3943 *
3944 * Makes that state a final state
3945 *
3946 * Returns 0 or -1 in case of error
3947 */
3948int
3949xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
3950    if ((am == NULL) || (state == NULL))
3951	return(-1);
3952    state->type = XML_REGEXP_FINAL_STATE;
3953    return(0);
3954}
3955
3956/**
3957 * xmlAutomataNewTransition:
3958 * @am: an automata
3959 * @from: the starting point of the transition
3960 * @to: the target point of the transition or NULL
3961 * @token: the input string associated to that transition
3962 * @data: data passed to the callback function if the transition is activated
3963 *
3964 * If @to is NULL, this create first a new target state in the automata
3965 * and then adds a transition from the @from state to the target state
3966 * activated by the value of @token
3967 *
3968 * Returns the target state or NULL in case of error
3969 */
3970xmlAutomataStatePtr
3971xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
3972			 xmlAutomataStatePtr to, const xmlChar *token,
3973			 void *data) {
3974    xmlRegAtomPtr atom;
3975
3976    if ((am == NULL) || (from == NULL) || (token == NULL))
3977	return(NULL);
3978    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
3979    atom->data = data;
3980    if (atom == NULL)
3981	return(NULL);
3982    atom->valuep = xmlStrdup(token);
3983
3984    xmlFAGenerateTransitions(am, from, to, atom);
3985    if (to == NULL)
3986	return(am->state);
3987    return(to);
3988}
3989
3990/**
3991 * xmlAutomataNewCountTrans:
3992 * @am: an automata
3993 * @from: the starting point of the transition
3994 * @to: the target point of the transition or NULL
3995 * @token: the input string associated to that transition
3996 * @min:  the minimum successive occurences of token
3997 * @min:  the maximum successive occurences of token
3998 *
3999 * If @to is NULL, this create first a new target state in the automata
4000 * and then adds a transition from the @from state to the target state
4001 * activated by a succession of input of value @token and whose number
4002 * is between @min and @max
4003 *
4004 * Returns the target state or NULL in case of error
4005 */
4006xmlAutomataStatePtr
4007xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4008			 xmlAutomataStatePtr to, const xmlChar *token,
4009			 int min, int max, void *data) {
4010    xmlRegAtomPtr atom;
4011
4012    if ((am == NULL) || (from == NULL) || (token == NULL))
4013	return(NULL);
4014    if (min < 0)
4015	return(NULL);
4016    if ((max < min) || (max < 1))
4017	return(NULL);
4018    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4019    if (atom == NULL)
4020	return(NULL);
4021    atom->valuep = xmlStrdup(token);
4022    atom->data = data;
4023    if (min == 0)
4024	atom->min = 1;
4025    else
4026	atom->min = min;
4027    atom->max = max;
4028
4029    xmlFAGenerateTransitions(am, from, to, atom);
4030    if (to == NULL)
4031	to = am->state;
4032    if (to == NULL)
4033	return(NULL);
4034    if (min == 0)
4035	xmlFAGenerateEpsilonTransition(am, from, to);
4036    return(to);
4037}
4038
4039/**
4040 * xmlAutomataNewOnceTrans:
4041 * @am: an automata
4042 * @from: the starting point of the transition
4043 * @to: the target point of the transition or NULL
4044 * @token: the input string associated to that transition
4045 * @min:  the minimum successive occurences of token
4046 * @min:  the maximum successive occurences of token
4047 *
4048 * If @to is NULL, this create first a new target state in the automata
4049 * and then adds a transition from the @from state to the target state
4050 * activated by a succession of input of value @token and whose number
4051 * is between @min and @max, moreover that transistion can only be crossed
4052 * once.
4053 *
4054 * Returns the target state or NULL in case of error
4055 */
4056xmlAutomataStatePtr
4057xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4058			 xmlAutomataStatePtr to, const xmlChar *token,
4059			 int min, int max, void *data) {
4060    xmlRegAtomPtr atom;
4061    int counter;
4062
4063    if ((am == NULL) || (from == NULL) || (token == NULL))
4064	return(NULL);
4065    if (min < 1)
4066	return(NULL);
4067    if ((max < min) || (max < 1))
4068	return(NULL);
4069    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4070    if (atom == NULL)
4071	return(NULL);
4072    atom->valuep = xmlStrdup(token);
4073    atom->data = data;
4074    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
4075    if (min == 0)
4076	atom->min = 1;
4077    else
4078	atom->min = min;
4079    atom->max = max;
4080    /*
4081     * associate a counter to the transition.
4082     */
4083    counter = xmlRegGetCounter(am);
4084    am->counters[counter].min = 1;
4085    am->counters[counter].max = 1;
4086
4087    /* xmlFAGenerateTransitions(am, from, to, atom); */
4088    if (to == NULL) {
4089	to = xmlRegNewState(am);
4090	xmlRegStatePush(am, to);
4091    }
4092    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
4093    xmlRegAtomPush(am, atom);
4094    am->state = to;
4095    if (to == NULL)
4096	to = am->state;
4097    if (to == NULL)
4098	return(NULL);
4099    return(to);
4100}
4101
4102/**
4103 * xmlAutomataNewState:
4104 * @am: an automata
4105 *
4106 * Create a new disconnected state in the automata
4107 *
4108 * Returns the new state or NULL in case of error
4109 */
4110xmlAutomataStatePtr
4111xmlAutomataNewState(xmlAutomataPtr am) {
4112    xmlAutomataStatePtr to;
4113
4114    if (am == NULL)
4115	return(NULL);
4116    to = xmlRegNewState(am);
4117    xmlRegStatePush(am, to);
4118    return(to);
4119}
4120
4121/**
4122 * xmlAutomataNewTransition:
4123 * @am: an automata
4124 * @from: the starting point of the transition
4125 * @to: the target point of the transition or NULL
4126 *
4127 * If @to is NULL, this create first a new target state in the automata
4128 * and then adds a an epsilon transition from the @from state to the
4129 * target state
4130 *
4131 * Returns the target state or NULL in case of error
4132 */
4133xmlAutomataStatePtr
4134xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
4135		      xmlAutomataStatePtr to) {
4136    if ((am == NULL) || (from == NULL))
4137	return(NULL);
4138    xmlFAGenerateEpsilonTransition(am, from, to);
4139    if (to == NULL)
4140	return(am->state);
4141    return(to);
4142}
4143
4144/**
4145 * xmlAutomataNewAllTrans:
4146 * @am: an automata
4147 * @from: the starting point of the transition
4148 * @to: the target point of the transition or NULL
4149 *
4150 * If @to is NULL, this create first a new target state in the automata
4151 * and then adds a an ALL transition from the @from state to the
4152 * target state. That transition is an epsilon transition allowed only when
4153 * all transitions from the @from node have been activated.
4154 *
4155 * Returns the target state or NULL in case of error
4156 */
4157xmlAutomataStatePtr
4158xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4159		       xmlAutomataStatePtr to, int lax) {
4160    if ((am == NULL) || (from == NULL))
4161	return(NULL);
4162    xmlFAGenerateAllTransition(am, from, to, lax);
4163    if (to == NULL)
4164	return(am->state);
4165    return(to);
4166}
4167
4168/**
4169 * xmlAutomataNewCounter:
4170 * @am: an automata
4171 * @min:  the minimal value on the counter
4172 * @max:  the maximal value on the counter
4173 *
4174 * Create a new counter
4175 *
4176 * Returns the counter number or -1 in case of error
4177 */
4178int
4179xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
4180    int ret;
4181
4182    if (am == NULL)
4183	return(-1);
4184
4185    ret = xmlRegGetCounter(am);
4186    if (ret < 0)
4187	return(-1);
4188    am->counters[ret].min = min;
4189    am->counters[ret].max = max;
4190    return(ret);
4191}
4192
4193/**
4194 * xmlAutomataNewCountedTrans:
4195 * @am: an automata
4196 * @from: the starting point of the transition
4197 * @to: the target point of the transition or NULL
4198 * @counter: the counter associated to that transition
4199 *
4200 * If @to is NULL, this create first a new target state in the automata
4201 * and then adds an epsilon transition from the @from state to the target state
4202 * which will increment the counter provided
4203 *
4204 * Returns the target state or NULL in case of error
4205 */
4206xmlAutomataStatePtr
4207xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4208		xmlAutomataStatePtr to, int counter) {
4209    if ((am == NULL) || (from == NULL) || (counter < 0))
4210	return(NULL);
4211    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
4212    if (to == NULL)
4213	return(am->state);
4214    return(to);
4215}
4216
4217/**
4218 * xmlAutomataNewCounterTrans:
4219 * @am: an automata
4220 * @from: the starting point of the transition
4221 * @to: the target point of the transition or NULL
4222 * @counter: the counter associated to that transition
4223 *
4224 * If @to is NULL, this create first a new target state in the automata
4225 * and then adds an epsilon transition from the @from state to the target state
4226 * which will be allowed only if the counter is within the right range.
4227 *
4228 * Returns the target state or NULL in case of error
4229 */
4230xmlAutomataStatePtr
4231xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4232		xmlAutomataStatePtr to, int counter) {
4233    if ((am == NULL) || (from == NULL) || (counter < 0))
4234	return(NULL);
4235    xmlFAGenerateCountedTransition(am, from, to, counter);
4236    if (to == NULL)
4237	return(am->state);
4238    return(to);
4239}
4240
4241/**
4242 * xmlAutomataCompile:
4243 * @am: an automata
4244 *
4245 * Compile the automata into a Reg Exp ready for being executed.
4246 * The automata should be free after this point.
4247 *
4248 * Returns the compiled regexp or NULL in case of error
4249 */
4250xmlRegexpPtr
4251xmlAutomataCompile(xmlAutomataPtr am) {
4252    xmlRegexpPtr ret;
4253
4254    xmlFAEliminateEpsilonTransitions(am);
4255    /* xmlFAComputesDeterminism(am); */
4256    ret = xmlRegEpxFromParse(am);
4257
4258    return(ret);
4259}
4260
4261/**
4262 * xmlAutomataIsDeterminist:
4263 * @am: an automata
4264 *
4265 * Checks if an automata is determinist.
4266 *
4267 * Returns 1 if true, 0 if not, and -1 in case of error
4268 */
4269int
4270xmlAutomataIsDeterminist(xmlAutomataPtr am) {
4271    int ret;
4272
4273    if (am == NULL)
4274	return(-1);
4275
4276    ret = xmlFAComputesDeterminism(am);
4277    return(ret);
4278}
4279#endif /* LIBXML_AUTOMATA_ENABLED */
4280#endif /* LIBXML_REGEXP_ENABLED */
4281