xmlregexp.c revision 52b48c7a7bfb338f434d39f9fc3e54768e301575
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications thise includes:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22#include <stdio.h>
23#include <string.h>
24#include <libxml/tree.h>
25#include <libxml/parserInternals.h>
26#include <libxml/xmlregexp.h>
27#include <libxml/xmlautomata.h>
28#include <libxml/xmlunicode.h>
29
30/* #define DEBUG_REGEXP_GRAPH  */
31/* #define DEBUG_REGEXP_EXEC */
32/* #define DEBUG_PUSH */
33/* #define DEBUG_COMPACTION */
34
35#define ERROR(str) ctxt->error = 1;					\
36    xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
37#define NEXT ctxt->cur++
38#define CUR (*(ctxt->cur))
39#define NXT(index) (ctxt->cur[index])
40
41#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
42#define NEXTL(l) ctxt->cur += l;
43
44/**
45 * TODO:
46 *
47 * macro to flag unimplemented blocks
48 */
49#define TODO 								\
50    xmlGenericError(xmlGenericErrorContext,				\
51	    "Unimplemented block at %s:%d\n",				\
52            __FILE__, __LINE__);
53
54
55/************************************************************************
56 * 									*
57 * 			Datatypes and structures			*
58 * 									*
59 ************************************************************************/
60
61typedef enum {
62    XML_REGEXP_EPSILON = 1,
63    XML_REGEXP_CHARVAL,
64    XML_REGEXP_RANGES,
65    XML_REGEXP_SUBREG,
66    XML_REGEXP_STRING,
67    XML_REGEXP_ANYCHAR, /* . */
68    XML_REGEXP_ANYSPACE, /* \s */
69    XML_REGEXP_NOTSPACE, /* \S */
70    XML_REGEXP_INITNAME, /* \l */
71    XML_REGEXP_NOTINITNAME, /* \l */
72    XML_REGEXP_NAMECHAR, /* \c */
73    XML_REGEXP_NOTNAMECHAR, /* \C */
74    XML_REGEXP_DECIMAL, /* \d */
75    XML_REGEXP_NOTDECIMAL, /* \d */
76    XML_REGEXP_REALCHAR, /* \w */
77    XML_REGEXP_NOTREALCHAR, /* \w */
78    XML_REGEXP_LETTER,
79    XML_REGEXP_LETTER_UPPERCASE,
80    XML_REGEXP_LETTER_LOWERCASE,
81    XML_REGEXP_LETTER_TITLECASE,
82    XML_REGEXP_LETTER_MODIFIER,
83    XML_REGEXP_LETTER_OTHERS,
84    XML_REGEXP_MARK,
85    XML_REGEXP_MARK_NONSPACING,
86    XML_REGEXP_MARK_SPACECOMBINING,
87    XML_REGEXP_MARK_ENCLOSING,
88    XML_REGEXP_NUMBER,
89    XML_REGEXP_NUMBER_DECIMAL,
90    XML_REGEXP_NUMBER_LETTER,
91    XML_REGEXP_NUMBER_OTHERS,
92    XML_REGEXP_PUNCT,
93    XML_REGEXP_PUNCT_CONNECTOR,
94    XML_REGEXP_PUNCT_DASH,
95    XML_REGEXP_PUNCT_OPEN,
96    XML_REGEXP_PUNCT_CLOSE,
97    XML_REGEXP_PUNCT_INITQUOTE,
98    XML_REGEXP_PUNCT_FINQUOTE,
99    XML_REGEXP_PUNCT_OTHERS,
100    XML_REGEXP_SEPAR,
101    XML_REGEXP_SEPAR_SPACE,
102    XML_REGEXP_SEPAR_LINE,
103    XML_REGEXP_SEPAR_PARA,
104    XML_REGEXP_SYMBOL,
105    XML_REGEXP_SYMBOL_MATH,
106    XML_REGEXP_SYMBOL_CURRENCY,
107    XML_REGEXP_SYMBOL_MODIFIER,
108    XML_REGEXP_SYMBOL_OTHERS,
109    XML_REGEXP_OTHER,
110    XML_REGEXP_OTHER_CONTROL,
111    XML_REGEXP_OTHER_FORMAT,
112    XML_REGEXP_OTHER_PRIVATE,
113    XML_REGEXP_OTHER_NA,
114    XML_REGEXP_BLOCK_NAME
115} xmlRegAtomType;
116
117typedef enum {
118    XML_REGEXP_QUANT_EPSILON = 1,
119    XML_REGEXP_QUANT_ONCE,
120    XML_REGEXP_QUANT_OPT,
121    XML_REGEXP_QUANT_MULT,
122    XML_REGEXP_QUANT_PLUS,
123    XML_REGEXP_QUANT_ONCEONLY,
124    XML_REGEXP_QUANT_ALL,
125    XML_REGEXP_QUANT_RANGE
126} xmlRegQuantType;
127
128typedef enum {
129    XML_REGEXP_START_STATE = 1,
130    XML_REGEXP_FINAL_STATE,
131    XML_REGEXP_TRANS_STATE
132} xmlRegStateType;
133
134typedef enum {
135    XML_REGEXP_MARK_NORMAL = 0,
136    XML_REGEXP_MARK_START,
137    XML_REGEXP_MARK_VISITED
138} xmlRegMarkedType;
139
140typedef struct _xmlRegRange xmlRegRange;
141typedef xmlRegRange *xmlRegRangePtr;
142
143struct _xmlRegRange {
144    int neg;
145    xmlRegAtomType type;
146    int start;
147    int end;
148    xmlChar *blockName;
149};
150
151typedef struct _xmlRegAtom xmlRegAtom;
152typedef xmlRegAtom *xmlRegAtomPtr;
153
154typedef struct _xmlAutomataState xmlRegState;
155typedef xmlRegState *xmlRegStatePtr;
156
157struct _xmlRegAtom {
158    int no;
159    xmlRegAtomType type;
160    xmlRegQuantType quant;
161    int min;
162    int max;
163
164    void *valuep;
165    void *valuep2;
166    int neg;
167    int codepoint;
168    xmlRegStatePtr start;
169    xmlRegStatePtr stop;
170    int maxRanges;
171    int nbRanges;
172    xmlRegRangePtr *ranges;
173    void *data;
174};
175
176typedef struct _xmlRegCounter xmlRegCounter;
177typedef xmlRegCounter *xmlRegCounterPtr;
178
179struct _xmlRegCounter {
180    int min;
181    int max;
182};
183
184typedef struct _xmlRegTrans xmlRegTrans;
185typedef xmlRegTrans *xmlRegTransPtr;
186
187struct _xmlRegTrans {
188    xmlRegAtomPtr atom;
189    int to;
190    int counter;
191    int count;
192};
193
194struct _xmlAutomataState {
195    xmlRegStateType type;
196    xmlRegMarkedType mark;
197    xmlRegMarkedType reached;
198    int no;
199
200    int maxTrans;
201    int nbTrans;
202    xmlRegTrans *trans;
203};
204
205typedef struct _xmlAutomata xmlRegParserCtxt;
206typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
207
208struct _xmlAutomata {
209    xmlChar *string;
210    xmlChar *cur;
211
212    int error;
213    int neg;
214
215    xmlRegStatePtr start;
216    xmlRegStatePtr end;
217    xmlRegStatePtr state;
218
219    xmlRegAtomPtr atom;
220
221    int maxAtoms;
222    int nbAtoms;
223    xmlRegAtomPtr *atoms;
224
225    int maxStates;
226    int nbStates;
227    xmlRegStatePtr *states;
228
229    int maxCounters;
230    int nbCounters;
231    xmlRegCounter *counters;
232
233    int determinist;
234};
235
236struct _xmlRegexp {
237    xmlChar *string;
238    int nbStates;
239    xmlRegStatePtr *states;
240    int nbAtoms;
241    xmlRegAtomPtr *atoms;
242    int nbCounters;
243    xmlRegCounter *counters;
244    int determinist;
245    /*
246     * That's the compact form for determinists automatas
247     */
248    int nbstates;
249    int *compact;
250    void **transdata;
251    int nbstrings;
252    xmlChar **stringMap;
253};
254
255typedef struct _xmlRegExecRollback xmlRegExecRollback;
256typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
257
258struct _xmlRegExecRollback {
259    xmlRegStatePtr state;/* the current state */
260    int index;		/* the index in the input stack */
261    int nextbranch;	/* the next transition to explore in that state */
262    int *counts;	/* save the automate state if it has some */
263};
264
265typedef struct _xmlRegInputToken xmlRegInputToken;
266typedef xmlRegInputToken *xmlRegInputTokenPtr;
267
268struct _xmlRegInputToken {
269    xmlChar *value;
270    void *data;
271};
272
273struct _xmlRegExecCtxt {
274    int status;		/* execution status != 0 indicate an error */
275    int determinist;	/* did we found an inderterministic behaviour */
276    xmlRegexpPtr comp;	/* the compiled regexp */
277    xmlRegExecCallbacks callback;
278    void *data;
279
280    xmlRegStatePtr state;/* the current state */
281    int transno;	/* the current transition on that state */
282    int transcount;	/* the number of char in char counted transitions */
283
284    /*
285     * A stack of rollback states
286     */
287    int maxRollbacks;
288    int nbRollbacks;
289    xmlRegExecRollback *rollbacks;
290
291    /*
292     * The state of the automata if any
293     */
294    int *counts;
295
296    /*
297     * The input stack
298     */
299    int inputStackMax;
300    int inputStackNr;
301    int index;
302    int *charStack;
303    const xmlChar *inputString; /* when operating on characters */
304    xmlRegInputTokenPtr inputStack;/* when operating on strings */
305
306};
307
308#define REGEXP_ALL_COUNTER	0x123456
309#define REGEXP_ALL_LAX_COUNTER	0x123457
310
311static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
312static void xmlRegFreeState(xmlRegStatePtr state);
313static void xmlRegFreeAtom(xmlRegAtomPtr atom);
314
315/************************************************************************
316 * 									*
317 * 			Allocation/Deallocation				*
318 * 									*
319 ************************************************************************/
320
321static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
322/**
323 * xmlRegEpxFromParse:
324 * @ctxt:  the parser context used to build it
325 *
326 * Allocate a new regexp and fill it with the reult from the parser
327 *
328 * Returns the new regexp or NULL in case of error
329 */
330static xmlRegexpPtr
331xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
332    xmlRegexpPtr ret;
333
334    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
335    if (ret == NULL)
336	return(NULL);
337    memset(ret, 0, sizeof(xmlRegexp));
338    ret->string = ctxt->string;
339    ctxt->string = NULL;
340    ret->nbStates = ctxt->nbStates;
341    ctxt->nbStates = 0;
342    ret->states = ctxt->states;
343    ctxt->states = NULL;
344    ret->nbAtoms = ctxt->nbAtoms;
345    ctxt->nbAtoms = 0;
346    ret->atoms = ctxt->atoms;
347    ctxt->atoms = NULL;
348    ret->nbCounters = ctxt->nbCounters;
349    ctxt->nbCounters = 0;
350    ret->counters = ctxt->counters;
351    ctxt->counters = NULL;
352    ret->determinist = ctxt->determinist;
353
354    if ((ret->determinist != 0) &&
355	(ret->nbCounters == 0) &&
356	(ret->atoms != NULL) &&
357	(ret->atoms[0] != NULL) &&
358	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
359	int i, j, nbstates = 0, nbatoms = 0;
360	int *stateRemap;
361	int *stringRemap;
362	int *transitions;
363	void **transdata;
364	xmlChar **stringMap;
365        xmlChar *value;
366
367	/*
368	 * Switch to a compact representation
369	 * 1/ counting the effective number of states left
370	 * 2/ conting the unique number of atoms, and check that
371	 *    they are all of the string type
372	 * 3/ build a table state x atom for the transitions
373	 */
374
375	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
376	for (i = 0;i < ret->nbStates;i++) {
377	    if (ret->states[i] != NULL) {
378		stateRemap[i] = nbstates;
379		nbstates++;
380	    } else {
381		stateRemap[i] = -1;
382	    }
383	}
384#ifdef DEBUG_COMPACTION
385	printf("Final: %d states\n", nbstates);
386#endif
387	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
388	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
389	for (i = 0;i < ret->nbAtoms;i++) {
390	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
391		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
392		value = ret->atoms[i]->valuep;
393                for (j = 0;j < nbatoms;j++) {
394		    if (xmlStrEqual(stringMap[j], value)) {
395			stringRemap[i] = j;
396			break;
397		    }
398		}
399		if (j >= nbatoms) {
400		    stringRemap[i] = nbatoms;
401		    stringMap[nbatoms] = xmlStrdup(value);
402		    nbatoms++;
403		}
404	    } else {
405		xmlFree(stateRemap);
406		xmlFree(stringRemap);
407		for (i = 0;i < nbatoms;i++)
408		    xmlFree(stringMap[i]);
409		xmlFree(stringMap);
410		goto fail_compact;
411	    }
412	}
413#ifdef DEBUG_COMPACTION
414	printf("Final: %d atoms\n", nbatoms);
415#endif
416
417	/*
418	 * Allocate the transition table. The first entry for each
419	 * state correspond to the state type.
420	 */
421	transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int));
422	transdata = NULL;
423	memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int));
424
425	for (i = 0;i < ret->nbStates;i++) {
426	    int stateno, atomno, targetno, prev;
427	    xmlRegStatePtr state;
428	    xmlRegTransPtr trans;
429
430	    stateno = stateRemap[i];
431	    if (stateno == -1)
432		continue;
433	    state = ret->states[i];
434
435	    transitions[stateno * (nbatoms + 1)] = state->type;
436
437	    for (j = 0;j < state->nbTrans;j++) {
438		trans = &(state->trans[j]);
439		if ((trans->to == -1) || (trans->atom == NULL))
440		    continue;
441                atomno = stringRemap[trans->atom->no];
442		if ((trans->atom->data != NULL) && (transdata == NULL)) {
443		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
444			                            sizeof(void *));
445		    if (transdata != NULL)
446			memset(transdata, 0,
447			       nbstates * nbatoms * sizeof(void *));
448		}
449		targetno = stateRemap[trans->to];
450		/*
451		 * if the same atome can generate transition to 2 different
452		 * states then it means the automata is not determinist and
453		 * the compact form can't be used !
454		 */
455		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
456		if (prev != 0) {
457		    if (prev != targetno + 1) {
458			ret->determinist = 0;
459#ifdef DEBUG_COMPACTION
460			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
461			       i, j, trans->atom->no, trans->to, atomno, targetno);
462			printf("       previous to is %d\n", prev);
463#endif
464			ret->determinist = 0;
465			if (transdata != NULL)
466			    xmlFree(transdata);
467			xmlFree(transitions);
468			xmlFree(stateRemap);
469			xmlFree(stringRemap);
470			for (i = 0;i < nbatoms;i++)
471			    xmlFree(stringMap[i]);
472			xmlFree(stringMap);
473			goto fail_compact;
474		    }
475		} else {
476#if 0
477		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
478			   i, j, trans->atom->no, trans->to, atomno, targetno);
479#endif
480		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
481			targetno + 1; /* to avoid 0 */
482		    if (transdata != NULL)
483			transdata[stateno * nbatoms + atomno] =
484			    trans->atom->data;
485		}
486	    }
487	}
488	ret->determinist = 1;
489#ifdef DEBUG_COMPACTION
490	/*
491	 * Debug
492	 */
493	for (i = 0;i < nbstates;i++) {
494	    for (j = 0;j < nbatoms + 1;j++) {
495                printf("%02d ", transitions[i * (nbatoms + 1) + j]);
496	    }
497	    printf("\n");
498	}
499	printf("\n");
500#endif
501	/*
502	 * Cleanup of the old data
503	 */
504	if (ret->states != NULL) {
505	    for (i = 0;i < ret->nbStates;i++)
506		xmlRegFreeState(ret->states[i]);
507	    xmlFree(ret->states);
508	}
509	ret->states = NULL;
510	ret->nbStates = 0;
511	if (ret->atoms != NULL) {
512	    for (i = 0;i < ret->nbAtoms;i++)
513		xmlRegFreeAtom(ret->atoms[i]);
514	    xmlFree(ret->atoms);
515	}
516	ret->atoms = NULL;
517	ret->nbAtoms = 0;
518
519	ret->compact = transitions;
520	ret->transdata = transdata;
521	ret->stringMap = stringMap;
522	ret->nbstrings = nbatoms;
523	ret->nbstates = nbstates;
524	xmlFree(stateRemap);
525	xmlFree(stringRemap);
526    }
527fail_compact:
528    return(ret);
529}
530
531/**
532 * xmlRegNewParserCtxt:
533 * @string:  the string to parse
534 *
535 * Allocate a new regexp parser context
536 *
537 * Returns the new context or NULL in case of error
538 */
539static xmlRegParserCtxtPtr
540xmlRegNewParserCtxt(const xmlChar *string) {
541    xmlRegParserCtxtPtr ret;
542
543    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
544    if (ret == NULL)
545	return(NULL);
546    memset(ret, 0, sizeof(xmlRegParserCtxt));
547    if (string != NULL)
548	ret->string = xmlStrdup(string);
549    ret->cur = ret->string;
550    ret->neg = 0;
551    ret->error = 0;
552    ret->determinist = -1;
553    return(ret);
554}
555
556/**
557 * xmlRegNewRange:
558 * @ctxt:  the regexp parser context
559 * @neg:  is that negative
560 * @type:  the type of range
561 * @start:  the start codepoint
562 * @end:  the end codepoint
563 *
564 * Allocate a new regexp range
565 *
566 * Returns the new range or NULL in case of error
567 */
568static xmlRegRangePtr
569xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
570	       int neg, xmlRegAtomType type, int start, int end) {
571    xmlRegRangePtr ret;
572
573    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
574    if (ret == NULL) {
575	ERROR("failed to allocate regexp range");
576	return(NULL);
577    }
578    ret->neg = neg;
579    ret->type = type;
580    ret->start = start;
581    ret->end = end;
582    return(ret);
583}
584
585/**
586 * xmlRegFreeRange:
587 * @range:  the regexp range
588 *
589 * Free a regexp range
590 */
591static void
592xmlRegFreeRange(xmlRegRangePtr range) {
593    if (range == NULL)
594	return;
595
596    if (range->blockName != NULL)
597	xmlFree(range->blockName);
598    xmlFree(range);
599}
600
601/**
602 * xmlRegNewAtom:
603 * @ctxt:  the regexp parser context
604 * @type:  the type of atom
605 *
606 * Allocate a new regexp range
607 *
608 * Returns the new atom or NULL in case of error
609 */
610static xmlRegAtomPtr
611xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
612    xmlRegAtomPtr ret;
613
614    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
615    if (ret == NULL) {
616	ERROR("failed to allocate regexp atom");
617	return(NULL);
618    }
619    memset(ret, 0, sizeof(xmlRegAtom));
620    ret->type = type;
621    ret->quant = XML_REGEXP_QUANT_ONCE;
622    ret->min = 0;
623    ret->max = 0;
624    return(ret);
625}
626
627/**
628 * xmlRegFreeAtom:
629 * @atom:  the regexp atom
630 *
631 * Free a regexp atom
632 */
633static void
634xmlRegFreeAtom(xmlRegAtomPtr atom) {
635    int i;
636
637    if (atom == NULL)
638	return;
639
640    for (i = 0;i < atom->nbRanges;i++)
641	xmlRegFreeRange(atom->ranges[i]);
642    if (atom->ranges != NULL)
643	xmlFree(atom->ranges);
644    if (atom->type == XML_REGEXP_STRING)
645	xmlFree(atom->valuep);
646    xmlFree(atom);
647}
648
649static xmlRegStatePtr
650xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
651    xmlRegStatePtr ret;
652
653    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
654    if (ret == NULL) {
655	ERROR("failed to allocate regexp state");
656	return(NULL);
657    }
658    memset(ret, 0, sizeof(xmlRegState));
659    ret->type = XML_REGEXP_TRANS_STATE;
660    ret->mark = XML_REGEXP_MARK_NORMAL;
661    return(ret);
662}
663
664/**
665 * xmlRegFreeState:
666 * @state:  the regexp state
667 *
668 * Free a regexp state
669 */
670static void
671xmlRegFreeState(xmlRegStatePtr state) {
672    if (state == NULL)
673	return;
674
675    if (state->trans != NULL)
676	xmlFree(state->trans);
677    xmlFree(state);
678}
679
680/**
681 * xmlRegFreeParserCtxt:
682 * @ctxt:  the regexp parser context
683 *
684 * Free a regexp parser context
685 */
686static void
687xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
688    int i;
689    if (ctxt == NULL)
690	return;
691
692    if (ctxt->string != NULL)
693	xmlFree(ctxt->string);
694    if (ctxt->states != NULL) {
695	for (i = 0;i < ctxt->nbStates;i++)
696	    xmlRegFreeState(ctxt->states[i]);
697	xmlFree(ctxt->states);
698    }
699    if (ctxt->atoms != NULL) {
700	for (i = 0;i < ctxt->nbAtoms;i++)
701	    xmlRegFreeAtom(ctxt->atoms[i]);
702	xmlFree(ctxt->atoms);
703    }
704    if (ctxt->counters != NULL)
705	xmlFree(ctxt->counters);
706    xmlFree(ctxt);
707}
708
709/************************************************************************
710 * 									*
711 * 			Display of Data structures			*
712 * 									*
713 ************************************************************************/
714
715static void
716xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
717    switch (type) {
718        case XML_REGEXP_EPSILON:
719	    fprintf(output, "epsilon "); break;
720        case XML_REGEXP_CHARVAL:
721	    fprintf(output, "charval "); break;
722        case XML_REGEXP_RANGES:
723	    fprintf(output, "ranges "); break;
724        case XML_REGEXP_SUBREG:
725	    fprintf(output, "subexpr "); break;
726        case XML_REGEXP_STRING:
727	    fprintf(output, "string "); break;
728        case XML_REGEXP_ANYCHAR:
729	    fprintf(output, "anychar "); break;
730        case XML_REGEXP_ANYSPACE:
731	    fprintf(output, "anyspace "); break;
732        case XML_REGEXP_NOTSPACE:
733	    fprintf(output, "notspace "); break;
734        case XML_REGEXP_INITNAME:
735	    fprintf(output, "initname "); break;
736        case XML_REGEXP_NOTINITNAME:
737	    fprintf(output, "notinitname "); break;
738        case XML_REGEXP_NAMECHAR:
739	    fprintf(output, "namechar "); break;
740        case XML_REGEXP_NOTNAMECHAR:
741	    fprintf(output, "notnamechar "); break;
742        case XML_REGEXP_DECIMAL:
743	    fprintf(output, "decimal "); break;
744        case XML_REGEXP_NOTDECIMAL:
745	    fprintf(output, "notdecimal "); break;
746        case XML_REGEXP_REALCHAR:
747	    fprintf(output, "realchar "); break;
748        case XML_REGEXP_NOTREALCHAR:
749	    fprintf(output, "notrealchar "); break;
750        case XML_REGEXP_LETTER:
751            fprintf(output, "LETTER "); break;
752        case XML_REGEXP_LETTER_UPPERCASE:
753            fprintf(output, "LETTER_UPPERCASE "); break;
754        case XML_REGEXP_LETTER_LOWERCASE:
755            fprintf(output, "LETTER_LOWERCASE "); break;
756        case XML_REGEXP_LETTER_TITLECASE:
757            fprintf(output, "LETTER_TITLECASE "); break;
758        case XML_REGEXP_LETTER_MODIFIER:
759            fprintf(output, "LETTER_MODIFIER "); break;
760        case XML_REGEXP_LETTER_OTHERS:
761            fprintf(output, "LETTER_OTHERS "); break;
762        case XML_REGEXP_MARK:
763            fprintf(output, "MARK "); break;
764        case XML_REGEXP_MARK_NONSPACING:
765            fprintf(output, "MARK_NONSPACING "); break;
766        case XML_REGEXP_MARK_SPACECOMBINING:
767            fprintf(output, "MARK_SPACECOMBINING "); break;
768        case XML_REGEXP_MARK_ENCLOSING:
769            fprintf(output, "MARK_ENCLOSING "); break;
770        case XML_REGEXP_NUMBER:
771            fprintf(output, "NUMBER "); break;
772        case XML_REGEXP_NUMBER_DECIMAL:
773            fprintf(output, "NUMBER_DECIMAL "); break;
774        case XML_REGEXP_NUMBER_LETTER:
775            fprintf(output, "NUMBER_LETTER "); break;
776        case XML_REGEXP_NUMBER_OTHERS:
777            fprintf(output, "NUMBER_OTHERS "); break;
778        case XML_REGEXP_PUNCT:
779            fprintf(output, "PUNCT "); break;
780        case XML_REGEXP_PUNCT_CONNECTOR:
781            fprintf(output, "PUNCT_CONNECTOR "); break;
782        case XML_REGEXP_PUNCT_DASH:
783            fprintf(output, "PUNCT_DASH "); break;
784        case XML_REGEXP_PUNCT_OPEN:
785            fprintf(output, "PUNCT_OPEN "); break;
786        case XML_REGEXP_PUNCT_CLOSE:
787            fprintf(output, "PUNCT_CLOSE "); break;
788        case XML_REGEXP_PUNCT_INITQUOTE:
789            fprintf(output, "PUNCT_INITQUOTE "); break;
790        case XML_REGEXP_PUNCT_FINQUOTE:
791            fprintf(output, "PUNCT_FINQUOTE "); break;
792        case XML_REGEXP_PUNCT_OTHERS:
793            fprintf(output, "PUNCT_OTHERS "); break;
794        case XML_REGEXP_SEPAR:
795            fprintf(output, "SEPAR "); break;
796        case XML_REGEXP_SEPAR_SPACE:
797            fprintf(output, "SEPAR_SPACE "); break;
798        case XML_REGEXP_SEPAR_LINE:
799            fprintf(output, "SEPAR_LINE "); break;
800        case XML_REGEXP_SEPAR_PARA:
801            fprintf(output, "SEPAR_PARA "); break;
802        case XML_REGEXP_SYMBOL:
803            fprintf(output, "SYMBOL "); break;
804        case XML_REGEXP_SYMBOL_MATH:
805            fprintf(output, "SYMBOL_MATH "); break;
806        case XML_REGEXP_SYMBOL_CURRENCY:
807            fprintf(output, "SYMBOL_CURRENCY "); break;
808        case XML_REGEXP_SYMBOL_MODIFIER:
809            fprintf(output, "SYMBOL_MODIFIER "); break;
810        case XML_REGEXP_SYMBOL_OTHERS:
811            fprintf(output, "SYMBOL_OTHERS "); break;
812        case XML_REGEXP_OTHER:
813            fprintf(output, "OTHER "); break;
814        case XML_REGEXP_OTHER_CONTROL:
815            fprintf(output, "OTHER_CONTROL "); break;
816        case XML_REGEXP_OTHER_FORMAT:
817            fprintf(output, "OTHER_FORMAT "); break;
818        case XML_REGEXP_OTHER_PRIVATE:
819            fprintf(output, "OTHER_PRIVATE "); break;
820        case XML_REGEXP_OTHER_NA:
821            fprintf(output, "OTHER_NA "); break;
822        case XML_REGEXP_BLOCK_NAME:
823	    fprintf(output, "BLOCK "); break;
824    }
825}
826
827static void
828xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
829    switch (type) {
830        case XML_REGEXP_QUANT_EPSILON:
831	    fprintf(output, "epsilon "); break;
832        case XML_REGEXP_QUANT_ONCE:
833	    fprintf(output, "once "); break;
834        case XML_REGEXP_QUANT_OPT:
835	    fprintf(output, "? "); break;
836        case XML_REGEXP_QUANT_MULT:
837	    fprintf(output, "* "); break;
838        case XML_REGEXP_QUANT_PLUS:
839	    fprintf(output, "+ "); break;
840	case XML_REGEXP_QUANT_RANGE:
841	    fprintf(output, "range "); break;
842	case XML_REGEXP_QUANT_ONCEONLY:
843	    fprintf(output, "onceonly "); break;
844	case XML_REGEXP_QUANT_ALL:
845	    fprintf(output, "all "); break;
846    }
847}
848static void
849xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
850    fprintf(output, "  range: ");
851    if (range->neg)
852	fprintf(output, "negative ");
853    xmlRegPrintAtomType(output, range->type);
854    fprintf(output, "%c - %c\n", range->start, range->end);
855}
856
857static void
858xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
859    fprintf(output, " atom: ");
860    if (atom == NULL) {
861	fprintf(output, "NULL\n");
862	return;
863    }
864    xmlRegPrintAtomType(output, atom->type);
865    xmlRegPrintQuantType(output, atom->quant);
866    if (atom->quant == XML_REGEXP_QUANT_RANGE)
867	fprintf(output, "%d-%d ", atom->min, atom->max);
868    if (atom->type == XML_REGEXP_STRING)
869	fprintf(output, "'%s' ", (char *) atom->valuep);
870    if (atom->type == XML_REGEXP_CHARVAL)
871	fprintf(output, "char %c\n", atom->codepoint);
872    else if (atom->type == XML_REGEXP_RANGES) {
873	int i;
874	fprintf(output, "%d entries\n", atom->nbRanges);
875	for (i = 0; i < atom->nbRanges;i++)
876	    xmlRegPrintRange(output, atom->ranges[i]);
877    } else if (atom->type == XML_REGEXP_SUBREG) {
878	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
879    } else {
880	fprintf(output, "\n");
881    }
882}
883
884static void
885xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
886    fprintf(output, "  trans: ");
887    if (trans == NULL) {
888	fprintf(output, "NULL\n");
889	return;
890    }
891    if (trans->to < 0) {
892	fprintf(output, "removed\n");
893	return;
894    }
895    if (trans->counter >= 0) {
896	fprintf(output, "counted %d, ", trans->counter);
897    }
898    if (trans->count == REGEXP_ALL_COUNTER) {
899	fprintf(output, "all transition, ");
900    } else if (trans->count >= 0) {
901	fprintf(output, "count based %d, ", trans->count);
902    }
903    if (trans->atom == NULL) {
904	fprintf(output, "epsilon to %d\n", trans->to);
905	return;
906    }
907    if (trans->atom->type == XML_REGEXP_CHARVAL)
908	fprintf(output, "char %c ", trans->atom->codepoint);
909    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
910}
911
912static void
913xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
914    int i;
915
916    fprintf(output, " state: ");
917    if (state == NULL) {
918	fprintf(output, "NULL\n");
919	return;
920    }
921    if (state->type == XML_REGEXP_START_STATE)
922	fprintf(output, "START ");
923    if (state->type == XML_REGEXP_FINAL_STATE)
924	fprintf(output, "FINAL ");
925
926    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
927    for (i = 0;i < state->nbTrans; i++) {
928	xmlRegPrintTrans(output, &(state->trans[i]));
929    }
930}
931
932#ifdef DEBUG_REGEXP_GRAPH
933static void
934xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
935    int i;
936
937    fprintf(output, " ctxt: ");
938    if (ctxt == NULL) {
939	fprintf(output, "NULL\n");
940	return;
941    }
942    fprintf(output, "'%s' ", ctxt->string);
943    if (ctxt->error)
944	fprintf(output, "error ");
945    if (ctxt->neg)
946	fprintf(output, "neg ");
947    fprintf(output, "\n");
948    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
949    for (i = 0;i < ctxt->nbAtoms; i++) {
950	fprintf(output, " %02d ", i);
951	xmlRegPrintAtom(output, ctxt->atoms[i]);
952    }
953    if (ctxt->atom != NULL) {
954	fprintf(output, "current atom:\n");
955	xmlRegPrintAtom(output, ctxt->atom);
956    }
957    fprintf(output, "%d states:", ctxt->nbStates);
958    if (ctxt->start != NULL)
959	fprintf(output, " start: %d", ctxt->start->no);
960    if (ctxt->end != NULL)
961	fprintf(output, " end: %d", ctxt->end->no);
962    fprintf(output, "\n");
963    for (i = 0;i < ctxt->nbStates; i++) {
964	xmlRegPrintState(output, ctxt->states[i]);
965    }
966    fprintf(output, "%d counters:\n", ctxt->nbCounters);
967    for (i = 0;i < ctxt->nbCounters; i++) {
968	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
969		                                ctxt->counters[i].max);
970    }
971}
972#endif
973
974/************************************************************************
975 * 									*
976 *		 Finite Automata structures manipulations		*
977 * 									*
978 ************************************************************************/
979
980static void
981xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
982	           int neg, xmlRegAtomType type, int start, int end,
983		   xmlChar *blockName) {
984    xmlRegRangePtr range;
985
986    if (atom == NULL) {
987	ERROR("add range: atom is NULL");
988	return;
989    }
990    if (atom->type != XML_REGEXP_RANGES) {
991	ERROR("add range: atom is not ranges");
992	return;
993    }
994    if (atom->maxRanges == 0) {
995	atom->maxRanges = 4;
996	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
997		                             sizeof(xmlRegRangePtr));
998	if (atom->ranges == NULL) {
999	    ERROR("add range: allocation failed");
1000	    atom->maxRanges = 0;
1001	    return;
1002	}
1003    } else if (atom->nbRanges >= atom->maxRanges) {
1004	xmlRegRangePtr *tmp;
1005	atom->maxRanges *= 2;
1006	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
1007		                             sizeof(xmlRegRangePtr));
1008	if (tmp == NULL) {
1009	    ERROR("add range: allocation failed");
1010	    atom->maxRanges /= 2;
1011	    return;
1012	}
1013	atom->ranges = tmp;
1014    }
1015    range = xmlRegNewRange(ctxt, neg, type, start, end);
1016    if (range == NULL)
1017	return;
1018    range->blockName = blockName;
1019    atom->ranges[atom->nbRanges++] = range;
1020
1021}
1022
1023static int
1024xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
1025    if (ctxt->maxCounters == 0) {
1026	ctxt->maxCounters = 4;
1027	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
1028		                             sizeof(xmlRegCounter));
1029	if (ctxt->counters == NULL) {
1030	    ERROR("reg counter: allocation failed");
1031	    ctxt->maxCounters = 0;
1032	    return(-1);
1033	}
1034    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
1035	xmlRegCounter *tmp;
1036	ctxt->maxCounters *= 2;
1037	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
1038		                           sizeof(xmlRegCounter));
1039	if (tmp == NULL) {
1040	    ERROR("reg counter: allocation failed");
1041	    ctxt->maxCounters /= 2;
1042	    return(-1);
1043	}
1044	ctxt->counters = tmp;
1045    }
1046    ctxt->counters[ctxt->nbCounters].min = -1;
1047    ctxt->counters[ctxt->nbCounters].max = -1;
1048    return(ctxt->nbCounters++);
1049}
1050
1051static void
1052xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
1053    if (atom == NULL) {
1054	ERROR("atom push: atom is NULL");
1055	return;
1056    }
1057    if (ctxt->maxAtoms == 0) {
1058	ctxt->maxAtoms = 4;
1059	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
1060		                             sizeof(xmlRegAtomPtr));
1061	if (ctxt->atoms == NULL) {
1062	    ERROR("atom push: allocation failed");
1063	    ctxt->maxAtoms = 0;
1064	    return;
1065	}
1066    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
1067	xmlRegAtomPtr *tmp;
1068	ctxt->maxAtoms *= 2;
1069	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
1070		                             sizeof(xmlRegAtomPtr));
1071	if (tmp == NULL) {
1072	    ERROR("atom push: allocation failed");
1073	    ctxt->maxAtoms /= 2;
1074	    return;
1075	}
1076	ctxt->atoms = tmp;
1077    }
1078    atom->no = ctxt->nbAtoms;
1079    ctxt->atoms[ctxt->nbAtoms++] = atom;
1080}
1081
1082static void
1083xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1084	            xmlRegAtomPtr atom, xmlRegStatePtr target,
1085		    int counter, int count) {
1086    if (state == NULL) {
1087	ERROR("add state: state is NULL");
1088	return;
1089    }
1090    if (target == NULL) {
1091	ERROR("add state: target is NULL");
1092	return;
1093    }
1094    if (state->maxTrans == 0) {
1095	state->maxTrans = 4;
1096	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
1097		                             sizeof(xmlRegTrans));
1098	if (state->trans == NULL) {
1099	    ERROR("add range: allocation failed");
1100	    state->maxTrans = 0;
1101	    return;
1102	}
1103    } else if (state->nbTrans >= state->maxTrans) {
1104	xmlRegTrans *tmp;
1105	state->maxTrans *= 2;
1106	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
1107		                             sizeof(xmlRegTrans));
1108	if (tmp == NULL) {
1109	    ERROR("add range: allocation failed");
1110	    state->maxTrans /= 2;
1111	    return;
1112	}
1113	state->trans = tmp;
1114    }
1115#ifdef DEBUG_REGEXP_GRAPH
1116    printf("Add trans from %d to %d ", state->no, target->no);
1117    if (count == REGEXP_ALL_COUNTER)
1118	printf("all transition");
1119    else if (count >= 0)
1120	printf("count based %d", count);
1121    else if (counter >= 0)
1122	printf("counted %d", counter);
1123    else if (atom == NULL)
1124	printf("epsilon transition");
1125    printf("\n");
1126#endif
1127
1128    state->trans[state->nbTrans].atom = atom;
1129    state->trans[state->nbTrans].to = target->no;
1130    state->trans[state->nbTrans].counter = counter;
1131    state->trans[state->nbTrans].count = count;
1132    state->nbTrans++;
1133}
1134
1135static void
1136xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
1137    if (ctxt->maxStates == 0) {
1138	ctxt->maxStates = 4;
1139	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
1140		                             sizeof(xmlRegStatePtr));
1141	if (ctxt->states == NULL) {
1142	    ERROR("add range: allocation failed");
1143	    ctxt->maxStates = 0;
1144	    return;
1145	}
1146    } else if (ctxt->nbStates >= ctxt->maxStates) {
1147	xmlRegStatePtr *tmp;
1148	ctxt->maxStates *= 2;
1149	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
1150		                             sizeof(xmlRegStatePtr));
1151	if (tmp == NULL) {
1152	    ERROR("add range: allocation failed");
1153	    ctxt->maxStates /= 2;
1154	    return;
1155	}
1156	ctxt->states = tmp;
1157    }
1158    state->no = ctxt->nbStates;
1159    ctxt->states[ctxt->nbStates++] = state;
1160}
1161
1162/**
1163 * xmlFAGenerateAllTransition:
1164 * @ctxt:  a regexp parser context
1165 * @from:  the from state
1166 * @to:  the target state or NULL for building a new one
1167 * @lax:
1168 *
1169 */
1170static void
1171xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
1172			   xmlRegStatePtr from, xmlRegStatePtr to,
1173			   int lax) {
1174    if (to == NULL) {
1175	to = xmlRegNewState(ctxt);
1176	xmlRegStatePush(ctxt, to);
1177	ctxt->state = to;
1178    }
1179    if (lax)
1180	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
1181    else
1182	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
1183}
1184
1185/**
1186 * xmlFAGenerateEpsilonTransition:
1187 * @ctxt:  a regexp parser context
1188 * @from:  the from state
1189 * @to:  the target state or NULL for building a new one
1190 *
1191 */
1192static void
1193xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1194			       xmlRegStatePtr from, xmlRegStatePtr to) {
1195    if (to == NULL) {
1196	to = xmlRegNewState(ctxt);
1197	xmlRegStatePush(ctxt, to);
1198	ctxt->state = to;
1199    }
1200    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
1201}
1202
1203/**
1204 * xmlFAGenerateCountedEpsilonTransition:
1205 * @ctxt:  a regexp parser context
1206 * @from:  the from state
1207 * @to:  the target state or NULL for building a new one
1208 * counter:  the counter for that transition
1209 *
1210 */
1211static void
1212xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
1213	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1214    if (to == NULL) {
1215	to = xmlRegNewState(ctxt);
1216	xmlRegStatePush(ctxt, to);
1217	ctxt->state = to;
1218    }
1219    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1220}
1221
1222/**
1223 * xmlFAGenerateCountedTransition:
1224 * @ctxt:  a regexp parser context
1225 * @from:  the from state
1226 * @to:  the target state or NULL for building a new one
1227 * counter:  the counter for that transition
1228 *
1229 */
1230static void
1231xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1232	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1233    if (to == NULL) {
1234	to = xmlRegNewState(ctxt);
1235	xmlRegStatePush(ctxt, to);
1236	ctxt->state = to;
1237    }
1238    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1239}
1240
1241/**
1242 * xmlFAGenerateTransitions:
1243 * @ctxt:  a regexp parser context
1244 * @from:  the from state
1245 * @to:  the target state or NULL for building a new one
1246 * @atom:  the atom generating the transition
1247 *
1248 */
1249static void
1250xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1251	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1252    if (atom == NULL) {
1253	ERROR("genrate transition: atom == NULL");
1254	return;
1255    }
1256    if (atom->type == XML_REGEXP_SUBREG) {
1257	/*
1258	 * this is a subexpression handling one should not need to
1259	 * create a new node excep for XML_REGEXP_QUANT_RANGE.
1260	 */
1261	xmlRegAtomPush(ctxt, atom);
1262	if ((to != NULL) && (atom->stop != to) &&
1263	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1264	    /*
1265	     * Generate an epsilon transition to link to the target
1266	     */
1267	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1268	}
1269	switch (atom->quant) {
1270	    case XML_REGEXP_QUANT_OPT:
1271		atom->quant = XML_REGEXP_QUANT_ONCE;
1272		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1273		break;
1274	    case XML_REGEXP_QUANT_MULT:
1275		atom->quant = XML_REGEXP_QUANT_ONCE;
1276		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1277		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1278		break;
1279	    case XML_REGEXP_QUANT_PLUS:
1280		atom->quant = XML_REGEXP_QUANT_ONCE;
1281		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1282		break;
1283	    case XML_REGEXP_QUANT_RANGE: {
1284		int counter;
1285		xmlRegStatePtr newstate;
1286
1287		/*
1288		 * This one is nasty:
1289		 *   1/ register a new counter
1290		 *   2/ register an epsilon transition associated to
1291		 *      this counter going from atom->stop to atom->start
1292		 *   3/ create a new state
1293		 *   4/ generate a counted transition from atom->stop to
1294		 *      that state
1295		 */
1296		counter = xmlRegGetCounter(ctxt);
1297		ctxt->counters[counter].min = atom->min - 1;
1298		ctxt->counters[counter].max = atom->max - 1;
1299		atom->min = 0;
1300		atom->max = 0;
1301		atom->quant = XML_REGEXP_QUANT_ONCE;
1302		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1303			                              atom->start, counter);
1304		if (to != NULL) {
1305		    newstate = to;
1306		} else {
1307		    newstate = xmlRegNewState(ctxt);
1308		    xmlRegStatePush(ctxt, newstate);
1309		    ctxt->state = newstate;
1310		}
1311		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1312			                       newstate, counter);
1313	    }
1314	    default:
1315		break;
1316	}
1317	return;
1318    } else {
1319	if (to == NULL) {
1320	    to = xmlRegNewState(ctxt);
1321	    xmlRegStatePush(ctxt, to);
1322	}
1323	xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1324	xmlRegAtomPush(ctxt, atom);
1325	ctxt->state = to;
1326    }
1327    switch (atom->quant) {
1328	case XML_REGEXP_QUANT_OPT:
1329	    atom->quant = XML_REGEXP_QUANT_ONCE;
1330	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1331	    break;
1332	case XML_REGEXP_QUANT_MULT:
1333	    atom->quant = XML_REGEXP_QUANT_ONCE;
1334	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1335	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1336	    break;
1337	case XML_REGEXP_QUANT_PLUS:
1338	    atom->quant = XML_REGEXP_QUANT_ONCE;
1339	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1340	    break;
1341	default:
1342	    break;
1343    }
1344}
1345
1346/**
1347 * xmlFAReduceEpsilonTransitions:
1348 * @ctxt:  a regexp parser context
1349 * @fromnr:  the from state
1350 * @tonr:  the to state
1351 * @cpunter:  should that transition be associted to a counted
1352 *
1353 */
1354static void
1355xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1356	                      int tonr, int counter) {
1357    int transnr;
1358    xmlRegStatePtr from;
1359    xmlRegStatePtr to;
1360
1361#ifdef DEBUG_REGEXP_GRAPH
1362    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1363#endif
1364    from = ctxt->states[fromnr];
1365    if (from == NULL)
1366	return;
1367    to = ctxt->states[tonr];
1368    if (to == NULL)
1369	return;
1370    if ((to->mark == XML_REGEXP_MARK_START) ||
1371	(to->mark == XML_REGEXP_MARK_VISITED))
1372	return;
1373
1374    to->mark = XML_REGEXP_MARK_VISITED;
1375    if (to->type == XML_REGEXP_FINAL_STATE) {
1376#ifdef DEBUG_REGEXP_GRAPH
1377	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1378#endif
1379	from->type = XML_REGEXP_FINAL_STATE;
1380    }
1381    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1382	if (to->trans[transnr].atom == NULL) {
1383	    /*
1384	     * Don't remove counted transitions
1385	     * Don't loop either
1386	     */
1387	    if (to->trans[transnr].to != fromnr) {
1388		if (to->trans[transnr].count >= 0) {
1389		    int newto = to->trans[transnr].to;
1390
1391		    xmlRegStateAddTrans(ctxt, from, NULL,
1392					ctxt->states[newto],
1393					-1, to->trans[transnr].count);
1394		} else {
1395#ifdef DEBUG_REGEXP_GRAPH
1396		    printf("Found epsilon trans %d from %d to %d\n",
1397			   transnr, tonr, to->trans[transnr].to);
1398#endif
1399		    if (to->trans[transnr].counter >= 0) {
1400			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1401					      to->trans[transnr].to,
1402					      to->trans[transnr].counter);
1403		    } else {
1404			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1405					      to->trans[transnr].to,
1406					      counter);
1407		    }
1408		}
1409	    }
1410	} else {
1411	    int newto = to->trans[transnr].to;
1412
1413	    if (to->trans[transnr].counter >= 0) {
1414		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1415				    ctxt->states[newto],
1416				    to->trans[transnr].counter, -1);
1417	    } else {
1418		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1419				    ctxt->states[newto], counter, -1);
1420	    }
1421	}
1422    }
1423    to->mark = XML_REGEXP_MARK_NORMAL;
1424}
1425
1426/**
1427 * xmlFAEliminateEpsilonTransitions:
1428 * @ctxt:  a regexp parser context
1429 *
1430 */
1431static void
1432xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1433    int statenr, transnr;
1434    xmlRegStatePtr state;
1435
1436    /*
1437     * build the completed transitions bypassing the epsilons
1438     * Use a marking algorithm to avoid loops
1439     */
1440    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1441	state = ctxt->states[statenr];
1442	if (state == NULL)
1443	    continue;
1444	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1445	    if ((state->trans[transnr].atom == NULL) &&
1446		(state->trans[transnr].to >= 0)) {
1447		if (state->trans[transnr].to == statenr) {
1448		    state->trans[transnr].to = -1;
1449#ifdef DEBUG_REGEXP_GRAPH
1450		    printf("Removed loopback epsilon trans %d on %d\n",
1451			   transnr, statenr);
1452#endif
1453		} else if (state->trans[transnr].count < 0) {
1454		    int newto = state->trans[transnr].to;
1455
1456#ifdef DEBUG_REGEXP_GRAPH
1457		    printf("Found epsilon trans %d from %d to %d\n",
1458			   transnr, statenr, newto);
1459#endif
1460		    state->mark = XML_REGEXP_MARK_START;
1461		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1462				      newto, state->trans[transnr].counter);
1463		    state->mark = XML_REGEXP_MARK_NORMAL;
1464#ifdef DEBUG_REGEXP_GRAPH
1465		} else {
1466		    printf("Found counted transition %d on %d\n",
1467			   transnr, statenr);
1468#endif
1469	        }
1470	    }
1471	}
1472    }
1473    /*
1474     * Eliminate the epsilon transitions
1475     */
1476    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1477	state = ctxt->states[statenr];
1478	if (state == NULL)
1479	    continue;
1480	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1481	    if ((state->trans[transnr].atom == NULL) &&
1482		(state->trans[transnr].count < 0) &&
1483		(state->trans[transnr].to >= 0)) {
1484		state->trans[transnr].to = -1;
1485	    }
1486	}
1487    }
1488
1489    /*
1490     * Use this pass to detect unreachable states too
1491     */
1492    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1493	state = ctxt->states[statenr];
1494	if (state != NULL)
1495	    state->reached = 0;
1496    }
1497    state = ctxt->states[0];
1498    if (state != NULL)
1499	state->reached = 1;
1500    while (state != NULL) {
1501	xmlRegStatePtr target = NULL;
1502	state->reached = 2;
1503	/*
1504	 * Mark all state reachable from the current reachable state
1505	 */
1506	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1507	    if ((state->trans[transnr].to >= 0) &&
1508		((state->trans[transnr].atom != NULL) ||
1509		 (state->trans[transnr].count >= 0))) {
1510		int newto = state->trans[transnr].to;
1511
1512		if (ctxt->states[newto] == NULL)
1513		    continue;
1514		if (ctxt->states[newto]->reached == 0) {
1515		    ctxt->states[newto]->reached = 1;
1516		    target = ctxt->states[newto];
1517		}
1518	    }
1519	}
1520	/*
1521	 * find the next accessible state not explored
1522	 */
1523	if (target == NULL) {
1524	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
1525		state = ctxt->states[statenr];
1526		if ((state != NULL) && (state->reached == 1)) {
1527		    target = state;
1528		    break;
1529		}
1530	    }
1531	}
1532	state = target;
1533    }
1534    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1535	state = ctxt->states[statenr];
1536	if ((state != NULL) && (state->reached == 0)) {
1537#ifdef DEBUG_REGEXP_GRAPH
1538	    printf("Removed unreachable state %d\n", statenr);
1539#endif
1540	    xmlRegFreeState(state);
1541	    ctxt->states[statenr] = NULL;
1542	}
1543    }
1544
1545}
1546
1547/**
1548 * xmlFACompareAtoms:
1549 * @atom1:  an atom
1550 * @atom2:  an atom
1551 *
1552 * Compares two atoms to check whether they are equivatents
1553 *
1554 * Returns 1 if yes and 0 otherwise
1555 */
1556static int
1557xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
1558    if (atom1 == atom2)
1559	return(1);
1560    if ((atom1 == NULL) || (atom2 == NULL))
1561	return(0);
1562
1563    if (atom1->type != atom2->type)
1564	return(0);
1565    switch (atom1->type) {
1566        case XML_REGEXP_STRING:
1567	    return(xmlStrEqual((xmlChar *)atom1->valuep,
1568			       (xmlChar *)atom2->valuep));
1569        case XML_REGEXP_EPSILON:
1570	    return(1);
1571        case XML_REGEXP_CHARVAL:
1572	    return(atom1->codepoint == atom2->codepoint);
1573        case XML_REGEXP_RANGES:
1574	    TODO;
1575	    return(0);
1576	default:
1577	    break;
1578    }
1579    return(1);
1580}
1581
1582/**
1583 * xmlFARecurseDeterminism:
1584 * @ctxt:  a regexp parser context
1585 *
1586 * Check whether the associated regexp is determinist,
1587 * should be called after xmlFAEliminateEpsilonTransitions()
1588 *
1589 */
1590static int
1591xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
1592	                 int to, xmlRegAtomPtr atom) {
1593    int ret = 1;
1594    int transnr;
1595    xmlRegTransPtr t1;
1596
1597    if (state == NULL)
1598	return(ret);
1599    for (transnr = 0;transnr < state->nbTrans;transnr++) {
1600	t1 = &(state->trans[transnr]);
1601	/*
1602	 * check transitions conflicting with the one looked at
1603	 */
1604	if (t1->atom == NULL) {
1605	    if (t1->to == -1)
1606		continue;
1607	    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1608		                           to, atom);
1609	    if (ret == 0)
1610		return(0);
1611	    continue;
1612	}
1613	if (t1->to != to)
1614	    continue;
1615	if (xmlFACompareAtoms(t1->atom, atom))
1616	    return(0);
1617    }
1618    return(ret);
1619}
1620
1621/**
1622 * xmlFAComputesDeterminism:
1623 * @ctxt:  a regexp parser context
1624 *
1625 * Check whether the associated regexp is determinist,
1626 * should be called after xmlFAEliminateEpsilonTransitions()
1627 *
1628 */
1629static int
1630xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
1631    int statenr, transnr;
1632    xmlRegStatePtr state;
1633    xmlRegTransPtr t1, t2;
1634    int i;
1635    int ret = 1;
1636
1637#ifdef DEBUG_REGEXP_GRAPH
1638    printf("xmlFAComputesDeterminism\n");
1639    xmlRegPrintCtxt(stdout, ctxt);
1640#endif
1641    if (ctxt->determinist != -1)
1642	return(ctxt->determinist);
1643
1644    /*
1645     * Check for all states that there isn't 2 transitions
1646     * with the same atom and a different target.
1647     */
1648    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1649	state = ctxt->states[statenr];
1650	if (state == NULL)
1651	    continue;
1652	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1653	    t1 = &(state->trans[transnr]);
1654	    /*
1655	     * Determinism checks in case of counted or all transitions
1656	     * will have to be handled separately
1657	     */
1658	    if (t1->atom == NULL)
1659		continue;
1660	    if (t1->to == -1) /* eliminated */
1661		continue;
1662	    for (i = 0;i < transnr;i++) {
1663		t2 = &(state->trans[i]);
1664		if (t2->to == -1) /* eliminated */
1665		    continue;
1666		if (t2->atom != NULL) {
1667		    if (t1->to == t2->to) {
1668			if (xmlFACompareAtoms(t1->atom, t2->atom))
1669			    t2->to = -1; /* eliminate */
1670		    } else {
1671			/* not determinist ! */
1672			if (xmlFACompareAtoms(t1->atom, t2->atom))
1673			    ret = 0;
1674		    }
1675		} else if (t1->to != -1) {
1676		    /*
1677		     * do the closure in case of remaining specific
1678		     * epsilon transitions like choices or all
1679		     */
1680		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
1681						   t2->to, t2->atom);
1682		    if (ret == 0)
1683			return(0);
1684		}
1685	    }
1686	    if (ret == 0)
1687		break;
1688	}
1689	if (ret == 0)
1690	    break;
1691    }
1692    ctxt->determinist = ret;
1693    return(ret);
1694}
1695
1696/************************************************************************
1697 * 									*
1698 *	Routines to check input against transition atoms		*
1699 * 									*
1700 ************************************************************************/
1701
1702static int
1703xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
1704	                  int start, int end, const xmlChar *blockName) {
1705    int ret = 0;
1706
1707    switch (type) {
1708        case XML_REGEXP_STRING:
1709        case XML_REGEXP_SUBREG:
1710        case XML_REGEXP_RANGES:
1711        case XML_REGEXP_EPSILON:
1712	    return(-1);
1713        case XML_REGEXP_ANYCHAR:
1714	    ret = ((codepoint != '\n') && (codepoint != '\r'));
1715	    break;
1716        case XML_REGEXP_CHARVAL:
1717	    ret = ((codepoint >= start) && (codepoint <= end));
1718	    break;
1719        case XML_REGEXP_NOTSPACE:
1720	    neg = !neg;
1721        case XML_REGEXP_ANYSPACE:
1722	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
1723		   (codepoint == '\t') || (codepoint == ' '));
1724	    break;
1725        case XML_REGEXP_NOTINITNAME:
1726	    neg = !neg;
1727        case XML_REGEXP_INITNAME:
1728	    ret = (xmlIsLetter(codepoint) ||
1729		   (codepoint == '_') || (codepoint == ':'));
1730	    break;
1731        case XML_REGEXP_NOTNAMECHAR:
1732	    neg = !neg;
1733        case XML_REGEXP_NAMECHAR:
1734	    ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
1735		   (codepoint == '.') || (codepoint == '-') ||
1736		   (codepoint == '_') || (codepoint == ':') ||
1737		   xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
1738	    break;
1739        case XML_REGEXP_NOTDECIMAL:
1740	    neg = !neg;
1741        case XML_REGEXP_DECIMAL:
1742	    ret = xmlUCSIsCatNd(codepoint);
1743	    break;
1744        case XML_REGEXP_REALCHAR:
1745	    neg = !neg;
1746        case XML_REGEXP_NOTREALCHAR:
1747	    ret = xmlUCSIsCatP(codepoint);
1748	    if (ret == 0)
1749		ret = xmlUCSIsCatZ(codepoint);
1750	    if (ret == 0)
1751		ret = xmlUCSIsCatC(codepoint);
1752	    break;
1753        case XML_REGEXP_LETTER:
1754	    ret = xmlUCSIsCatL(codepoint);
1755	    break;
1756        case XML_REGEXP_LETTER_UPPERCASE:
1757	    ret = xmlUCSIsCatLu(codepoint);
1758	    break;
1759        case XML_REGEXP_LETTER_LOWERCASE:
1760	    ret = xmlUCSIsCatLl(codepoint);
1761	    break;
1762        case XML_REGEXP_LETTER_TITLECASE:
1763	    ret = xmlUCSIsCatLt(codepoint);
1764	    break;
1765        case XML_REGEXP_LETTER_MODIFIER:
1766	    ret = xmlUCSIsCatLm(codepoint);
1767	    break;
1768        case XML_REGEXP_LETTER_OTHERS:
1769	    ret = xmlUCSIsCatLo(codepoint);
1770	    break;
1771        case XML_REGEXP_MARK:
1772	    ret = xmlUCSIsCatM(codepoint);
1773	    break;
1774        case XML_REGEXP_MARK_NONSPACING:
1775	    ret = xmlUCSIsCatMn(codepoint);
1776	    break;
1777        case XML_REGEXP_MARK_SPACECOMBINING:
1778	    ret = xmlUCSIsCatMc(codepoint);
1779	    break;
1780        case XML_REGEXP_MARK_ENCLOSING:
1781	    ret = xmlUCSIsCatMe(codepoint);
1782	    break;
1783        case XML_REGEXP_NUMBER:
1784	    ret = xmlUCSIsCatN(codepoint);
1785	    break;
1786        case XML_REGEXP_NUMBER_DECIMAL:
1787	    ret = xmlUCSIsCatNd(codepoint);
1788	    break;
1789        case XML_REGEXP_NUMBER_LETTER:
1790	    ret = xmlUCSIsCatNl(codepoint);
1791	    break;
1792        case XML_REGEXP_NUMBER_OTHERS:
1793	    ret = xmlUCSIsCatNo(codepoint);
1794	    break;
1795        case XML_REGEXP_PUNCT:
1796	    ret = xmlUCSIsCatP(codepoint);
1797	    break;
1798        case XML_REGEXP_PUNCT_CONNECTOR:
1799	    ret = xmlUCSIsCatPc(codepoint);
1800	    break;
1801        case XML_REGEXP_PUNCT_DASH:
1802	    ret = xmlUCSIsCatPd(codepoint);
1803	    break;
1804        case XML_REGEXP_PUNCT_OPEN:
1805	    ret = xmlUCSIsCatPs(codepoint);
1806	    break;
1807        case XML_REGEXP_PUNCT_CLOSE:
1808	    ret = xmlUCSIsCatPe(codepoint);
1809	    break;
1810        case XML_REGEXP_PUNCT_INITQUOTE:
1811	    ret = xmlUCSIsCatPi(codepoint);
1812	    break;
1813        case XML_REGEXP_PUNCT_FINQUOTE:
1814	    ret = xmlUCSIsCatPf(codepoint);
1815	    break;
1816        case XML_REGEXP_PUNCT_OTHERS:
1817	    ret = xmlUCSIsCatPo(codepoint);
1818	    break;
1819        case XML_REGEXP_SEPAR:
1820	    ret = xmlUCSIsCatZ(codepoint);
1821	    break;
1822        case XML_REGEXP_SEPAR_SPACE:
1823	    ret = xmlUCSIsCatZs(codepoint);
1824	    break;
1825        case XML_REGEXP_SEPAR_LINE:
1826	    ret = xmlUCSIsCatZl(codepoint);
1827	    break;
1828        case XML_REGEXP_SEPAR_PARA:
1829	    ret = xmlUCSIsCatZp(codepoint);
1830	    break;
1831        case XML_REGEXP_SYMBOL:
1832	    ret = xmlUCSIsCatS(codepoint);
1833	    break;
1834        case XML_REGEXP_SYMBOL_MATH:
1835	    ret = xmlUCSIsCatSm(codepoint);
1836	    break;
1837        case XML_REGEXP_SYMBOL_CURRENCY:
1838	    ret = xmlUCSIsCatSc(codepoint);
1839	    break;
1840        case XML_REGEXP_SYMBOL_MODIFIER:
1841	    ret = xmlUCSIsCatSk(codepoint);
1842	    break;
1843        case XML_REGEXP_SYMBOL_OTHERS:
1844	    ret = xmlUCSIsCatSo(codepoint);
1845	    break;
1846        case XML_REGEXP_OTHER:
1847	    ret = xmlUCSIsCatC(codepoint);
1848	    break;
1849        case XML_REGEXP_OTHER_CONTROL:
1850	    ret = xmlUCSIsCatCc(codepoint);
1851	    break;
1852        case XML_REGEXP_OTHER_FORMAT:
1853	    ret = xmlUCSIsCatCf(codepoint);
1854	    break;
1855        case XML_REGEXP_OTHER_PRIVATE:
1856	    ret = xmlUCSIsCatCo(codepoint);
1857	    break;
1858        case XML_REGEXP_OTHER_NA:
1859	    /* ret = xmlUCSIsCatCn(codepoint); */
1860	    /* Seems it doesn't exist anymore in recent Unicode releases */
1861	    ret = 0;
1862	    break;
1863        case XML_REGEXP_BLOCK_NAME:
1864	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
1865	    break;
1866    }
1867    if (neg)
1868	return(!ret);
1869    return(ret);
1870}
1871
1872static int
1873xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
1874    int i, ret = 0;
1875    xmlRegRangePtr range;
1876
1877    if ((atom == NULL) || (!xmlIsChar(codepoint)))
1878	return(-1);
1879
1880    switch (atom->type) {
1881        case XML_REGEXP_SUBREG:
1882        case XML_REGEXP_EPSILON:
1883	    return(-1);
1884        case XML_REGEXP_CHARVAL:
1885            return(codepoint == atom->codepoint);
1886        case XML_REGEXP_RANGES: {
1887	    int accept = 0;
1888	    for (i = 0;i < atom->nbRanges;i++) {
1889		range = atom->ranges[i];
1890		if (range->neg) {
1891		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1892						0, range->start, range->end,
1893						range->blockName);
1894		    if (ret != 0)
1895			return(0); /* excluded char */
1896		} else {
1897		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1898						0, range->start, range->end,
1899						range->blockName);
1900		    if (ret != 0)
1901			accept = 1; /* might still be excluded */
1902		}
1903	    }
1904	    return(accept);
1905	}
1906        case XML_REGEXP_STRING:
1907	    printf("TODO: XML_REGEXP_STRING\n");
1908	    return(-1);
1909        case XML_REGEXP_ANYCHAR:
1910        case XML_REGEXP_ANYSPACE:
1911        case XML_REGEXP_NOTSPACE:
1912        case XML_REGEXP_INITNAME:
1913        case XML_REGEXP_NOTINITNAME:
1914        case XML_REGEXP_NAMECHAR:
1915        case XML_REGEXP_NOTNAMECHAR:
1916        case XML_REGEXP_DECIMAL:
1917        case XML_REGEXP_NOTDECIMAL:
1918        case XML_REGEXP_REALCHAR:
1919        case XML_REGEXP_NOTREALCHAR:
1920        case XML_REGEXP_LETTER:
1921        case XML_REGEXP_LETTER_UPPERCASE:
1922        case XML_REGEXP_LETTER_LOWERCASE:
1923        case XML_REGEXP_LETTER_TITLECASE:
1924        case XML_REGEXP_LETTER_MODIFIER:
1925        case XML_REGEXP_LETTER_OTHERS:
1926        case XML_REGEXP_MARK:
1927        case XML_REGEXP_MARK_NONSPACING:
1928        case XML_REGEXP_MARK_SPACECOMBINING:
1929        case XML_REGEXP_MARK_ENCLOSING:
1930        case XML_REGEXP_NUMBER:
1931        case XML_REGEXP_NUMBER_DECIMAL:
1932        case XML_REGEXP_NUMBER_LETTER:
1933        case XML_REGEXP_NUMBER_OTHERS:
1934        case XML_REGEXP_PUNCT:
1935        case XML_REGEXP_PUNCT_CONNECTOR:
1936        case XML_REGEXP_PUNCT_DASH:
1937        case XML_REGEXP_PUNCT_OPEN:
1938        case XML_REGEXP_PUNCT_CLOSE:
1939        case XML_REGEXP_PUNCT_INITQUOTE:
1940        case XML_REGEXP_PUNCT_FINQUOTE:
1941        case XML_REGEXP_PUNCT_OTHERS:
1942        case XML_REGEXP_SEPAR:
1943        case XML_REGEXP_SEPAR_SPACE:
1944        case XML_REGEXP_SEPAR_LINE:
1945        case XML_REGEXP_SEPAR_PARA:
1946        case XML_REGEXP_SYMBOL:
1947        case XML_REGEXP_SYMBOL_MATH:
1948        case XML_REGEXP_SYMBOL_CURRENCY:
1949        case XML_REGEXP_SYMBOL_MODIFIER:
1950        case XML_REGEXP_SYMBOL_OTHERS:
1951        case XML_REGEXP_OTHER:
1952        case XML_REGEXP_OTHER_CONTROL:
1953        case XML_REGEXP_OTHER_FORMAT:
1954        case XML_REGEXP_OTHER_PRIVATE:
1955        case XML_REGEXP_OTHER_NA:
1956	case XML_REGEXP_BLOCK_NAME:
1957	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
1958		                            (const xmlChar *)atom->valuep);
1959	    if (atom->neg)
1960		ret = !ret;
1961	    break;
1962    }
1963    return(ret);
1964}
1965
1966/************************************************************************
1967 * 									*
1968 *	Saving an restoring state of an execution context		*
1969 * 									*
1970 ************************************************************************/
1971
1972#ifdef DEBUG_REGEXP_EXEC
1973static void
1974xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
1975    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
1976    if (exec->inputStack != NULL) {
1977	int i;
1978	printf(": ");
1979	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
1980	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
1981    } else {
1982	printf(": %s", &(exec->inputString[exec->index]));
1983    }
1984    printf("\n");
1985}
1986#endif
1987
1988static void
1989xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
1990#ifdef DEBUG_REGEXP_EXEC
1991    printf("saving ");
1992    exec->transno++;
1993    xmlFARegDebugExec(exec);
1994    exec->transno--;
1995#endif
1996
1997    if (exec->maxRollbacks == 0) {
1998	exec->maxRollbacks = 4;
1999	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
2000		                             sizeof(xmlRegExecRollback));
2001	if (exec->rollbacks == NULL) {
2002	    fprintf(stderr, "exec save: allocation failed");
2003	    exec->maxRollbacks = 0;
2004	    return;
2005	}
2006	memset(exec->rollbacks, 0,
2007	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
2008    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
2009	xmlRegExecRollback *tmp;
2010	int len = exec->maxRollbacks;
2011
2012	exec->maxRollbacks *= 2;
2013	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
2014			exec->maxRollbacks * sizeof(xmlRegExecRollback));
2015	if (tmp == NULL) {
2016	    fprintf(stderr, "exec save: allocation failed");
2017	    exec->maxRollbacks /= 2;
2018	    return;
2019	}
2020	exec->rollbacks = tmp;
2021	tmp = &exec->rollbacks[len];
2022	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
2023    }
2024    exec->rollbacks[exec->nbRollbacks].state = exec->state;
2025    exec->rollbacks[exec->nbRollbacks].index = exec->index;
2026    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
2027    if (exec->comp->nbCounters > 0) {
2028	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2029	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
2030		xmlMalloc(exec->comp->nbCounters * sizeof(int));
2031	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2032		fprintf(stderr, "exec save: allocation failed");
2033		exec->status = -5;
2034		return;
2035	    }
2036	}
2037	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
2038	       exec->comp->nbCounters * sizeof(int));
2039    }
2040    exec->nbRollbacks++;
2041}
2042
2043static void
2044xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
2045    if (exec->nbRollbacks <= 0) {
2046	exec->status = -1;
2047#ifdef DEBUG_REGEXP_EXEC
2048	printf("rollback failed on empty stack\n");
2049#endif
2050	return;
2051    }
2052    exec->nbRollbacks--;
2053    exec->state = exec->rollbacks[exec->nbRollbacks].state;
2054    exec->index = exec->rollbacks[exec->nbRollbacks].index;
2055    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
2056    if (exec->comp->nbCounters > 0) {
2057	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
2058	    fprintf(stderr, "exec save: allocation failed");
2059	    exec->status = -6;
2060	    return;
2061	}
2062	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
2063	       exec->comp->nbCounters * sizeof(int));
2064    }
2065
2066#ifdef DEBUG_REGEXP_EXEC
2067    printf("restored ");
2068    xmlFARegDebugExec(exec);
2069#endif
2070}
2071
2072/************************************************************************
2073 * 									*
2074 *	Verifyer, running an input against a compiled regexp		*
2075 * 									*
2076 ************************************************************************/
2077
2078static int
2079xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
2080    xmlRegExecCtxt execval;
2081    xmlRegExecCtxtPtr exec = &execval;
2082    int ret, codepoint, len;
2083
2084    exec->inputString = content;
2085    exec->index = 0;
2086    exec->determinist = 1;
2087    exec->maxRollbacks = 0;
2088    exec->nbRollbacks = 0;
2089    exec->rollbacks = NULL;
2090    exec->status = 0;
2091    exec->comp = comp;
2092    exec->state = comp->states[0];
2093    exec->transno = 0;
2094    exec->transcount = 0;
2095    if (comp->nbCounters > 0) {
2096	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2097	if (exec->counts == NULL)
2098	    return(-1);
2099        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2100    } else
2101	exec->counts = NULL;
2102    while ((exec->status == 0) &&
2103	   ((exec->inputString[exec->index] != 0) ||
2104	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2105	xmlRegTransPtr trans;
2106	xmlRegAtomPtr atom;
2107
2108	/*
2109	 * End of input on non-terminal state, rollback, however we may
2110	 * still have epsilon like transition for counted transitions
2111	 * on counters, in that case don't break too early.
2112	 */
2113	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
2114	    goto rollback;
2115
2116	exec->transcount = 0;
2117	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2118	    trans = &exec->state->trans[exec->transno];
2119	    if (trans->to < 0)
2120		continue;
2121	    atom = trans->atom;
2122	    ret = 0;
2123	    if (trans->count >= 0) {
2124		int count;
2125		xmlRegCounterPtr counter;
2126
2127		/*
2128		 * A counted transition.
2129		 */
2130
2131		count = exec->counts[trans->count];
2132		counter = &exec->comp->counters[trans->count];
2133#ifdef DEBUG_REGEXP_EXEC
2134		printf("testing count %d: val %d, min %d, max %d\n",
2135		       trans->count, count, counter->min,  counter->max);
2136#endif
2137		ret = ((count >= counter->min) && (count <= counter->max));
2138	    } else if (atom == NULL) {
2139		fprintf(stderr, "epsilon transition left at runtime\n");
2140		exec->status = -2;
2141		break;
2142	    } else if (exec->inputString[exec->index] != 0) {
2143                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2144		ret = xmlRegCheckCharacter(atom, codepoint);
2145		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2146		    xmlRegStatePtr to = comp->states[trans->to];
2147
2148		    /*
2149		     * this is a multiple input sequence
2150		     */
2151		    if (exec->state->nbTrans > exec->transno + 1) {
2152			xmlFARegExecSave(exec);
2153		    }
2154		    exec->transcount = 1;
2155		    do {
2156			/*
2157			 * Try to progress as much as possible on the input
2158			 */
2159			if (exec->transcount == atom->max) {
2160			    break;
2161			}
2162			exec->index += len;
2163			/*
2164			 * End of input: stop here
2165			 */
2166			if (exec->inputString[exec->index] == 0) {
2167			    exec->index -= len;
2168			    break;
2169			}
2170			if (exec->transcount >= atom->min) {
2171			    int transno = exec->transno;
2172			    xmlRegStatePtr state = exec->state;
2173
2174			    /*
2175			     * The transition is acceptable save it
2176			     */
2177			    exec->transno = -1; /* trick */
2178			    exec->state = to;
2179			    xmlFARegExecSave(exec);
2180			    exec->transno = transno;
2181			    exec->state = state;
2182			}
2183			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2184				              len);
2185			ret = xmlRegCheckCharacter(atom, codepoint);
2186			exec->transcount++;
2187		    } while (ret == 1);
2188		    if (exec->transcount < atom->min)
2189			ret = 0;
2190
2191		    /*
2192		     * If the last check failed but one transition was found
2193		     * possible, rollback
2194		     */
2195		    if (ret < 0)
2196			ret = 0;
2197		    if (ret == 0) {
2198			goto rollback;
2199		    }
2200		}
2201	    }
2202	    if (ret == 1) {
2203		if (exec->state->nbTrans > exec->transno + 1) {
2204		    xmlFARegExecSave(exec);
2205		}
2206		if (trans->counter >= 0) {
2207#ifdef DEBUG_REGEXP_EXEC
2208		    printf("Increasing count %d\n", trans->counter);
2209#endif
2210		    exec->counts[trans->counter]++;
2211		}
2212#ifdef DEBUG_REGEXP_EXEC
2213		printf("entering state %d\n", trans->to);
2214#endif
2215		exec->state = comp->states[trans->to];
2216		exec->transno = 0;
2217		if (trans->atom != NULL) {
2218		    exec->index += len;
2219		}
2220		goto progress;
2221	    } else if (ret < 0) {
2222		exec->status = -4;
2223		break;
2224	    }
2225	}
2226	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2227rollback:
2228	    /*
2229	     * Failed to find a way out
2230	     */
2231	    exec->determinist = 0;
2232	    xmlFARegExecRollBack(exec);
2233	}
2234progress:
2235	continue;
2236    }
2237    if (exec->rollbacks != NULL) {
2238	if (exec->counts != NULL) {
2239	    int i;
2240
2241	    for (i = 0;i < exec->maxRollbacks;i++)
2242		if (exec->rollbacks[i].counts != NULL)
2243		    xmlFree(exec->rollbacks[i].counts);
2244	}
2245	xmlFree(exec->rollbacks);
2246    }
2247    if (exec->counts != NULL)
2248	xmlFree(exec->counts);
2249    if (exec->status == 0)
2250	return(1);
2251    if (exec->status == -1)
2252	return(0);
2253    return(exec->status);
2254}
2255
2256/************************************************************************
2257 * 									*
2258 *	Progressive interface to the verifyer one atom at a time	*
2259 * 									*
2260 ************************************************************************/
2261
2262/**
2263 * xmlRegNewExecCtxt:
2264 * @comp: a precompiled regular expression
2265 * @callback: a callback function used for handling progresses in the
2266 *            automata matching phase
2267 * @data: the context data associated to the callback in this context
2268 *
2269 * Build a context used for progressive evaluation of a regexp.
2270 *
2271 * Returns the new context
2272 */
2273xmlRegExecCtxtPtr
2274xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
2275    xmlRegExecCtxtPtr exec;
2276
2277    if (comp == NULL)
2278	return(NULL);
2279    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
2280    if (exec == NULL) {
2281	return(NULL);
2282    }
2283    memset(exec, 0, sizeof(xmlRegExecCtxt));
2284    exec->inputString = NULL;
2285    exec->index = 0;
2286    exec->determinist = 1;
2287    exec->maxRollbacks = 0;
2288    exec->nbRollbacks = 0;
2289    exec->rollbacks = NULL;
2290    exec->status = 0;
2291    exec->comp = comp;
2292    if (comp->compact == NULL)
2293	exec->state = comp->states[0];
2294    exec->transno = 0;
2295    exec->transcount = 0;
2296    exec->callback = callback;
2297    exec->data = data;
2298    if (comp->nbCounters > 0) {
2299	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
2300	if (exec->counts == NULL) {
2301	    xmlFree(exec);
2302	    return(NULL);
2303	}
2304        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
2305    } else
2306	exec->counts = NULL;
2307    exec->inputStackMax = 0;
2308    exec->inputStackNr = 0;
2309    exec->inputStack = NULL;
2310    return(exec);
2311}
2312
2313/**
2314 * xmlRegFreeExecCtxt:
2315 * @exec: a regular expression evaulation context
2316 *
2317 * Free the structures associated to a regular expression evaulation context.
2318 */
2319void
2320xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
2321    if (exec == NULL)
2322	return;
2323
2324    if (exec->rollbacks != NULL) {
2325	if (exec->counts != NULL) {
2326	    int i;
2327
2328	    for (i = 0;i < exec->maxRollbacks;i++)
2329		if (exec->rollbacks[i].counts != NULL)
2330		    xmlFree(exec->rollbacks[i].counts);
2331	}
2332	xmlFree(exec->rollbacks);
2333    }
2334    if (exec->counts != NULL)
2335	xmlFree(exec->counts);
2336    if (exec->inputStack != NULL) {
2337	int i;
2338
2339	for (i = 0;i < exec->inputStackNr;i++) {
2340	    if (exec->inputStack[i].value != NULL)
2341		xmlFree(exec->inputStack[i].value);
2342	}
2343	xmlFree(exec->inputStack);
2344    }
2345    xmlFree(exec);
2346}
2347
2348static void
2349xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2350	                    void *data) {
2351#ifdef DEBUG_PUSH
2352    printf("saving value: %d:%s\n", exec->inputStackNr, value);
2353#endif
2354    if (exec->inputStackMax == 0) {
2355	exec->inputStackMax = 4;
2356	exec->inputStack = (xmlRegInputTokenPtr)
2357	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
2358	if (exec->inputStack == NULL) {
2359	    fprintf(stderr, "push input: allocation failed");
2360	    exec->inputStackMax = 0;
2361	    return;
2362	}
2363    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
2364	xmlRegInputTokenPtr tmp;
2365
2366	exec->inputStackMax *= 2;
2367	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
2368			exec->inputStackMax * sizeof(xmlRegInputToken));
2369	if (tmp == NULL) {
2370	    fprintf(stderr, "push input: allocation failed");
2371	    exec->inputStackMax /= 2;
2372	    return;
2373	}
2374	exec->inputStack = tmp;
2375    }
2376    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
2377    exec->inputStack[exec->inputStackNr].data = data;
2378    exec->inputStackNr++;
2379    exec->inputStack[exec->inputStackNr].value = NULL;
2380    exec->inputStack[exec->inputStackNr].data = NULL;
2381}
2382
2383
2384/**
2385 * xmlRegCompactPushString:
2386 * @exec: a regexp execution context
2387 * @comp:  the precompiled exec with a compact table
2388 * @value: a string token input
2389 * @data: data associated to the token to reuse in callbacks
2390 *
2391 * Push one input token in the execution context
2392 *
2393 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2394 *     a negative value in case of error.
2395 */
2396static int
2397xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
2398	                xmlRegexpPtr comp,
2399	                const xmlChar *value,
2400	                void *data) {
2401    int state = exec->index;
2402    int i, target;
2403
2404    if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
2405	return(-1);
2406
2407    if (value == NULL) {
2408	/*
2409	 * are we at a final state ?
2410	 */
2411	if (comp->compact[state * (comp->nbstrings + 1)] ==
2412            XML_REGEXP_FINAL_STATE)
2413	    return(1);
2414	return(0);
2415    }
2416
2417#ifdef DEBUG_PUSH
2418    printf("value pushed: %s\n", value);
2419#endif
2420
2421    /*
2422     * Examine all outside transition from current state
2423     */
2424    for (i = 0;i < comp->nbstrings;i++) {
2425	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
2426	if ((target > 0) && (target <= comp->nbstates)) {
2427	    target--; /* to avoid 0 */
2428	    if (xmlStrEqual(comp->stringMap[i], value)) {
2429		exec->index = target;
2430		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
2431		    exec->callback(exec->data, value,
2432			  comp->transdata[state * comp->nbstrings + i], data);
2433		}
2434#ifdef DEBUG_PUSH
2435		printf("entering state %d\n", target);
2436#endif
2437		if (comp->compact[target * (comp->nbstrings + 1)] ==
2438		    XML_REGEXP_FINAL_STATE)
2439		    return(1);
2440		return(0);
2441	    }
2442	}
2443    }
2444    /*
2445     * Failed to find an exit transition out from current state for the
2446     * current token
2447     */
2448#ifdef DEBUG_PUSH
2449    printf("failed to find a transition for %s on state %d\n", value, state);
2450#endif
2451    exec->status = -1;
2452    return(-1);
2453}
2454
2455/**
2456 * xmlRegExecPushString:
2457 * @exec: a regexp execution context or NULL to indicate the end
2458 * @value: a string token input
2459 * @data: data associated to the token to reuse in callbacks
2460 *
2461 * Push one input token in the execution context
2462 *
2463 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2464 *     a negative value in case of error.
2465 */
2466int
2467xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
2468	             void *data) {
2469    xmlRegTransPtr trans;
2470    xmlRegAtomPtr atom;
2471    int ret;
2472    int final = 0;
2473
2474    if (exec == NULL)
2475	return(-1);
2476    if (exec->comp == NULL)
2477	return(-1);
2478    if (exec->status != 0)
2479	return(exec->status);
2480
2481    if (exec->comp->compact != NULL)
2482	return(xmlRegCompactPushString(exec, exec->comp, value, data));
2483
2484    if (value == NULL) {
2485        if (exec->state->type == XML_REGEXP_FINAL_STATE)
2486	    return(1);
2487	final = 1;
2488    }
2489
2490#ifdef DEBUG_PUSH
2491    printf("value pushed: %s\n", value);
2492#endif
2493    /*
2494     * If we have an active rollback stack push the new value there
2495     * and get back to where we were left
2496     */
2497    if ((value != NULL) && (exec->inputStackNr > 0)) {
2498	xmlFARegExecSaveInputString(exec, value, data);
2499	value = exec->inputStack[exec->index].value;
2500	data = exec->inputStack[exec->index].data;
2501#ifdef DEBUG_PUSH
2502	printf("value loaded: %s\n", value);
2503#endif
2504    }
2505
2506    while ((exec->status == 0) &&
2507	   ((value != NULL) ||
2508	    ((final == 1) &&
2509	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
2510
2511	/*
2512	 * End of input on non-terminal state, rollback, however we may
2513	 * still have epsilon like transition for counted transitions
2514	 * on counters, in that case don't break too early.
2515	 */
2516	if ((value == NULL) && (exec->counts == NULL))
2517	    goto rollback;
2518
2519	exec->transcount = 0;
2520	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2521	    trans = &exec->state->trans[exec->transno];
2522	    if (trans->to < 0)
2523		continue;
2524	    atom = trans->atom;
2525	    ret = 0;
2526	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
2527		int i;
2528		int count;
2529		xmlRegTransPtr t;
2530		xmlRegCounterPtr counter;
2531
2532		ret = 0;
2533
2534#ifdef DEBUG_PUSH
2535		printf("testing all lax %d\n", trans->count);
2536#endif
2537		/*
2538		 * Check all counted transitions from the current state
2539		 */
2540		if ((value == NULL) && (final)) {
2541		    ret = 1;
2542		} else if (value != NULL) {
2543		    for (i = 0;i < exec->state->nbTrans;i++) {
2544			t = &exec->state->trans[i];
2545			if ((t->counter < 0) || (t == trans))
2546			    continue;
2547			counter = &exec->comp->counters[t->counter];
2548			count = exec->counts[t->counter];
2549			if ((count < counter->max) &&
2550		            (t->atom != NULL) &&
2551			    (xmlStrEqual(value, t->atom->valuep))) {
2552			    ret = 0;
2553			    break;
2554			}
2555			if ((count >= counter->min) &&
2556			    (count < counter->max) &&
2557			    (xmlStrEqual(value, t->atom->valuep))) {
2558			    ret = 1;
2559			    break;
2560			}
2561		    }
2562		}
2563	    } else if (trans->count == REGEXP_ALL_COUNTER) {
2564		int i;
2565		int count;
2566		xmlRegTransPtr t;
2567		xmlRegCounterPtr counter;
2568
2569		ret = 1;
2570
2571#ifdef DEBUG_PUSH
2572		printf("testing all %d\n", trans->count);
2573#endif
2574		/*
2575		 * Check all counted transitions from the current state
2576		 */
2577		for (i = 0;i < exec->state->nbTrans;i++) {
2578                    t = &exec->state->trans[i];
2579		    if ((t->counter < 0) || (t == trans))
2580			continue;
2581                    counter = &exec->comp->counters[t->counter];
2582		    count = exec->counts[t->counter];
2583		    if ((count < counter->min) || (count > counter->max)) {
2584			ret = 0;
2585			break;
2586		    }
2587		}
2588	    } else if (trans->count >= 0) {
2589		int count;
2590		xmlRegCounterPtr counter;
2591
2592		/*
2593		 * A counted transition.
2594		 */
2595
2596		count = exec->counts[trans->count];
2597		counter = &exec->comp->counters[trans->count];
2598#ifdef DEBUG_PUSH
2599		printf("testing count %d: val %d, min %d, max %d\n",
2600		       trans->count, count, counter->min,  counter->max);
2601#endif
2602		ret = ((count >= counter->min) && (count <= counter->max));
2603	    } else if (atom == NULL) {
2604		fprintf(stderr, "epsilon transition left at runtime\n");
2605		exec->status = -2;
2606		break;
2607	    } else if (value != NULL) {
2608		ret = xmlStrEqual(value, atom->valuep);
2609		if ((ret == 1) && (trans->counter >= 0)) {
2610		    xmlRegCounterPtr counter;
2611		    int count;
2612
2613		    count = exec->counts[trans->counter];
2614		    counter = &exec->comp->counters[trans->counter];
2615		    if (count >= counter->max)
2616			ret = 0;
2617		}
2618
2619		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2620		    xmlRegStatePtr to = exec->comp->states[trans->to];
2621
2622		    /*
2623		     * this is a multiple input sequence
2624		     */
2625		    if (exec->state->nbTrans > exec->transno + 1) {
2626			if (exec->inputStackNr <= 0) {
2627			    xmlFARegExecSaveInputString(exec, value, data);
2628			}
2629			xmlFARegExecSave(exec);
2630		    }
2631		    exec->transcount = 1;
2632		    do {
2633			/*
2634			 * Try to progress as much as possible on the input
2635			 */
2636			if (exec->transcount == atom->max) {
2637			    break;
2638			}
2639			exec->index++;
2640			value = exec->inputStack[exec->index].value;
2641			data = exec->inputStack[exec->index].data;
2642#ifdef DEBUG_PUSH
2643			printf("value loaded: %s\n", value);
2644#endif
2645
2646			/*
2647			 * End of input: stop here
2648			 */
2649			if (value == NULL) {
2650			    exec->index --;
2651			    break;
2652			}
2653			if (exec->transcount >= atom->min) {
2654			    int transno = exec->transno;
2655			    xmlRegStatePtr state = exec->state;
2656
2657			    /*
2658			     * The transition is acceptable save it
2659			     */
2660			    exec->transno = -1; /* trick */
2661			    exec->state = to;
2662			    if (exec->inputStackNr <= 0) {
2663				xmlFARegExecSaveInputString(exec, value, data);
2664			    }
2665			    xmlFARegExecSave(exec);
2666			    exec->transno = transno;
2667			    exec->state = state;
2668			}
2669			ret = xmlStrEqual(value, atom->valuep);
2670			exec->transcount++;
2671		    } while (ret == 1);
2672		    if (exec->transcount < atom->min)
2673			ret = 0;
2674
2675		    /*
2676		     * If the last check failed but one transition was found
2677		     * possible, rollback
2678		     */
2679		    if (ret < 0)
2680			ret = 0;
2681		    if (ret == 0) {
2682			goto rollback;
2683		    }
2684		}
2685	    }
2686	    if (ret == 1) {
2687		if ((exec->callback != NULL) && (atom != NULL)) {
2688		    exec->callback(exec->data, atom->valuep,
2689			           atom->data, data);
2690		}
2691		if (exec->state->nbTrans > exec->transno + 1) {
2692		    if (exec->inputStackNr <= 0) {
2693			xmlFARegExecSaveInputString(exec, value, data);
2694		    }
2695		    xmlFARegExecSave(exec);
2696		}
2697		if (trans->counter >= 0) {
2698#ifdef DEBUG_PUSH
2699		    printf("Increasing count %d\n", trans->counter);
2700#endif
2701		    exec->counts[trans->counter]++;
2702		}
2703#ifdef DEBUG_PUSH
2704		printf("entering state %d\n", trans->to);
2705#endif
2706		exec->state = exec->comp->states[trans->to];
2707		exec->transno = 0;
2708		if (trans->atom != NULL) {
2709		    if (exec->inputStack != NULL) {
2710			exec->index++;
2711			if (exec->index < exec->inputStackNr) {
2712			    value = exec->inputStack[exec->index].value;
2713			    data = exec->inputStack[exec->index].data;
2714#ifdef DEBUG_PUSH
2715			    printf("value loaded: %s\n", value);
2716#endif
2717			} else {
2718			    value = NULL;
2719			    data = NULL;
2720#ifdef DEBUG_PUSH
2721			    printf("end of input\n");
2722#endif
2723			}
2724		    } else {
2725			value = NULL;
2726			data = NULL;
2727#ifdef DEBUG_PUSH
2728			printf("end of input\n");
2729#endif
2730		    }
2731		}
2732		goto progress;
2733	    } else if (ret < 0) {
2734		exec->status = -4;
2735		break;
2736	    }
2737	}
2738	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2739rollback:
2740	    /*
2741	     * Failed to find a way out
2742	     */
2743	    exec->determinist = 0;
2744	    xmlFARegExecRollBack(exec);
2745	    if (exec->status == 0) {
2746		value = exec->inputStack[exec->index].value;
2747		data = exec->inputStack[exec->index].data;
2748#ifdef DEBUG_PUSH
2749		printf("value loaded: %s\n", value);
2750#endif
2751	    }
2752	}
2753progress:
2754	continue;
2755    }
2756    if (exec->status == 0) {
2757        return(exec->state->type == XML_REGEXP_FINAL_STATE);
2758    }
2759    return(exec->status);
2760}
2761
2762/**
2763 * xmlRegExecPushString2:
2764 * @exec: a regexp execution context or NULL to indicate the end
2765 * @value: the first string token input
2766 * @value2: the second string token input
2767 * @data: data associated to the token to reuse in callbacks
2768 *
2769 * Push one input token in the execution context
2770 *
2771 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
2772 *     a negative value in case of error.
2773 */
2774int
2775xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
2776                      const xmlChar *value2, void *data) {
2777    xmlChar buf[150];
2778    int lenn, lenp, ret;
2779    xmlChar *str;
2780
2781    if (exec == NULL)
2782	return(-1);
2783    if (exec->comp == NULL)
2784	return(-1);
2785    if (exec->status != 0)
2786	return(exec->status);
2787
2788    if (value2 == NULL)
2789        return(xmlRegExecPushString(exec, value, data));
2790
2791    lenn = strlen((char *) value2);
2792    lenp = strlen((char *) value);
2793
2794    if (150 < lenn + lenp + 2) {
2795	str = (xmlChar *) xmlMalloc(lenn + lenp + 2);
2796	if (str == NULL) {
2797	    exec->status = -1;
2798	    return(-1);
2799	}
2800    } else {
2801	str = buf;
2802    }
2803    memcpy(&str[0], value, lenp);
2804    str[lenp] = '|';
2805    memcpy(&str[lenp + 1], value2, lenn);
2806    str[lenn + lenp + 1] = 0;
2807
2808    if (exec->comp->compact != NULL)
2809	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
2810    else
2811        ret = xmlRegExecPushString(exec, str, data);
2812
2813    if (str != buf)
2814        xmlFree(buf);
2815    return(ret);
2816}
2817
2818#if 0
2819static int
2820xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
2821    xmlRegTransPtr trans;
2822    xmlRegAtomPtr atom;
2823    int ret;
2824    int codepoint, len;
2825
2826    if (exec == NULL)
2827	return(-1);
2828    if (exec->status != 0)
2829	return(exec->status);
2830
2831    while ((exec->status == 0) &&
2832	   ((exec->inputString[exec->index] != 0) ||
2833	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2834
2835	/*
2836	 * End of input on non-terminal state, rollback, however we may
2837	 * still have epsilon like transition for counted transitions
2838	 * on counters, in that case don't break too early.
2839	 */
2840	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
2841	    goto rollback;
2842
2843	exec->transcount = 0;
2844	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2845	    trans = &exec->state->trans[exec->transno];
2846	    if (trans->to < 0)
2847		continue;
2848	    atom = trans->atom;
2849	    ret = 0;
2850	    if (trans->count >= 0) {
2851		int count;
2852		xmlRegCounterPtr counter;
2853
2854		/*
2855		 * A counted transition.
2856		 */
2857
2858		count = exec->counts[trans->count];
2859		counter = &exec->comp->counters[trans->count];
2860#ifdef DEBUG_REGEXP_EXEC
2861		printf("testing count %d: val %d, min %d, max %d\n",
2862		       trans->count, count, counter->min,  counter->max);
2863#endif
2864		ret = ((count >= counter->min) && (count <= counter->max));
2865	    } else if (atom == NULL) {
2866		fprintf(stderr, "epsilon transition left at runtime\n");
2867		exec->status = -2;
2868		break;
2869	    } else if (exec->inputString[exec->index] != 0) {
2870                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2871		ret = xmlRegCheckCharacter(atom, codepoint);
2872		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2873		    xmlRegStatePtr to = exec->comp->states[trans->to];
2874
2875		    /*
2876		     * this is a multiple input sequence
2877		     */
2878		    if (exec->state->nbTrans > exec->transno + 1) {
2879			xmlFARegExecSave(exec);
2880		    }
2881		    exec->transcount = 1;
2882		    do {
2883			/*
2884			 * Try to progress as much as possible on the input
2885			 */
2886			if (exec->transcount == atom->max) {
2887			    break;
2888			}
2889			exec->index += len;
2890			/*
2891			 * End of input: stop here
2892			 */
2893			if (exec->inputString[exec->index] == 0) {
2894			    exec->index -= len;
2895			    break;
2896			}
2897			if (exec->transcount >= atom->min) {
2898			    int transno = exec->transno;
2899			    xmlRegStatePtr state = exec->state;
2900
2901			    /*
2902			     * The transition is acceptable save it
2903			     */
2904			    exec->transno = -1; /* trick */
2905			    exec->state = to;
2906			    xmlFARegExecSave(exec);
2907			    exec->transno = transno;
2908			    exec->state = state;
2909			}
2910			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2911				              len);
2912			ret = xmlRegCheckCharacter(atom, codepoint);
2913			exec->transcount++;
2914		    } while (ret == 1);
2915		    if (exec->transcount < atom->min)
2916			ret = 0;
2917
2918		    /*
2919		     * If the last check failed but one transition was found
2920		     * possible, rollback
2921		     */
2922		    if (ret < 0)
2923			ret = 0;
2924		    if (ret == 0) {
2925			goto rollback;
2926		    }
2927		}
2928	    }
2929	    if (ret == 1) {
2930		if (exec->state->nbTrans > exec->transno + 1) {
2931		    xmlFARegExecSave(exec);
2932		}
2933		if (trans->counter >= 0) {
2934#ifdef DEBUG_REGEXP_EXEC
2935		    printf("Increasing count %d\n", trans->counter);
2936#endif
2937		    exec->counts[trans->counter]++;
2938		}
2939#ifdef DEBUG_REGEXP_EXEC
2940		printf("entering state %d\n", trans->to);
2941#endif
2942		exec->state = exec->comp->states[trans->to];
2943		exec->transno = 0;
2944		if (trans->atom != NULL) {
2945		    exec->index += len;
2946		}
2947		goto progress;
2948	    } else if (ret < 0) {
2949		exec->status = -4;
2950		break;
2951	    }
2952	}
2953	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2954rollback:
2955	    /*
2956	     * Failed to find a way out
2957	     */
2958	    exec->determinist = 0;
2959	    xmlFARegExecRollBack(exec);
2960	}
2961progress:
2962	continue;
2963    }
2964}
2965#endif
2966/************************************************************************
2967 * 									*
2968 *	Parser for the Shemas Datatype Regular Expressions		*
2969 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
2970 * 									*
2971 ************************************************************************/
2972
2973/**
2974 * xmlFAIsChar:
2975 * @ctxt:  a regexp parser context
2976 *
2977 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
2978 */
2979static int
2980xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
2981    int cur;
2982    int len;
2983
2984    cur = CUR_SCHAR(ctxt->cur, len);
2985    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
2986	(cur == '*') || (cur == '+') || (cur == '(') ||
2987	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
2988	(cur == 0x5D) || (cur == 0))
2989	return(-1);
2990    return(cur);
2991}
2992
2993/**
2994 * xmlFAParseCharProp:
2995 * @ctxt:  a regexp parser context
2996 *
2997 * [27]   charProp   ::=   IsCategory | IsBlock
2998 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
2999 *                       Separators | Symbols | Others
3000 * [29]   Letters   ::=   'L' [ultmo]?
3001 * [30]   Marks   ::=   'M' [nce]?
3002 * [31]   Numbers   ::=   'N' [dlo]?
3003 * [32]   Punctuation   ::=   'P' [cdseifo]?
3004 * [33]   Separators   ::=   'Z' [slp]?
3005 * [34]   Symbols   ::=   'S' [mcko]?
3006 * [35]   Others   ::=   'C' [cfon]?
3007 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
3008 */
3009static void
3010xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
3011    int cur;
3012    xmlRegAtomType type = 0;
3013    xmlChar *blockName = NULL;
3014
3015    cur = CUR;
3016    if (cur == 'L') {
3017	NEXT;
3018	cur = CUR;
3019	if (cur == 'u') {
3020	    NEXT;
3021	    type = XML_REGEXP_LETTER_UPPERCASE;
3022	} else if (cur == 'l') {
3023	    NEXT;
3024	    type = XML_REGEXP_LETTER_LOWERCASE;
3025	} else if (cur == 't') {
3026	    NEXT;
3027	    type = XML_REGEXP_LETTER_TITLECASE;
3028	} else if (cur == 'm') {
3029	    NEXT;
3030	    type = XML_REGEXP_LETTER_MODIFIER;
3031	} else if (cur == 'o') {
3032	    NEXT;
3033	    type = XML_REGEXP_LETTER_OTHERS;
3034	} else {
3035	    type = XML_REGEXP_LETTER;
3036	}
3037    } else if (cur == 'M') {
3038	NEXT;
3039	cur = CUR;
3040	if (cur == 'n') {
3041	    NEXT;
3042	    /* nonspacing */
3043	    type = XML_REGEXP_MARK_NONSPACING;
3044	} else if (cur == 'c') {
3045	    NEXT;
3046	    /* spacing combining */
3047	    type = XML_REGEXP_MARK_SPACECOMBINING;
3048	} else if (cur == 'e') {
3049	    NEXT;
3050	    /* enclosing */
3051	    type = XML_REGEXP_MARK_ENCLOSING;
3052	} else {
3053	    /* all marks */
3054	    type = XML_REGEXP_MARK;
3055	}
3056    } else if (cur == 'N') {
3057	NEXT;
3058	cur = CUR;
3059	if (cur == 'd') {
3060	    NEXT;
3061	    /* digital */
3062	    type = XML_REGEXP_NUMBER_DECIMAL;
3063	} else if (cur == 'l') {
3064	    NEXT;
3065	    /* letter */
3066	    type = XML_REGEXP_NUMBER_LETTER;
3067	} else if (cur == 'o') {
3068	    NEXT;
3069	    /* other */
3070	    type = XML_REGEXP_NUMBER_OTHERS;
3071	} else {
3072	    /* all numbers */
3073	    type = XML_REGEXP_NUMBER;
3074	}
3075    } else if (cur == 'P') {
3076	NEXT;
3077	cur = CUR;
3078	if (cur == 'c') {
3079	    NEXT;
3080	    /* connector */
3081	    type = XML_REGEXP_PUNCT_CONNECTOR;
3082	} else if (cur == 'd') {
3083	    NEXT;
3084	    /* dash */
3085	    type = XML_REGEXP_PUNCT_DASH;
3086	} else if (cur == 's') {
3087	    NEXT;
3088	    /* open */
3089	    type = XML_REGEXP_PUNCT_OPEN;
3090	} else if (cur == 'e') {
3091	    NEXT;
3092	    /* close */
3093	    type = XML_REGEXP_PUNCT_CLOSE;
3094	} else if (cur == 'i') {
3095	    NEXT;
3096	    /* initial quote */
3097	    type = XML_REGEXP_PUNCT_INITQUOTE;
3098	} else if (cur == 'f') {
3099	    NEXT;
3100	    /* final quote */
3101	    type = XML_REGEXP_PUNCT_FINQUOTE;
3102	} else if (cur == 'o') {
3103	    NEXT;
3104	    /* other */
3105	    type = XML_REGEXP_PUNCT_OTHERS;
3106	} else {
3107	    /* all punctuation */
3108	    type = XML_REGEXP_PUNCT;
3109	}
3110    } else if (cur == 'Z') {
3111	NEXT;
3112	cur = CUR;
3113	if (cur == 's') {
3114	    NEXT;
3115	    /* space */
3116	    type = XML_REGEXP_SEPAR_SPACE;
3117	} else if (cur == 'l') {
3118	    NEXT;
3119	    /* line */
3120	    type = XML_REGEXP_SEPAR_LINE;
3121	} else if (cur == 'p') {
3122	    NEXT;
3123	    /* paragraph */
3124	    type = XML_REGEXP_SEPAR_PARA;
3125	} else {
3126	    /* all separators */
3127	    type = XML_REGEXP_SEPAR;
3128	}
3129    } else if (cur == 'S') {
3130	NEXT;
3131	cur = CUR;
3132	if (cur == 'm') {
3133	    NEXT;
3134	    type = XML_REGEXP_SYMBOL_MATH;
3135	    /* math */
3136	} else if (cur == 'c') {
3137	    NEXT;
3138	    type = XML_REGEXP_SYMBOL_CURRENCY;
3139	    /* currency */
3140	} else if (cur == 'k') {
3141	    NEXT;
3142	    type = XML_REGEXP_SYMBOL_MODIFIER;
3143	    /* modifiers */
3144	} else if (cur == 'o') {
3145	    NEXT;
3146	    type = XML_REGEXP_SYMBOL_OTHERS;
3147	    /* other */
3148	} else {
3149	    /* all symbols */
3150	    type = XML_REGEXP_SYMBOL;
3151	}
3152    } else if (cur == 'C') {
3153	NEXT;
3154	cur = CUR;
3155	if (cur == 'c') {
3156	    NEXT;
3157	    /* control */
3158	    type = XML_REGEXP_OTHER_CONTROL;
3159	} else if (cur == 'f') {
3160	    NEXT;
3161	    /* format */
3162	    type = XML_REGEXP_OTHER_FORMAT;
3163	} else if (cur == 'o') {
3164	    NEXT;
3165	    /* private use */
3166	    type = XML_REGEXP_OTHER_PRIVATE;
3167	} else if (cur == 'n') {
3168	    NEXT;
3169	    /* not assigned */
3170	    type = XML_REGEXP_OTHER_NA;
3171	} else {
3172	    /* all others */
3173	    type = XML_REGEXP_OTHER;
3174	}
3175    } else if (cur == 'I') {
3176	const xmlChar *start;
3177	NEXT;
3178	cur = CUR;
3179	if (cur != 's') {
3180	    ERROR("IsXXXX expected");
3181	    return;
3182	}
3183	NEXT;
3184	start = ctxt->cur;
3185	cur = CUR;
3186	if (((cur >= 'a') && (cur <= 'z')) ||
3187	    ((cur >= 'A') && (cur <= 'Z')) ||
3188	    ((cur >= '0') && (cur <= '9')) ||
3189	    (cur == 0x2D)) {
3190	    NEXT;
3191	    cur = CUR;
3192	    while (((cur >= 'a') && (cur <= 'z')) ||
3193		((cur >= 'A') && (cur <= 'Z')) ||
3194		((cur >= '0') && (cur <= '9')) ||
3195		(cur == 0x2D)) {
3196		NEXT;
3197		cur = CUR;
3198	    }
3199	}
3200	type = XML_REGEXP_BLOCK_NAME;
3201	blockName = xmlStrndup(start, ctxt->cur - start);
3202    } else {
3203	ERROR("Unknown char property");
3204	return;
3205    }
3206    if (ctxt->atom == NULL) {
3207	ctxt->atom = xmlRegNewAtom(ctxt, type);
3208	if (ctxt->atom != NULL)
3209	    ctxt->atom->valuep = blockName;
3210    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3211        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3212		           type, 0, 0, blockName);
3213    }
3214}
3215
3216/**
3217 * xmlFAParseCharClassEsc:
3218 * @ctxt:  a regexp parser context
3219 *
3220 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
3221 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
3222 * [25] catEsc   ::=   '\p{' charProp '}'
3223 * [26] complEsc ::=   '\P{' charProp '}'
3224 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
3225 */
3226static void
3227xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
3228    int cur;
3229
3230    if (CUR == '.') {
3231	if (ctxt->atom == NULL) {
3232	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
3233	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3234	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3235			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
3236	}
3237	NEXT;
3238	return;
3239    }
3240    if (CUR != '\\') {
3241	ERROR("Escaped sequence: expecting \\");
3242	return;
3243    }
3244    NEXT;
3245    cur = CUR;
3246    if (cur == 'p') {
3247	NEXT;
3248	if (CUR != '{') {
3249	    ERROR("Expecting '{'");
3250	    return;
3251	}
3252	NEXT;
3253	xmlFAParseCharProp(ctxt);
3254	if (CUR != '}') {
3255	    ERROR("Expecting '}'");
3256	    return;
3257	}
3258	NEXT;
3259    } else if (cur == 'P') {
3260	NEXT;
3261	if (CUR != '{') {
3262	    ERROR("Expecting '{'");
3263	    return;
3264	}
3265	NEXT;
3266	xmlFAParseCharProp(ctxt);
3267	ctxt->atom->neg = 1;
3268	if (CUR != '}') {
3269	    ERROR("Expecting '}'");
3270	    return;
3271	}
3272	NEXT;
3273    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
3274	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
3275	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
3276	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
3277	(cur == 0x5E)) {
3278	if (ctxt->atom == NULL) {
3279	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3280	    if (ctxt->atom != NULL)
3281		ctxt->atom->codepoint = cur;
3282	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3283	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3284			       XML_REGEXP_CHARVAL, cur, cur, NULL);
3285	}
3286	NEXT;
3287    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
3288	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
3289	(cur == 'w') || (cur == 'W')) {
3290	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
3291
3292	switch (cur) {
3293	    case 's':
3294		type = XML_REGEXP_ANYSPACE;
3295		break;
3296	    case 'S':
3297		type = XML_REGEXP_NOTSPACE;
3298		break;
3299	    case 'i':
3300		type = XML_REGEXP_INITNAME;
3301		break;
3302	    case 'I':
3303		type = XML_REGEXP_NOTINITNAME;
3304		break;
3305	    case 'c':
3306		type = XML_REGEXP_NAMECHAR;
3307		break;
3308	    case 'C':
3309		type = XML_REGEXP_NOTNAMECHAR;
3310		break;
3311	    case 'd':
3312		type = XML_REGEXP_DECIMAL;
3313		break;
3314	    case 'D':
3315		type = XML_REGEXP_NOTDECIMAL;
3316		break;
3317	    case 'w':
3318		type = XML_REGEXP_REALCHAR;
3319		break;
3320	    case 'W':
3321		type = XML_REGEXP_NOTREALCHAR;
3322		break;
3323	}
3324	NEXT;
3325	if (ctxt->atom == NULL) {
3326	    ctxt->atom = xmlRegNewAtom(ctxt, type);
3327	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
3328	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3329			       type, 0, 0, NULL);
3330	}
3331    }
3332}
3333
3334/**
3335 * xmlFAParseCharRef:
3336 * @ctxt:  a regexp parser context
3337 *
3338 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
3339 */
3340static int
3341xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
3342    int ret = 0, cur;
3343
3344    if ((CUR != '&') || (NXT(1) != '#'))
3345	return(-1);
3346    NEXT;
3347    NEXT;
3348    cur = CUR;
3349    if (cur == 'x') {
3350	NEXT;
3351	cur = CUR;
3352	if (((cur >= '0') && (cur <= '9')) ||
3353	    ((cur >= 'a') && (cur <= 'f')) ||
3354	    ((cur >= 'A') && (cur <= 'F'))) {
3355	    while (((cur >= '0') && (cur <= '9')) ||
3356		   ((cur >= 'A') && (cur <= 'F'))) {
3357		if ((cur >= '0') && (cur <= '9'))
3358		    ret = ret * 16 + cur - '0';
3359		else if ((cur >= 'a') && (cur <= 'f'))
3360		    ret = ret * 16 + 10 + (cur - 'a');
3361		else
3362		    ret = ret * 16 + 10 + (cur - 'A');
3363		NEXT;
3364		cur = CUR;
3365	    }
3366	} else {
3367	    ERROR("Char ref: expecting [0-9A-F]");
3368	    return(-1);
3369	}
3370    } else {
3371	if ((cur >= '0') && (cur <= '9')) {
3372	    while ((cur >= '0') && (cur <= '9')) {
3373		ret = ret * 10 + cur - '0';
3374		NEXT;
3375		cur = CUR;
3376	    }
3377	} else {
3378	    ERROR("Char ref: expecting [0-9]");
3379	    return(-1);
3380	}
3381    }
3382    if (cur != ';') {
3383	ERROR("Char ref: expecting ';'");
3384	return(-1);
3385    } else {
3386	NEXT;
3387    }
3388    return(ret);
3389}
3390
3391/**
3392 * xmlFAParseCharRange:
3393 * @ctxt:  a regexp parser context
3394 *
3395 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
3396 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
3397 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
3398 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
3399 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
3400 */
3401static void
3402xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
3403    int cur;
3404    int start = -1;
3405    int end = -1;
3406
3407    if ((CUR == '&') && (NXT(1) == '#')) {
3408	end = start = xmlFAParseCharRef(ctxt);
3409        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3410	                   XML_REGEXP_CHARVAL, start, end, NULL);
3411	return;
3412    }
3413    cur = CUR;
3414    if (cur == '\\') {
3415	NEXT;
3416	cur = CUR;
3417	switch (cur) {
3418	    case 'n': start = 0xA; break;
3419	    case 'r': start = 0xD; break;
3420	    case 't': start = 0x9; break;
3421	    case '\\': case '|': case '.': case '-': case '^': case '?':
3422	    case '*': case '+': case '{': case '}': case '(': case ')':
3423	    case '[': case ']':
3424		start = cur; break;
3425	    default:
3426		ERROR("Invalid escape value");
3427		return;
3428	}
3429	end = start;
3430    } else if ((cur != 0x5B) && (cur != 0x5D)) {
3431	end = start = cur;
3432    } else {
3433	ERROR("Expecting a char range");
3434	return;
3435    }
3436    NEXT;
3437    if (start == '-') {
3438	return;
3439    }
3440    cur = CUR;
3441    if (cur != '-') {
3442        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3443		              XML_REGEXP_CHARVAL, start, end, NULL);
3444	return;
3445    }
3446    NEXT;
3447    cur = CUR;
3448    if (cur == '\\') {
3449	NEXT;
3450	cur = CUR;
3451	switch (cur) {
3452	    case 'n': end = 0xA; break;
3453	    case 'r': end = 0xD; break;
3454	    case 't': end = 0x9; break;
3455	    case '\\': case '|': case '.': case '-': case '^': case '?':
3456	    case '*': case '+': case '{': case '}': case '(': case ')':
3457	    case '[': case ']':
3458		end = cur; break;
3459	    default:
3460		ERROR("Invalid escape value");
3461		return;
3462	}
3463    } else if ((cur != 0x5B) && (cur != 0x5D)) {
3464	end = cur;
3465    } else {
3466	ERROR("Expecting the end of a char range");
3467	return;
3468    }
3469    NEXT;
3470    /* TODO check that the values are acceptable character ranges for XML */
3471    if (end < start) {
3472	ERROR("End of range is before start of range");
3473    } else {
3474        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
3475		           XML_REGEXP_CHARVAL, start, end, NULL);
3476    }
3477    return;
3478}
3479
3480/**
3481 * xmlFAParsePosCharGroup:
3482 * @ctxt:  a regexp parser context
3483 *
3484 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
3485 */
3486static void
3487xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
3488    do {
3489	if ((CUR == '\\') || (CUR == '.')) {
3490	    xmlFAParseCharClassEsc(ctxt);
3491	} else {
3492	    xmlFAParseCharRange(ctxt);
3493	}
3494    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
3495	     (ctxt->error == 0));
3496}
3497
3498/**
3499 * xmlFAParseCharGroup:
3500 * @ctxt:  a regexp parser context
3501 *
3502 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
3503 * [15]   negCharGroup ::= '^' posCharGroup
3504 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
3505 * [12]   charClassExpr ::= '[' charGroup ']'
3506 */
3507static void
3508xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
3509    int n = ctxt->neg;
3510    while ((CUR != ']') && (ctxt->error == 0)) {
3511	if (CUR == '^') {
3512	    int neg = ctxt->neg;
3513
3514	    NEXT;
3515	    ctxt->neg = !ctxt->neg;
3516	    xmlFAParsePosCharGroup(ctxt);
3517	    ctxt->neg = neg;
3518	} else if (CUR == '-') {
3519	    NEXT;
3520	    ctxt->neg = !ctxt->neg;
3521	    if (CUR != '[') {
3522		ERROR("charClassExpr: '[' expected");
3523		break;
3524	    }
3525	    NEXT;
3526	    xmlFAParseCharGroup(ctxt);
3527	    if (CUR == ']') {
3528		NEXT;
3529	    } else {
3530		ERROR("charClassExpr: ']' expected");
3531		break;
3532	    }
3533	    break;
3534	} else if (CUR != ']') {
3535	    xmlFAParsePosCharGroup(ctxt);
3536	}
3537    }
3538    ctxt->neg = n;
3539}
3540
3541/**
3542 * xmlFAParseCharClass:
3543 * @ctxt:  a regexp parser context
3544 *
3545 * [11]   charClass   ::=     charClassEsc | charClassExpr
3546 * [12]   charClassExpr   ::=   '[' charGroup ']'
3547 */
3548static void
3549xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
3550    if (CUR == '[') {
3551	NEXT;
3552	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
3553	if (ctxt->atom == NULL)
3554	    return;
3555	xmlFAParseCharGroup(ctxt);
3556	if (CUR == ']') {
3557	    NEXT;
3558	} else {
3559	    ERROR("xmlFAParseCharClass: ']' expected");
3560	}
3561    } else {
3562	xmlFAParseCharClassEsc(ctxt);
3563    }
3564}
3565
3566/**
3567 * xmlFAParseQuantExact:
3568 * @ctxt:  a regexp parser context
3569 *
3570 * [8]   QuantExact   ::=   [0-9]+
3571 */
3572static int
3573xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
3574    int ret = 0;
3575    int ok = 0;
3576
3577    while ((CUR >= '0') && (CUR <= '9')) {
3578	ret = ret * 10 + (CUR - '0');
3579	ok = 1;
3580	NEXT;
3581    }
3582    if (ok != 1) {
3583	return(-1);
3584    }
3585    return(ret);
3586}
3587
3588/**
3589 * xmlFAParseQuantifier:
3590 * @ctxt:  a regexp parser context
3591 *
3592 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
3593 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
3594 * [6]   quantRange   ::=   QuantExact ',' QuantExact
3595 * [7]   quantMin   ::=   QuantExact ','
3596 * [8]   QuantExact   ::=   [0-9]+
3597 */
3598static int
3599xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
3600    int cur;
3601
3602    cur = CUR;
3603    if ((cur == '?') || (cur == '*') || (cur == '+')) {
3604	if (ctxt->atom != NULL) {
3605	    if (cur == '?')
3606		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
3607	    else if (cur == '*')
3608		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
3609	    else if (cur == '+')
3610		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
3611	}
3612	NEXT;
3613	return(1);
3614    }
3615    if (cur == '{') {
3616	int min = 0, max = 0;
3617
3618	NEXT;
3619	cur = xmlFAParseQuantExact(ctxt);
3620	if (cur >= 0)
3621	    min = cur;
3622	if (CUR == ',') {
3623	    NEXT;
3624	    cur = xmlFAParseQuantExact(ctxt);
3625	    if (cur >= 0)
3626		max = cur;
3627	}
3628	if (CUR == '}') {
3629	    NEXT;
3630	} else {
3631	    ERROR("Unterminated quantifier");
3632	}
3633	if (max == 0)
3634	    max = min;
3635	if (ctxt->atom != NULL) {
3636	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
3637	    ctxt->atom->min = min;
3638	    ctxt->atom->max = max;
3639	}
3640	return(1);
3641    }
3642    return(0);
3643}
3644
3645/**
3646 * xmlFAParseAtom:
3647 * @ctxt:  a regexp parser context
3648 *
3649 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
3650 */
3651static int
3652xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
3653    int codepoint, len;
3654
3655    codepoint = xmlFAIsChar(ctxt);
3656    if (codepoint > 0) {
3657	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3658	if (ctxt->atom == NULL)
3659	    return(-1);
3660	codepoint = CUR_SCHAR(ctxt->cur, len);
3661	ctxt->atom->codepoint = codepoint;
3662	NEXTL(len);
3663	return(1);
3664    } else if (CUR == '|') {
3665	return(0);
3666    } else if (CUR == 0) {
3667	return(0);
3668    } else if (CUR == ')') {
3669	return(0);
3670    } else if (CUR == '(') {
3671	xmlRegStatePtr start, oldend;
3672
3673	NEXT;
3674	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
3675	start = ctxt->state;
3676	oldend = ctxt->end;
3677	ctxt->end = NULL;
3678	ctxt->atom = NULL;
3679	xmlFAParseRegExp(ctxt, 0);
3680	if (CUR == ')') {
3681	    NEXT;
3682	} else {
3683	    ERROR("xmlFAParseAtom: expecting ')'");
3684	}
3685	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
3686	if (ctxt->atom == NULL)
3687	    return(-1);
3688	ctxt->atom->start = start;
3689	ctxt->atom->stop = ctxt->state;
3690	ctxt->end = oldend;
3691	return(1);
3692    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
3693	xmlFAParseCharClass(ctxt);
3694	return(1);
3695    }
3696    return(0);
3697}
3698
3699/**
3700 * xmlFAParsePiece:
3701 * @ctxt:  a regexp parser context
3702 *
3703 * [3]   piece   ::=   atom quantifier?
3704 */
3705static int
3706xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
3707    int ret;
3708
3709    ctxt->atom = NULL;
3710    ret = xmlFAParseAtom(ctxt);
3711    if (ret == 0)
3712	return(0);
3713    if (ctxt->atom == NULL) {
3714	ERROR("internal: no atom generated");
3715    }
3716    xmlFAParseQuantifier(ctxt);
3717    return(1);
3718}
3719
3720/**
3721 * xmlFAParseBranch:
3722 * @ctxt:  a regexp parser context
3723 * @first:  is taht the first
3724 *
3725 * [2]   branch   ::=   piece*
3726 */
3727static void
3728xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) {
3729    xmlRegStatePtr previous;
3730    xmlRegAtomPtr prevatom = NULL;
3731    int ret;
3732
3733    previous = ctxt->state;
3734    ret = xmlFAParsePiece(ctxt);
3735    if (ret != 0) {
3736	if (first) {
3737	    xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3738	    previous = ctxt->state;
3739	} else {
3740	    prevatom = ctxt->atom;
3741	}
3742	ctxt->atom = NULL;
3743    }
3744    while ((ret != 0) && (ctxt->error == 0)) {
3745	ret = xmlFAParsePiece(ctxt);
3746	if (ret != 0) {
3747	    if (first) {
3748		xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3749	    } else {
3750		xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom);
3751		prevatom = ctxt->atom;
3752	    }
3753	    previous = ctxt->state;
3754	    ctxt->atom = NULL;
3755	}
3756    }
3757    if (!first) {
3758	xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom);
3759    }
3760}
3761
3762/**
3763 * xmlFAParseRegExp:
3764 * @ctxt:  a regexp parser context
3765 * @top:  is that the top-level expressions ?
3766 *
3767 * [1]   regExp   ::=     branch  ( '|' branch )*
3768 */
3769static void
3770xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
3771    xmlRegStatePtr start, end, oldend;
3772
3773    oldend = ctxt->end;
3774
3775    start = ctxt->state;
3776    xmlFAParseBranch(ctxt, (ctxt->end == NULL));
3777    if (CUR != '|') {
3778	ctxt->end = ctxt->state;
3779	return;
3780    }
3781    end = ctxt->state;
3782    while ((CUR == '|') && (ctxt->error == 0)) {
3783	NEXT;
3784	ctxt->state = start;
3785	ctxt->end = end;
3786	xmlFAParseBranch(ctxt, 0);
3787    }
3788    if (!top)
3789	ctxt->end = oldend;
3790}
3791
3792/************************************************************************
3793 * 									*
3794 * 			The basic API					*
3795 * 									*
3796 ************************************************************************/
3797
3798/**
3799 * xmlRegexpPrint:
3800 * @output: the file for the output debug
3801 * @regexp: the compiled regexp
3802 *
3803 * Print the content of the compiled regular expression
3804 */
3805void
3806xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
3807    int i;
3808
3809    fprintf(output, " regexp: ");
3810    if (regexp == NULL) {
3811	fprintf(output, "NULL\n");
3812	return;
3813    }
3814    fprintf(output, "'%s' ", regexp->string);
3815    fprintf(output, "\n");
3816    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
3817    for (i = 0;i < regexp->nbAtoms; i++) {
3818	fprintf(output, " %02d ", i);
3819	xmlRegPrintAtom(output, regexp->atoms[i]);
3820    }
3821    fprintf(output, "%d states:", regexp->nbStates);
3822    fprintf(output, "\n");
3823    for (i = 0;i < regexp->nbStates; i++) {
3824	xmlRegPrintState(output, regexp->states[i]);
3825    }
3826    fprintf(output, "%d counters:\n", regexp->nbCounters);
3827    for (i = 0;i < regexp->nbCounters; i++) {
3828	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
3829		                                regexp->counters[i].max);
3830    }
3831}
3832
3833/**
3834 * xmlRegexpCompile:
3835 * @regexp:  a regular expression string
3836 *
3837 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
3838 * Appendix F and build an automata suitable for testing strings against
3839 * that regular expression
3840 *
3841 * Returns the compiled expression or NULL in case of error
3842 */
3843xmlRegexpPtr
3844xmlRegexpCompile(const xmlChar *regexp) {
3845    xmlRegexpPtr ret;
3846    xmlRegParserCtxtPtr ctxt;
3847
3848    ctxt = xmlRegNewParserCtxt(regexp);
3849    if (ctxt == NULL)
3850	return(NULL);
3851
3852    /* initialize the parser */
3853    ctxt->end = NULL;
3854    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3855    xmlRegStatePush(ctxt, ctxt->start);
3856
3857    /* parse the expression building an automata */
3858    xmlFAParseRegExp(ctxt, 1);
3859    if (CUR != 0) {
3860	ERROR("xmlFAParseRegExp: extra characters");
3861    }
3862    ctxt->end = ctxt->state;
3863    ctxt->start->type = XML_REGEXP_START_STATE;
3864    ctxt->end->type = XML_REGEXP_FINAL_STATE;
3865
3866    /* remove the Epsilon except for counted transitions */
3867    xmlFAEliminateEpsilonTransitions(ctxt);
3868
3869
3870    if (ctxt->error != 0) {
3871	xmlRegFreeParserCtxt(ctxt);
3872	return(NULL);
3873    }
3874    ret = xmlRegEpxFromParse(ctxt);
3875    xmlRegFreeParserCtxt(ctxt);
3876    return(ret);
3877}
3878
3879/**
3880 * xmlRegexpExec:
3881 * @comp:  the compiled regular expression
3882 * @content:  the value to check against the regular expression
3883 *
3884 * Check if the regular expression generate the value
3885 *
3886 * Returns 1 if it matches, 0 if not and a negativa value in case of error
3887 */
3888int
3889xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
3890    if ((comp == NULL) || (content == NULL))
3891	return(-1);
3892    return(xmlFARegExec(comp, content));
3893}
3894
3895/**
3896 * xmlRegexpIsDeterminist:
3897 * @comp:  the compiled regular expression
3898 *
3899 * Check if the regular expression is determinist
3900 *
3901 * Returns 1 if it yes, 0 if not and a negativa value in case of error
3902 */
3903int
3904xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
3905    xmlAutomataPtr am;
3906    int ret;
3907
3908    if (comp == NULL)
3909	return(-1);
3910    if (comp->determinist != -1)
3911	return(comp->determinist);
3912
3913    am = xmlNewAutomata();
3914    if (am->states != NULL) {
3915	int i;
3916
3917	for (i = 0;i < am->nbStates;i++)
3918	    xmlRegFreeState(am->states[i]);
3919	xmlFree(am->states);
3920    }
3921    am->nbAtoms = comp->nbAtoms;
3922    am->atoms = comp->atoms;
3923    am->nbStates = comp->nbStates;
3924    am->states = comp->states;
3925    am->determinist = -1;
3926    ret = xmlFAComputesDeterminism(am);
3927    am->atoms = NULL;
3928    am->states = NULL;
3929    xmlFreeAutomata(am);
3930    return(ret);
3931}
3932
3933/**
3934 * xmlRegFreeRegexp:
3935 * @regexp:  the regexp
3936 *
3937 * Free a regexp
3938 */
3939void
3940xmlRegFreeRegexp(xmlRegexpPtr regexp) {
3941    int i;
3942    if (regexp == NULL)
3943	return;
3944
3945    if (regexp->string != NULL)
3946	xmlFree(regexp->string);
3947    if (regexp->states != NULL) {
3948	for (i = 0;i < regexp->nbStates;i++)
3949	    xmlRegFreeState(regexp->states[i]);
3950	xmlFree(regexp->states);
3951    }
3952    if (regexp->atoms != NULL) {
3953	for (i = 0;i < regexp->nbAtoms;i++)
3954	    xmlRegFreeAtom(regexp->atoms[i]);
3955	xmlFree(regexp->atoms);
3956    }
3957    if (regexp->counters != NULL)
3958	xmlFree(regexp->counters);
3959    if (regexp->compact != NULL)
3960	xmlFree(regexp->compact);
3961    if (regexp->transdata != NULL)
3962	xmlFree(regexp->transdata);
3963    if (regexp->stringMap != NULL) {
3964	for (i = 0; i < regexp->nbstrings;i++)
3965	    xmlFree(regexp->stringMap[i]);
3966	xmlFree(regexp->stringMap);
3967    }
3968
3969    xmlFree(regexp);
3970}
3971
3972#ifdef LIBXML_AUTOMATA_ENABLED
3973/************************************************************************
3974 * 									*
3975 * 			The Automata interface				*
3976 * 									*
3977 ************************************************************************/
3978
3979/**
3980 * xmlNewAutomata:
3981 *
3982 * Create a new automata
3983 *
3984 * Returns the new object or NULL in case of failure
3985 */
3986xmlAutomataPtr
3987xmlNewAutomata(void) {
3988    xmlAutomataPtr ctxt;
3989
3990    ctxt = xmlRegNewParserCtxt(NULL);
3991    if (ctxt == NULL)
3992	return(NULL);
3993
3994    /* initialize the parser */
3995    ctxt->end = NULL;
3996    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3997    xmlRegStatePush(ctxt, ctxt->start);
3998
3999    return(ctxt);
4000}
4001
4002/**
4003 * xmlFreeAutomata:
4004 * @am: an automata
4005 *
4006 * Free an automata
4007 */
4008void
4009xmlFreeAutomata(xmlAutomataPtr am) {
4010    if (am == NULL)
4011	return;
4012    xmlRegFreeParserCtxt(am);
4013}
4014
4015/**
4016 * xmlAutomataGetInitState:
4017 * @am: an automata
4018 *
4019 * Initial state lookup
4020 *
4021 * Returns the initial state of the automata
4022 */
4023xmlAutomataStatePtr
4024xmlAutomataGetInitState(xmlAutomataPtr am) {
4025    if (am == NULL)
4026	return(NULL);
4027    return(am->start);
4028}
4029
4030/**
4031 * xmlAutomataSetFinalState:
4032 * @am: an automata
4033 * @state: a state in this automata
4034 *
4035 * Makes that state a final state
4036 *
4037 * Returns 0 or -1 in case of error
4038 */
4039int
4040xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
4041    if ((am == NULL) || (state == NULL))
4042	return(-1);
4043    state->type = XML_REGEXP_FINAL_STATE;
4044    return(0);
4045}
4046
4047/**
4048 * xmlAutomataNewTransition:
4049 * @am: an automata
4050 * @from: the starting point of the transition
4051 * @to: the target point of the transition or NULL
4052 * @token: the input string associated to that transition
4053 * @data: data passed to the callback function if the transition is activated
4054 *
4055 * If @to is NULL, this create first a new target state in the automata
4056 * and then adds a transition from the @from state to the target state
4057 * activated by the value of @token
4058 *
4059 * Returns the target state or NULL in case of error
4060 */
4061xmlAutomataStatePtr
4062xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
4063			 xmlAutomataStatePtr to, const xmlChar *token,
4064			 void *data) {
4065    xmlRegAtomPtr atom;
4066
4067    if ((am == NULL) || (from == NULL) || (token == NULL))
4068	return(NULL);
4069    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4070    atom->data = data;
4071    if (atom == NULL)
4072	return(NULL);
4073    atom->valuep = xmlStrdup(token);
4074
4075    xmlFAGenerateTransitions(am, from, to, atom);
4076    if (to == NULL)
4077	return(am->state);
4078    return(to);
4079}
4080
4081/**
4082 * xmlAutomataNewTransition2:
4083 * @am: an automata
4084 * @from: the starting point of the transition
4085 * @to: the target point of the transition or NULL
4086 * @token: the first input string associated to that transition
4087 * @token2: the second input string associated to that transition
4088 * @data: data passed to the callback function if the transition is activated
4089 *
4090 * If @to is NULL, this create first a new target state in the automata
4091 * and then adds a transition from the @from state to the target state
4092 * activated by the value of @token
4093 *
4094 * Returns the target state or NULL in case of error
4095 */
4096xmlAutomataStatePtr
4097xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
4098			  xmlAutomataStatePtr to, const xmlChar *token,
4099			  const xmlChar *token2, void *data) {
4100    xmlRegAtomPtr atom;
4101
4102    if ((am == NULL) || (from == NULL) || (token == NULL))
4103	return(NULL);
4104    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4105    atom->data = data;
4106    if (atom == NULL)
4107	return(NULL);
4108    if ((token2 == NULL) || (*token2 == 0)) {
4109	atom->valuep = xmlStrdup(token);
4110    } else {
4111	int lenn, lenp;
4112	xmlChar *str;
4113
4114	lenn = strlen((char *) token2);
4115	lenp = strlen((char *) token);
4116
4117	str = (xmlChar *) xmlMalloc(lenn + lenp + 2);
4118	if (str == NULL) {
4119	    xmlRegFreeAtom(atom);
4120	    return(NULL);
4121	}
4122	memcpy(&str[0], token, lenp);
4123	str[lenp] = '|';
4124	memcpy(&str[lenp + 1], token2, lenn);
4125	str[lenn + lenp + 1] = 0;
4126
4127	atom->valuep = str;
4128    }
4129
4130    xmlFAGenerateTransitions(am, from, to, atom);
4131    if (to == NULL)
4132	return(am->state);
4133    return(to);
4134}
4135
4136/**
4137 * xmlAutomataNewCountTrans:
4138 * @am: an automata
4139 * @from: the starting point of the transition
4140 * @to: the target point of the transition or NULL
4141 * @token: the input string associated to that transition
4142 * @min:  the minimum successive occurences of token
4143 * @max:  the maximum successive occurences of token
4144 * @data:  data associated to the transition
4145 *
4146 * If @to is NULL, this create first a new target state in the automata
4147 * and then adds a transition from the @from state to the target state
4148 * activated by a succession of input of value @token and whose number
4149 * is between @min and @max
4150 *
4151 * Returns the target state or NULL in case of error
4152 */
4153xmlAutomataStatePtr
4154xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4155			 xmlAutomataStatePtr to, const xmlChar *token,
4156			 int min, int max, void *data) {
4157    xmlRegAtomPtr atom;
4158
4159    if ((am == NULL) || (from == NULL) || (token == NULL))
4160	return(NULL);
4161    if (min < 0)
4162	return(NULL);
4163    if ((max < min) || (max < 1))
4164	return(NULL);
4165    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4166    if (atom == NULL)
4167	return(NULL);
4168    atom->valuep = xmlStrdup(token);
4169    atom->data = data;
4170    if (min == 0)
4171	atom->min = 1;
4172    else
4173	atom->min = min;
4174    atom->max = max;
4175
4176    xmlFAGenerateTransitions(am, from, to, atom);
4177    if (to == NULL)
4178	to = am->state;
4179    if (to == NULL)
4180	return(NULL);
4181    if (min == 0)
4182	xmlFAGenerateEpsilonTransition(am, from, to);
4183    return(to);
4184}
4185
4186/**
4187 * xmlAutomataNewOnceTrans:
4188 * @am: an automata
4189 * @from: the starting point of the transition
4190 * @to: the target point of the transition or NULL
4191 * @token: the input string associated to that transition
4192 * @min:  the minimum successive occurences of token
4193 * @max:  the maximum successive occurences of token
4194 * @data:  data associated to the transition
4195 *
4196 * If @to is NULL, this create first a new target state in the automata
4197 * and then adds a transition from the @from state to the target state
4198 * activated by a succession of input of value @token and whose number
4199 * is between @min and @max, moreover that transistion can only be crossed
4200 * once.
4201 *
4202 * Returns the target state or NULL in case of error
4203 */
4204xmlAutomataStatePtr
4205xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4206			 xmlAutomataStatePtr to, const xmlChar *token,
4207			 int min, int max, void *data) {
4208    xmlRegAtomPtr atom;
4209    int counter;
4210
4211    if ((am == NULL) || (from == NULL) || (token == NULL))
4212	return(NULL);
4213    if (min < 1)
4214	return(NULL);
4215    if ((max < min) || (max < 1))
4216	return(NULL);
4217    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
4218    if (atom == NULL)
4219	return(NULL);
4220    atom->valuep = xmlStrdup(token);
4221    atom->data = data;
4222    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
4223    if (min == 0)
4224	atom->min = 1;
4225    else
4226	atom->min = min;
4227    atom->max = max;
4228    /*
4229     * associate a counter to the transition.
4230     */
4231    counter = xmlRegGetCounter(am);
4232    am->counters[counter].min = 1;
4233    am->counters[counter].max = 1;
4234
4235    /* xmlFAGenerateTransitions(am, from, to, atom); */
4236    if (to == NULL) {
4237	to = xmlRegNewState(am);
4238	xmlRegStatePush(am, to);
4239    }
4240    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
4241    xmlRegAtomPush(am, atom);
4242    am->state = to;
4243    if (to == NULL)
4244	to = am->state;
4245    if (to == NULL)
4246	return(NULL);
4247    return(to);
4248}
4249
4250/**
4251 * xmlAutomataNewState:
4252 * @am: an automata
4253 *
4254 * Create a new disconnected state in the automata
4255 *
4256 * Returns the new state or NULL in case of error
4257 */
4258xmlAutomataStatePtr
4259xmlAutomataNewState(xmlAutomataPtr am) {
4260    xmlAutomataStatePtr to;
4261
4262    if (am == NULL)
4263	return(NULL);
4264    to = xmlRegNewState(am);
4265    xmlRegStatePush(am, to);
4266    return(to);
4267}
4268
4269/**
4270 * xmlAutomataNewEpsilon:
4271 * @am: an automata
4272 * @from: the starting point of the transition
4273 * @to: the target point of the transition or NULL
4274 *
4275 * If @to is NULL, this create first a new target state in the automata
4276 * and then adds a an epsilon transition from the @from state to the
4277 * target state
4278 *
4279 * Returns the target state or NULL in case of error
4280 */
4281xmlAutomataStatePtr
4282xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
4283		      xmlAutomataStatePtr to) {
4284    if ((am == NULL) || (from == NULL))
4285	return(NULL);
4286    xmlFAGenerateEpsilonTransition(am, from, to);
4287    if (to == NULL)
4288	return(am->state);
4289    return(to);
4290}
4291
4292/**
4293 * xmlAutomataNewAllTrans:
4294 * @am: an automata
4295 * @from: the starting point of the transition
4296 * @to: the target point of the transition or NULL
4297 * @lax: allow to transition if not all all transitions have been activated
4298 *
4299 * If @to is NULL, this create first a new target state in the automata
4300 * and then adds a an ALL transition from the @from state to the
4301 * target state. That transition is an epsilon transition allowed only when
4302 * all transitions from the @from node have been activated.
4303 *
4304 * Returns the target state or NULL in case of error
4305 */
4306xmlAutomataStatePtr
4307xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4308		       xmlAutomataStatePtr to, int lax) {
4309    if ((am == NULL) || (from == NULL))
4310	return(NULL);
4311    xmlFAGenerateAllTransition(am, from, to, lax);
4312    if (to == NULL)
4313	return(am->state);
4314    return(to);
4315}
4316
4317/**
4318 * xmlAutomataNewCounter:
4319 * @am: an automata
4320 * @min:  the minimal value on the counter
4321 * @max:  the maximal value on the counter
4322 *
4323 * Create a new counter
4324 *
4325 * Returns the counter number or -1 in case of error
4326 */
4327int
4328xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
4329    int ret;
4330
4331    if (am == NULL)
4332	return(-1);
4333
4334    ret = xmlRegGetCounter(am);
4335    if (ret < 0)
4336	return(-1);
4337    am->counters[ret].min = min;
4338    am->counters[ret].max = max;
4339    return(ret);
4340}
4341
4342/**
4343 * xmlAutomataNewCountedTrans:
4344 * @am: an automata
4345 * @from: the starting point of the transition
4346 * @to: the target point of the transition or NULL
4347 * @counter: the counter associated to that transition
4348 *
4349 * If @to is NULL, this create first a new target state in the automata
4350 * and then adds an epsilon transition from the @from state to the target state
4351 * which will increment the counter provided
4352 *
4353 * Returns the target state or NULL in case of error
4354 */
4355xmlAutomataStatePtr
4356xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4357		xmlAutomataStatePtr to, int counter) {
4358    if ((am == NULL) || (from == NULL) || (counter < 0))
4359	return(NULL);
4360    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
4361    if (to == NULL)
4362	return(am->state);
4363    return(to);
4364}
4365
4366/**
4367 * xmlAutomataNewCounterTrans:
4368 * @am: an automata
4369 * @from: the starting point of the transition
4370 * @to: the target point of the transition or NULL
4371 * @counter: the counter associated to that transition
4372 *
4373 * If @to is NULL, this create first a new target state in the automata
4374 * and then adds an epsilon transition from the @from state to the target state
4375 * which will be allowed only if the counter is within the right range.
4376 *
4377 * Returns the target state or NULL in case of error
4378 */
4379xmlAutomataStatePtr
4380xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
4381		xmlAutomataStatePtr to, int counter) {
4382    if ((am == NULL) || (from == NULL) || (counter < 0))
4383	return(NULL);
4384    xmlFAGenerateCountedTransition(am, from, to, counter);
4385    if (to == NULL)
4386	return(am->state);
4387    return(to);
4388}
4389
4390/**
4391 * xmlAutomataCompile:
4392 * @am: an automata
4393 *
4394 * Compile the automata into a Reg Exp ready for being executed.
4395 * The automata should be free after this point.
4396 *
4397 * Returns the compiled regexp or NULL in case of error
4398 */
4399xmlRegexpPtr
4400xmlAutomataCompile(xmlAutomataPtr am) {
4401    xmlRegexpPtr ret;
4402
4403    xmlFAEliminateEpsilonTransitions(am);
4404    /* xmlFAComputesDeterminism(am); */
4405    ret = xmlRegEpxFromParse(am);
4406
4407    return(ret);
4408}
4409
4410/**
4411 * xmlAutomataIsDeterminist:
4412 * @am: an automata
4413 *
4414 * Checks if an automata is determinist.
4415 *
4416 * Returns 1 if true, 0 if not, and -1 in case of error
4417 */
4418int
4419xmlAutomataIsDeterminist(xmlAutomataPtr am) {
4420    int ret;
4421
4422    if (am == NULL)
4423	return(-1);
4424
4425    ret = xmlFAComputesDeterminism(am);
4426    return(ret);
4427}
4428#endif /* LIBXML_AUTOMATA_ENABLED */
4429#endif /* LIBXML_REGEXP_ENABLED */
4430