xmlregexp.c revision 7646b18d64b6c739d04ca453493070e88c4aab13
1/*
2 * regexp.c: generic and extensible Regular Expression engine
3 *
4 * Basically designed with the purpose of compiling regexps for
5 * the variety of validation/shemas mechanisms now available in
6 * XML related specifications thise includes:
7 *    - XML-1.0 DTD validation
8 *    - XML Schemas structure part 1
9 *    - XML Schemas Datatypes part 2 especially Appendix F
10 *    - RELAX-NG/TREX i.e. the counter proposal
11 *
12 * See Copyright for the status of this software.
13 *
14 * Daniel Veillard <veillard@redhat.com>
15 */
16
17#define IN_LIBXML
18#include "libxml.h"
19
20#ifdef LIBXML_REGEXP_ENABLED
21
22#include <stdio.h>
23#include <string.h>
24#include <libxml/tree.h>
25#include <libxml/parserInternals.h>
26#include <libxml/xmlregexp.h>
27#include <libxml/xmlautomata.h>
28#include <libxml/xmlunicode.h>
29
30/* #define DEBUG_REGEXP_GRAPH  */
31/* #define DEBUG_REGEXP_EXEC */
32/* #define DEBUG_PUSH */
33
34#define ERROR(str) ctxt->error = 1;					\
35    xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
36#define NEXT ctxt->cur++
37#define CUR (*(ctxt->cur))
38#define NXT(index) (ctxt->cur[index])
39
40#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
41#define NEXTL(l) ctxt->cur += l;
42
43
44/************************************************************************
45 * 									*
46 * 			Datatypes and structures			*
47 * 									*
48 ************************************************************************/
49
50typedef enum {
51    XML_REGEXP_EPSILON = 1,
52    XML_REGEXP_CHARVAL,
53    XML_REGEXP_RANGES,
54    XML_REGEXP_SUBREG,
55    XML_REGEXP_STRING,
56    XML_REGEXP_ANYCHAR, /* . */
57    XML_REGEXP_ANYSPACE, /* \s */
58    XML_REGEXP_NOTSPACE, /* \S */
59    XML_REGEXP_INITNAME, /* \l */
60    XML_REGEXP_NOTINITNAME, /* \l */
61    XML_REGEXP_NAMECHAR, /* \c */
62    XML_REGEXP_NOTNAMECHAR, /* \C */
63    XML_REGEXP_DECIMAL, /* \d */
64    XML_REGEXP_NOTDECIMAL, /* \d */
65    XML_REGEXP_REALCHAR, /* \w */
66    XML_REGEXP_NOTREALCHAR, /* \w */
67    XML_REGEXP_LETTER,
68    XML_REGEXP_LETTER_UPPERCASE,
69    XML_REGEXP_LETTER_LOWERCASE,
70    XML_REGEXP_LETTER_TITLECASE,
71    XML_REGEXP_LETTER_MODIFIER,
72    XML_REGEXP_LETTER_OTHERS,
73    XML_REGEXP_MARK,
74    XML_REGEXP_MARK_NONSPACING,
75    XML_REGEXP_MARK_SPACECOMBINING,
76    XML_REGEXP_MARK_ENCLOSING,
77    XML_REGEXP_NUMBER,
78    XML_REGEXP_NUMBER_DECIMAL,
79    XML_REGEXP_NUMBER_LETTER,
80    XML_REGEXP_NUMBER_OTHERS,
81    XML_REGEXP_PUNCT,
82    XML_REGEXP_PUNCT_CONNECTOR,
83    XML_REGEXP_PUNCT_DASH,
84    XML_REGEXP_PUNCT_OPEN,
85    XML_REGEXP_PUNCT_CLOSE,
86    XML_REGEXP_PUNCT_INITQUOTE,
87    XML_REGEXP_PUNCT_FINQUOTE,
88    XML_REGEXP_PUNCT_OTHERS,
89    XML_REGEXP_SEPAR,
90    XML_REGEXP_SEPAR_SPACE,
91    XML_REGEXP_SEPAR_LINE,
92    XML_REGEXP_SEPAR_PARA,
93    XML_REGEXP_SYMBOL,
94    XML_REGEXP_SYMBOL_MATH,
95    XML_REGEXP_SYMBOL_CURRENCY,
96    XML_REGEXP_SYMBOL_MODIFIER,
97    XML_REGEXP_SYMBOL_OTHERS,
98    XML_REGEXP_OTHER,
99    XML_REGEXP_OTHER_CONTROL,
100    XML_REGEXP_OTHER_FORMAT,
101    XML_REGEXP_OTHER_PRIVATE,
102    XML_REGEXP_OTHER_NA,
103    XML_REGEXP_BLOCK_NAME
104} xmlRegAtomType;
105
106typedef enum {
107    XML_REGEXP_QUANT_EPSILON = 1,
108    XML_REGEXP_QUANT_ONCE,
109    XML_REGEXP_QUANT_OPT,
110    XML_REGEXP_QUANT_MULT,
111    XML_REGEXP_QUANT_PLUS,
112    XML_REGEXP_QUANT_ONCEONLY,
113    XML_REGEXP_QUANT_ALL,
114    XML_REGEXP_QUANT_RANGE
115} xmlRegQuantType;
116
117typedef enum {
118    XML_REGEXP_START_STATE = 1,
119    XML_REGEXP_FINAL_STATE,
120    XML_REGEXP_TRANS_STATE
121} xmlRegStateType;
122
123typedef enum {
124    XML_REGEXP_MARK_NORMAL = 0,
125    XML_REGEXP_MARK_START,
126    XML_REGEXP_MARK_VISITED
127} xmlRegMarkedType;
128
129typedef struct _xmlRegRange xmlRegRange;
130typedef xmlRegRange *xmlRegRangePtr;
131
132struct _xmlRegRange {
133    int neg;
134    xmlRegAtomType type;
135    int start;
136    int end;
137    xmlChar *blockName;
138};
139
140typedef struct _xmlRegAtom xmlRegAtom;
141typedef xmlRegAtom *xmlRegAtomPtr;
142
143typedef struct _xmlAutomataState xmlRegState;
144typedef xmlRegState *xmlRegStatePtr;
145
146struct _xmlRegAtom {
147    int no;
148    xmlRegAtomType type;
149    xmlRegQuantType quant;
150    int min;
151    int max;
152
153    void *valuep;
154    int neg;
155    int codepoint;
156    xmlRegStatePtr start;
157    xmlRegStatePtr stop;
158    int maxRanges;
159    int nbRanges;
160    xmlRegRangePtr *ranges;
161    void *data;
162};
163
164typedef struct _xmlRegCounter xmlRegCounter;
165typedef xmlRegCounter *xmlRegCounterPtr;
166
167struct _xmlRegCounter {
168    int min;
169    int max;
170};
171
172typedef struct _xmlRegTrans xmlRegTrans;
173typedef xmlRegTrans *xmlRegTransPtr;
174
175struct _xmlRegTrans {
176    xmlRegAtomPtr atom;
177    int to;
178    int counter;
179    int count;
180};
181
182struct _xmlAutomataState {
183    xmlRegStateType type;
184    xmlRegMarkedType mark;
185    int no;
186
187    int maxTrans;
188    int nbTrans;
189    xmlRegTrans *trans;
190};
191
192typedef struct _xmlAutomata xmlRegParserCtxt;
193typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
194
195struct _xmlAutomata {
196    xmlChar *string;
197    xmlChar *cur;
198
199    int error;
200    int neg;
201
202    xmlRegStatePtr start;
203    xmlRegStatePtr end;
204    xmlRegStatePtr state;
205
206    xmlRegAtomPtr atom;
207
208    int maxAtoms;
209    int nbAtoms;
210    xmlRegAtomPtr *atoms;
211
212    int maxStates;
213    int nbStates;
214    xmlRegStatePtr *states;
215
216    int maxCounters;
217    int nbCounters;
218    xmlRegCounter *counters;
219};
220
221struct _xmlRegexp {
222    xmlChar *string;
223    int nbStates;
224    xmlRegStatePtr *states;
225    int nbAtoms;
226    xmlRegAtomPtr *atoms;
227    int nbCounters;
228    xmlRegCounter *counters;
229};
230
231typedef struct _xmlRegExecRollback xmlRegExecRollback;
232typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
233
234struct _xmlRegExecRollback {
235    xmlRegStatePtr state;/* the current state */
236    int index;		/* the index in the input stack */
237    int nextbranch;	/* the next transition to explore in that state */
238    int *counts;	/* save the automate state if it has some */
239};
240
241typedef struct _xmlRegInputToken xmlRegInputToken;
242typedef xmlRegInputToken *xmlRegInputTokenPtr;
243
244struct _xmlRegInputToken {
245    xmlChar *value;
246    void *data;
247};
248
249struct _xmlRegExecCtxt {
250    int status;		/* execution status != 0 indicate an error */
251    int determinist;	/* did we found an inderterministic behaviour */
252    xmlRegexpPtr comp;	/* the compiled regexp */
253    xmlRegExecCallbacks callback;
254    void *data;
255
256    xmlRegStatePtr state;/* the current state */
257    int transno;	/* the current transition on that state */
258    int transcount;	/* the number of char in char counted transitions */
259
260    /*
261     * A stack of rollback states
262     */
263    int maxRollbacks;
264    int nbRollbacks;
265    xmlRegExecRollback *rollbacks;
266
267    /*
268     * The state of the automata if any
269     */
270    int *counts;
271
272    /*
273     * The input stack
274     */
275    int inputStackMax;
276    int inputStackNr;
277    int index;
278    int *charStack;
279    const xmlChar *inputString; /* when operating on characters */
280    xmlRegInputTokenPtr inputStack;/* when operating on strings */
281
282};
283
284#define REGEXP_ALL_COUNTER 0x123456
285
286static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
287
288/************************************************************************
289 * 									*
290 * 			Allocation/Deallocation				*
291 * 									*
292 ************************************************************************/
293
294/**
295 * xmlRegEpxFromParse:
296 * @ctxt:  the parser context used to build it
297 *
298 * Allocate a new regexp and fill it with the reult from the parser
299 *
300 * Returns the new regexp or NULL in case of error
301 */
302static xmlRegexpPtr
303xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
304    xmlRegexpPtr ret;
305
306    ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
307    if (ret == NULL)
308	return(NULL);
309    memset(ret, 0, sizeof(xmlRegexp));
310    ret->string = ctxt->string;
311    ctxt->string = NULL;
312    ret->nbStates = ctxt->nbStates;
313    ctxt->nbStates = 0;
314    ret->states = ctxt->states;
315    ctxt->states = NULL;
316    ret->nbAtoms = ctxt->nbAtoms;
317    ctxt->nbAtoms = 0;
318    ret->atoms = ctxt->atoms;
319    ctxt->atoms = NULL;
320    ret->nbCounters = ctxt->nbCounters;
321    ctxt->nbCounters = 0;
322    ret->counters = ctxt->counters;
323    ctxt->counters = NULL;
324    return(ret);
325}
326
327/**
328 * xmlRegNewParserCtxt:
329 * @string:  the string to parse
330 *
331 * Allocate a new regexp parser context
332 *
333 * Returns the new context or NULL in case of error
334 */
335static xmlRegParserCtxtPtr
336xmlRegNewParserCtxt(const xmlChar *string) {
337    xmlRegParserCtxtPtr ret;
338
339    ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
340    if (ret == NULL)
341	return(NULL);
342    memset(ret, 0, sizeof(xmlRegParserCtxt));
343    if (string != NULL)
344	ret->string = xmlStrdup(string);
345    ret->cur = ret->string;
346    ret->neg = 0;
347    ret->error = 0;
348    return(ret);
349}
350
351/**
352 * xmlRegNewRange:
353 * @ctxt:  the regexp parser context
354 * @neg:  is that negative
355 * @type:  the type of range
356 * @start:  the start codepoint
357 * @end:  the end codepoint
358 *
359 * Allocate a new regexp range
360 *
361 * Returns the new range or NULL in case of error
362 */
363static xmlRegRangePtr
364xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
365	       int neg, xmlRegAtomType type, int start, int end) {
366    xmlRegRangePtr ret;
367
368    ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
369    if (ret == NULL) {
370	ERROR("failed to allocate regexp range");
371	return(NULL);
372    }
373    ret->neg = neg;
374    ret->type = type;
375    ret->start = start;
376    ret->end = end;
377    return(ret);
378}
379
380/**
381 * xmlRegFreeRange:
382 * @range:  the regexp range
383 *
384 * Free a regexp range
385 */
386static void
387xmlRegFreeRange(xmlRegRangePtr range) {
388    if (range == NULL)
389	return;
390
391    if (range->blockName != NULL)
392	xmlFree(range->blockName);
393    xmlFree(range);
394}
395
396/**
397 * xmlRegNewAtom:
398 * @ctxt:  the regexp parser context
399 * @type:  the type of atom
400 *
401 * Allocate a new regexp range
402 *
403 * Returns the new atom or NULL in case of error
404 */
405static xmlRegAtomPtr
406xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
407    xmlRegAtomPtr ret;
408
409    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
410    if (ret == NULL) {
411	ERROR("failed to allocate regexp atom");
412	return(NULL);
413    }
414    memset(ret, 0, sizeof(xmlRegAtom));
415    ret->type = type;
416    ret->quant = XML_REGEXP_QUANT_ONCE;
417    ret->min = 0;
418    ret->max = 0;
419    return(ret);
420}
421
422/**
423 * xmlRegFreeAtom:
424 * @atom:  the regexp atom
425 *
426 * Free a regexp atom
427 */
428static void
429xmlRegFreeAtom(xmlRegAtomPtr atom) {
430    int i;
431
432    if (atom == NULL)
433	return;
434
435    for (i = 0;i < atom->nbRanges;i++)
436	xmlRegFreeRange(atom->ranges[i]);
437    if (atom->ranges != NULL)
438	xmlFree(atom->ranges);
439    if (atom->type == XML_REGEXP_STRING)
440	xmlFree(atom->valuep);
441    xmlFree(atom);
442}
443
444static xmlRegStatePtr
445xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
446    xmlRegStatePtr ret;
447
448    ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
449    if (ret == NULL) {
450	ERROR("failed to allocate regexp state");
451	return(NULL);
452    }
453    memset(ret, 0, sizeof(xmlRegState));
454    ret->type = XML_REGEXP_TRANS_STATE;
455    ret->mark = XML_REGEXP_MARK_NORMAL;
456    return(ret);
457}
458
459/**
460 * xmlRegFreeState:
461 * @state:  the regexp state
462 *
463 * Free a regexp state
464 */
465static void
466xmlRegFreeState(xmlRegStatePtr state) {
467    if (state == NULL)
468	return;
469
470    if (state->trans != NULL)
471	xmlFree(state->trans);
472    xmlFree(state);
473}
474
475/**
476 * xmlRegFreeParserCtxt:
477 * @ctxt:  the regexp parser context
478 *
479 * Free a regexp parser context
480 */
481static void
482xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
483    int i;
484    if (ctxt == NULL)
485	return;
486
487    if (ctxt->string != NULL)
488	xmlFree(ctxt->string);
489    if (ctxt->states != NULL) {
490	for (i = 0;i < ctxt->nbStates;i++)
491	    xmlRegFreeState(ctxt->states[i]);
492	xmlFree(ctxt->states);
493    }
494    if (ctxt->atoms != NULL) {
495	for (i = 0;i < ctxt->nbAtoms;i++)
496	    xmlRegFreeAtom(ctxt->atoms[i]);
497	xmlFree(ctxt->atoms);
498    }
499    if (ctxt->counters != NULL)
500	xmlFree(ctxt->counters);
501    xmlFree(ctxt);
502}
503
504/************************************************************************
505 * 									*
506 * 			Display of Data structures			*
507 * 									*
508 ************************************************************************/
509
510static void
511xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
512    switch (type) {
513        case XML_REGEXP_EPSILON:
514	    fprintf(output, "epsilon "); break;
515        case XML_REGEXP_CHARVAL:
516	    fprintf(output, "charval "); break;
517        case XML_REGEXP_RANGES:
518	    fprintf(output, "ranges "); break;
519        case XML_REGEXP_SUBREG:
520	    fprintf(output, "subexpr "); break;
521        case XML_REGEXP_STRING:
522	    fprintf(output, "string "); break;
523        case XML_REGEXP_ANYCHAR:
524	    fprintf(output, "anychar "); break;
525        case XML_REGEXP_ANYSPACE:
526	    fprintf(output, "anyspace "); break;
527        case XML_REGEXP_NOTSPACE:
528	    fprintf(output, "notspace "); break;
529        case XML_REGEXP_INITNAME:
530	    fprintf(output, "initname "); break;
531        case XML_REGEXP_NOTINITNAME:
532	    fprintf(output, "notinitname "); break;
533        case XML_REGEXP_NAMECHAR:
534	    fprintf(output, "namechar "); break;
535        case XML_REGEXP_NOTNAMECHAR:
536	    fprintf(output, "notnamechar "); break;
537        case XML_REGEXP_DECIMAL:
538	    fprintf(output, "decimal "); break;
539        case XML_REGEXP_NOTDECIMAL:
540	    fprintf(output, "notdecimal "); break;
541        case XML_REGEXP_REALCHAR:
542	    fprintf(output, "realchar "); break;
543        case XML_REGEXP_NOTREALCHAR:
544	    fprintf(output, "notrealchar "); break;
545        case XML_REGEXP_LETTER:
546            fprintf(output, "LETTER "); break;
547        case XML_REGEXP_LETTER_UPPERCASE:
548            fprintf(output, "LETTER_UPPERCASE "); break;
549        case XML_REGEXP_LETTER_LOWERCASE:
550            fprintf(output, "LETTER_LOWERCASE "); break;
551        case XML_REGEXP_LETTER_TITLECASE:
552            fprintf(output, "LETTER_TITLECASE "); break;
553        case XML_REGEXP_LETTER_MODIFIER:
554            fprintf(output, "LETTER_MODIFIER "); break;
555        case XML_REGEXP_LETTER_OTHERS:
556            fprintf(output, "LETTER_OTHERS "); break;
557        case XML_REGEXP_MARK:
558            fprintf(output, "MARK "); break;
559        case XML_REGEXP_MARK_NONSPACING:
560            fprintf(output, "MARK_NONSPACING "); break;
561        case XML_REGEXP_MARK_SPACECOMBINING:
562            fprintf(output, "MARK_SPACECOMBINING "); break;
563        case XML_REGEXP_MARK_ENCLOSING:
564            fprintf(output, "MARK_ENCLOSING "); break;
565        case XML_REGEXP_NUMBER:
566            fprintf(output, "NUMBER "); break;
567        case XML_REGEXP_NUMBER_DECIMAL:
568            fprintf(output, "NUMBER_DECIMAL "); break;
569        case XML_REGEXP_NUMBER_LETTER:
570            fprintf(output, "NUMBER_LETTER "); break;
571        case XML_REGEXP_NUMBER_OTHERS:
572            fprintf(output, "NUMBER_OTHERS "); break;
573        case XML_REGEXP_PUNCT:
574            fprintf(output, "PUNCT "); break;
575        case XML_REGEXP_PUNCT_CONNECTOR:
576            fprintf(output, "PUNCT_CONNECTOR "); break;
577        case XML_REGEXP_PUNCT_DASH:
578            fprintf(output, "PUNCT_DASH "); break;
579        case XML_REGEXP_PUNCT_OPEN:
580            fprintf(output, "PUNCT_OPEN "); break;
581        case XML_REGEXP_PUNCT_CLOSE:
582            fprintf(output, "PUNCT_CLOSE "); break;
583        case XML_REGEXP_PUNCT_INITQUOTE:
584            fprintf(output, "PUNCT_INITQUOTE "); break;
585        case XML_REGEXP_PUNCT_FINQUOTE:
586            fprintf(output, "PUNCT_FINQUOTE "); break;
587        case XML_REGEXP_PUNCT_OTHERS:
588            fprintf(output, "PUNCT_OTHERS "); break;
589        case XML_REGEXP_SEPAR:
590            fprintf(output, "SEPAR "); break;
591        case XML_REGEXP_SEPAR_SPACE:
592            fprintf(output, "SEPAR_SPACE "); break;
593        case XML_REGEXP_SEPAR_LINE:
594            fprintf(output, "SEPAR_LINE "); break;
595        case XML_REGEXP_SEPAR_PARA:
596            fprintf(output, "SEPAR_PARA "); break;
597        case XML_REGEXP_SYMBOL:
598            fprintf(output, "SYMBOL "); break;
599        case XML_REGEXP_SYMBOL_MATH:
600            fprintf(output, "SYMBOL_MATH "); break;
601        case XML_REGEXP_SYMBOL_CURRENCY:
602            fprintf(output, "SYMBOL_CURRENCY "); break;
603        case XML_REGEXP_SYMBOL_MODIFIER:
604            fprintf(output, "SYMBOL_MODIFIER "); break;
605        case XML_REGEXP_SYMBOL_OTHERS:
606            fprintf(output, "SYMBOL_OTHERS "); break;
607        case XML_REGEXP_OTHER:
608            fprintf(output, "OTHER "); break;
609        case XML_REGEXP_OTHER_CONTROL:
610            fprintf(output, "OTHER_CONTROL "); break;
611        case XML_REGEXP_OTHER_FORMAT:
612            fprintf(output, "OTHER_FORMAT "); break;
613        case XML_REGEXP_OTHER_PRIVATE:
614            fprintf(output, "OTHER_PRIVATE "); break;
615        case XML_REGEXP_OTHER_NA:
616            fprintf(output, "OTHER_NA "); break;
617        case XML_REGEXP_BLOCK_NAME:
618	    fprintf(output, "BLOCK "); break;
619    }
620}
621
622static void
623xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
624    switch (type) {
625        case XML_REGEXP_QUANT_EPSILON:
626	    fprintf(output, "epsilon "); break;
627        case XML_REGEXP_QUANT_ONCE:
628	    fprintf(output, "once "); break;
629        case XML_REGEXP_QUANT_OPT:
630	    fprintf(output, "? "); break;
631        case XML_REGEXP_QUANT_MULT:
632	    fprintf(output, "* "); break;
633        case XML_REGEXP_QUANT_PLUS:
634	    fprintf(output, "+ "); break;
635	case XML_REGEXP_QUANT_RANGE:
636	    fprintf(output, "range "); break;
637	case XML_REGEXP_QUANT_ONCEONLY:
638	    fprintf(output, "onceonly "); break;
639	case XML_REGEXP_QUANT_ALL:
640	    fprintf(output, "all "); break;
641    }
642}
643static void
644xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
645    fprintf(output, "  range: ");
646    if (range->neg)
647	fprintf(output, "negative ");
648    xmlRegPrintAtomType(output, range->type);
649    fprintf(output, "%c - %c\n", range->start, range->end);
650}
651
652static void
653xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
654    fprintf(output, " atom: ");
655    if (atom == NULL) {
656	fprintf(output, "NULL\n");
657	return;
658    }
659    xmlRegPrintAtomType(output, atom->type);
660    xmlRegPrintQuantType(output, atom->quant);
661    if (atom->quant == XML_REGEXP_QUANT_RANGE)
662	fprintf(output, "%d-%d ", atom->min, atom->max);
663    if (atom->type == XML_REGEXP_STRING)
664	fprintf(output, "'%s' ", (char *) atom->valuep);
665    if (atom->type == XML_REGEXP_CHARVAL)
666	fprintf(output, "char %c\n", atom->codepoint);
667    else if (atom->type == XML_REGEXP_RANGES) {
668	int i;
669	fprintf(output, "%d entries\n", atom->nbRanges);
670	for (i = 0; i < atom->nbRanges;i++)
671	    xmlRegPrintRange(output, atom->ranges[i]);
672    } else if (atom->type == XML_REGEXP_SUBREG) {
673	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
674    } else {
675	fprintf(output, "\n");
676    }
677}
678
679static void
680xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
681    fprintf(output, "  trans: ");
682    if (trans == NULL) {
683	fprintf(output, "NULL\n");
684	return;
685    }
686    if (trans->to < 0) {
687	fprintf(output, "removed\n");
688	return;
689    }
690    if (trans->counter >= 0) {
691	fprintf(output, "counted %d, ", trans->counter);
692    }
693    if (trans->count >= 0) {
694	fprintf(output, "count based %d, ", trans->count);
695    }
696    if (trans->atom == NULL) {
697	fprintf(output, "epsilon to %d\n", trans->to);
698	return;
699    }
700    if (trans->atom->type == XML_REGEXP_CHARVAL)
701	fprintf(output, "char %c ", trans->atom->codepoint);
702    fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
703}
704
705static void
706xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
707    int i;
708
709    fprintf(output, " state: ");
710    if (state == NULL) {
711	fprintf(output, "NULL\n");
712	return;
713    }
714    if (state->type == XML_REGEXP_START_STATE)
715	fprintf(output, "START ");
716    if (state->type == XML_REGEXP_FINAL_STATE)
717	fprintf(output, "FINAL ");
718
719    fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
720    for (i = 0;i < state->nbTrans; i++) {
721	xmlRegPrintTrans(output, &(state->trans[i]));
722    }
723}
724
725#if 0
726static void
727xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
728    int i;
729
730    fprintf(output, " ctxt: ");
731    if (ctxt == NULL) {
732	fprintf(output, "NULL\n");
733	return;
734    }
735    fprintf(output, "'%s' ", ctxt->string);
736    if (ctxt->error)
737	fprintf(output, "error ");
738    if (ctxt->neg)
739	fprintf(output, "neg ");
740    fprintf(output, "\n");
741    fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
742    for (i = 0;i < ctxt->nbAtoms; i++) {
743	fprintf(output, " %02d ", i);
744	xmlRegPrintAtom(output, ctxt->atoms[i]);
745    }
746    if (ctxt->atom != NULL) {
747	fprintf(output, "current atom:\n");
748	xmlRegPrintAtom(output, ctxt->atom);
749    }
750    fprintf(output, "%d states:", ctxt->nbStates);
751    if (ctxt->start != NULL)
752	fprintf(output, " start: %d", ctxt->start->no);
753    if (ctxt->end != NULL)
754	fprintf(output, " end: %d", ctxt->end->no);
755    fprintf(output, "\n");
756    for (i = 0;i < ctxt->nbStates; i++) {
757	xmlRegPrintState(output, ctxt->states[i]);
758    }
759    fprintf(output, "%d counters:\n", ctxt->nbCounters);
760    for (i = 0;i < ctxt->nbCounters; i++) {
761	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
762		                                ctxt->counters[i].max);
763    }
764}
765#endif
766
767/************************************************************************
768 * 									*
769 *		 Finite Automata structures manipulations		*
770 * 									*
771 ************************************************************************/
772
773static void
774xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
775	           int neg, xmlRegAtomType type, int start, int end,
776		   xmlChar *blockName) {
777    xmlRegRangePtr range;
778
779    if (atom == NULL) {
780	ERROR("add range: atom is NULL");
781	return;
782    }
783    if (atom->type != XML_REGEXP_RANGES) {
784	ERROR("add range: atom is not ranges");
785	return;
786    }
787    if (atom->maxRanges == 0) {
788	atom->maxRanges = 4;
789	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
790		                             sizeof(xmlRegRangePtr));
791	if (atom->ranges == NULL) {
792	    ERROR("add range: allocation failed");
793	    atom->maxRanges = 0;
794	    return;
795	}
796    } else if (atom->nbRanges >= atom->maxRanges) {
797	xmlRegRangePtr *tmp;
798	atom->maxRanges *= 2;
799	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
800		                             sizeof(xmlRegRangePtr));
801	if (tmp == NULL) {
802	    ERROR("add range: allocation failed");
803	    atom->maxRanges /= 2;
804	    return;
805	}
806	atom->ranges = tmp;
807    }
808    range = xmlRegNewRange(ctxt, neg, type, start, end);
809    if (range == NULL)
810	return;
811    range->blockName = blockName;
812    atom->ranges[atom->nbRanges++] = range;
813
814}
815
816static int
817xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
818    if (ctxt->maxCounters == 0) {
819	ctxt->maxCounters = 4;
820	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
821		                             sizeof(xmlRegCounter));
822	if (ctxt->counters == NULL) {
823	    ERROR("reg counter: allocation failed");
824	    ctxt->maxCounters = 0;
825	    return(-1);
826	}
827    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
828	xmlRegCounter *tmp;
829	ctxt->maxCounters *= 2;
830	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
831		                           sizeof(xmlRegCounter));
832	if (tmp == NULL) {
833	    ERROR("reg counter: allocation failed");
834	    ctxt->maxCounters /= 2;
835	    return(-1);
836	}
837	ctxt->counters = tmp;
838    }
839    ctxt->counters[ctxt->nbCounters].min = -1;
840    ctxt->counters[ctxt->nbCounters].max = -1;
841    return(ctxt->nbCounters++);
842}
843
844static void
845xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
846    if (atom == NULL) {
847	ERROR("atom push: atom is NULL");
848	return;
849    }
850    if (ctxt->maxAtoms == 0) {
851	ctxt->maxAtoms = 4;
852	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
853		                             sizeof(xmlRegAtomPtr));
854	if (ctxt->atoms == NULL) {
855	    ERROR("atom push: allocation failed");
856	    ctxt->maxAtoms = 0;
857	    return;
858	}
859    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
860	xmlRegAtomPtr *tmp;
861	ctxt->maxAtoms *= 2;
862	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
863		                             sizeof(xmlRegAtomPtr));
864	if (tmp == NULL) {
865	    ERROR("atom push: allocation failed");
866	    ctxt->maxAtoms /= 2;
867	    return;
868	}
869	ctxt->atoms = tmp;
870    }
871    atom->no = ctxt->nbAtoms;
872    ctxt->atoms[ctxt->nbAtoms++] = atom;
873}
874
875static void
876xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
877	            xmlRegAtomPtr atom, xmlRegStatePtr target,
878		    int counter, int count) {
879    if (state == NULL) {
880	ERROR("add state: state is NULL");
881	return;
882    }
883    if (target == NULL) {
884	ERROR("add state: target is NULL");
885	return;
886    }
887    if (state->maxTrans == 0) {
888	state->maxTrans = 4;
889	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
890		                             sizeof(xmlRegTrans));
891	if (state->trans == NULL) {
892	    ERROR("add range: allocation failed");
893	    state->maxTrans = 0;
894	    return;
895	}
896    } else if (state->nbTrans >= state->maxTrans) {
897	xmlRegTrans *tmp;
898	state->maxTrans *= 2;
899	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
900		                             sizeof(xmlRegTrans));
901	if (tmp == NULL) {
902	    ERROR("add range: allocation failed");
903	    state->maxTrans /= 2;
904	    return;
905	}
906	state->trans = tmp;
907    }
908#ifdef DEBUG_REGEXP_GRAPH
909    printf("Add trans from %d to %d ", state->no, target->no);
910    if (count >= 0)
911	printf("count based %d", count);
912    else if (counter >= 0)
913	printf("counted %d", counter);
914    else if (atom == NULL)
915	printf("epsilon transition");
916    printf("\n");
917#endif
918
919    state->trans[state->nbTrans].atom = atom;
920    state->trans[state->nbTrans].to = target->no;
921    state->trans[state->nbTrans].counter = counter;
922    state->trans[state->nbTrans].count = count;
923    state->nbTrans++;
924}
925
926static void
927xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
928    if (ctxt->maxStates == 0) {
929	ctxt->maxStates = 4;
930	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
931		                             sizeof(xmlRegStatePtr));
932	if (ctxt->states == NULL) {
933	    ERROR("add range: allocation failed");
934	    ctxt->maxStates = 0;
935	    return;
936	}
937    } else if (ctxt->nbStates >= ctxt->maxStates) {
938	xmlRegStatePtr *tmp;
939	ctxt->maxStates *= 2;
940	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
941		                             sizeof(xmlRegStatePtr));
942	if (tmp == NULL) {
943	    ERROR("add range: allocation failed");
944	    ctxt->maxStates /= 2;
945	    return;
946	}
947	ctxt->states = tmp;
948    }
949    state->no = ctxt->nbStates;
950    ctxt->states[ctxt->nbStates++] = state;
951}
952
953/**
954 * xmlFAGenerateAllTransition:
955 * ctxt:  a regexp parser context
956 * from:  the from state
957 * to:  the target state or NULL for building a new one
958 *
959 */
960static void
961xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
962			   xmlRegStatePtr from, xmlRegStatePtr to) {
963    if (to == NULL) {
964	to = xmlRegNewState(ctxt);
965	xmlRegStatePush(ctxt, to);
966	ctxt->state = to;
967    }
968    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
969}
970
971/**
972 * xmlFAGenerateEpsilonTransition:
973 * ctxt:  a regexp parser context
974 * from:  the from state
975 * to:  the target state or NULL for building a new one
976 *
977 */
978static void
979xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
980			       xmlRegStatePtr from, xmlRegStatePtr to) {
981    if (to == NULL) {
982	to = xmlRegNewState(ctxt);
983	xmlRegStatePush(ctxt, to);
984	ctxt->state = to;
985    }
986    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
987}
988
989/**
990 * xmlFAGenerateCountedEpsilonTransition:
991 * ctxt:  a regexp parser context
992 * from:  the from state
993 * to:  the target state or NULL for building a new one
994 * counter:  the counter for that transition
995 *
996 */
997static void
998xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
999	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1000    if (to == NULL) {
1001	to = xmlRegNewState(ctxt);
1002	xmlRegStatePush(ctxt, to);
1003	ctxt->state = to;
1004    }
1005    xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
1006}
1007
1008/**
1009 * xmlFAGenerateCountedTransition:
1010 * ctxt:  a regexp parser context
1011 * from:  the from state
1012 * to:  the target state or NULL for building a new one
1013 * counter:  the counter for that transition
1014 *
1015 */
1016static void
1017xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
1018	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
1019    if (to == NULL) {
1020	to = xmlRegNewState(ctxt);
1021	xmlRegStatePush(ctxt, to);
1022	ctxt->state = to;
1023    }
1024    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
1025}
1026
1027/**
1028 * xmlFAGenerateTransitions:
1029 * ctxt:  a regexp parser context
1030 * from:  the from state
1031 * to:  the target state or NULL for building a new one
1032 * atom:  the atom generating the transition
1033 *
1034 */
1035static void
1036xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
1037	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
1038    if (atom == NULL) {
1039	ERROR("genrate transition: atom == NULL");
1040	return;
1041    }
1042    if (atom->type == XML_REGEXP_SUBREG) {
1043	/*
1044	 * this is a subexpression handling one should not need to
1045	 * create a new node excep for XML_REGEXP_QUANT_RANGE.
1046	 */
1047	xmlRegAtomPush(ctxt, atom);
1048	if ((to != NULL) && (atom->stop != to) &&
1049	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
1050	    /*
1051	     * Generate an epsilon transition to link to the target
1052	     */
1053	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
1054	}
1055	switch (atom->quant) {
1056	    case XML_REGEXP_QUANT_OPT:
1057		atom->quant = XML_REGEXP_QUANT_ONCE;
1058		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1059		break;
1060	    case XML_REGEXP_QUANT_MULT:
1061		atom->quant = XML_REGEXP_QUANT_ONCE;
1062		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
1063		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1064		break;
1065	    case XML_REGEXP_QUANT_PLUS:
1066		atom->quant = XML_REGEXP_QUANT_ONCE;
1067		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
1068		break;
1069	    case XML_REGEXP_QUANT_RANGE: {
1070		int counter;
1071		xmlRegStatePtr newstate;
1072
1073		/*
1074		 * This one is nasty:
1075		 *   1/ register a new counter
1076		 *   2/ register an epsilon transition associated to
1077		 *      this counter going from atom->stop to atom->start
1078		 *   3/ create a new state
1079		 *   4/ generate a counted transition from atom->stop to
1080		 *      that state
1081		 */
1082		counter = xmlRegGetCounter(ctxt);
1083		ctxt->counters[counter].min = atom->min - 1;
1084		ctxt->counters[counter].max = atom->max - 1;
1085		atom->min = 0;
1086		atom->max = 0;
1087		atom->quant = XML_REGEXP_QUANT_ONCE;
1088		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
1089			                              atom->start, counter);
1090		if (to != NULL) {
1091		    newstate = to;
1092		} else {
1093		    newstate = xmlRegNewState(ctxt);
1094		    xmlRegStatePush(ctxt, newstate);
1095		    ctxt->state = newstate;
1096		}
1097		xmlFAGenerateCountedTransition(ctxt, atom->stop,
1098			                       newstate, counter);
1099	    }
1100	    default:
1101		break;
1102	}
1103	return;
1104    } else {
1105	if (to == NULL) {
1106	    to = xmlRegNewState(ctxt);
1107	    xmlRegStatePush(ctxt, to);
1108	}
1109	xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
1110	xmlRegAtomPush(ctxt, atom);
1111	ctxt->state = to;
1112    }
1113    switch (atom->quant) {
1114	case XML_REGEXP_QUANT_OPT:
1115	    atom->quant = XML_REGEXP_QUANT_ONCE;
1116	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1117	    break;
1118	case XML_REGEXP_QUANT_MULT:
1119	    atom->quant = XML_REGEXP_QUANT_ONCE;
1120	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
1121	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1122	    break;
1123	case XML_REGEXP_QUANT_PLUS:
1124	    atom->quant = XML_REGEXP_QUANT_ONCE;
1125	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
1126	    break;
1127	default:
1128	    break;
1129    }
1130}
1131
1132/**
1133 * xmlFAReduceEpsilonTransitions:
1134 * ctxt:  a regexp parser context
1135 * @fromnr:  the from state
1136 * @tonr:  the to state
1137 * @cpunter:  should that transition be associted to a counted
1138 *
1139 */
1140static void
1141xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
1142	                      int tonr, int counter) {
1143    int transnr;
1144    xmlRegStatePtr from;
1145    xmlRegStatePtr to;
1146
1147#ifdef DEBUG_REGEXP_GRAPH
1148    printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
1149#endif
1150    from = ctxt->states[fromnr];
1151    if (from == NULL)
1152	return;
1153    to = ctxt->states[tonr];
1154    if (to == NULL)
1155	return;
1156    if ((to->mark == XML_REGEXP_MARK_START) ||
1157	(to->mark == XML_REGEXP_MARK_VISITED))
1158	return;
1159
1160    to->mark = XML_REGEXP_MARK_VISITED;
1161    if (to->type == XML_REGEXP_FINAL_STATE) {
1162#ifdef DEBUG_REGEXP_GRAPH
1163	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
1164#endif
1165	from->type = XML_REGEXP_FINAL_STATE;
1166    }
1167    for (transnr = 0;transnr < to->nbTrans;transnr++) {
1168	if (to->trans[transnr].atom == NULL) {
1169	    /*
1170	     * Don't remove counted transitions
1171	     * Don't loop either
1172	     */
1173	    if (to->trans[transnr].to != fromnr) {
1174		if (to->trans[transnr].count >= 0) {
1175		    int newto = to->trans[transnr].to;
1176
1177		    xmlRegStateAddTrans(ctxt, from, NULL,
1178					ctxt->states[newto],
1179					-1, to->trans[transnr].count);
1180		} else {
1181#ifdef DEBUG_REGEXP_GRAPH
1182		    printf("Found epsilon trans %d from %d to %d\n",
1183			   transnr, tonr, to->trans[transnr].to);
1184#endif
1185		    if (to->trans[transnr].counter >= 0) {
1186			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1187					      to->trans[transnr].to,
1188					      to->trans[transnr].counter);
1189		    } else {
1190			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
1191					      to->trans[transnr].to,
1192					      counter);
1193		    }
1194		}
1195	    }
1196	} else {
1197	    int newto = to->trans[transnr].to;
1198
1199	    if (to->trans[transnr].counter >= 0) {
1200		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1201				    ctxt->states[newto],
1202				    to->trans[transnr].counter, -1);
1203	    } else {
1204		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
1205				    ctxt->states[newto], counter, -1);
1206	    }
1207
1208	}
1209    }
1210    to->mark = XML_REGEXP_MARK_NORMAL;
1211}
1212
1213/**
1214 * xmlFAEliminateEpsilonTransitions:
1215 * ctxt:  a regexp parser context
1216 *
1217 */
1218static void
1219xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
1220    int statenr, transnr;
1221    xmlRegStatePtr state;
1222
1223    /*
1224     * build the completed transitions bypassing the epsilons
1225     * Use a marking algorithm to avoid loops
1226     */
1227    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1228	state = ctxt->states[statenr];
1229	if (state == NULL)
1230	    continue;
1231	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1232	    if ((state->trans[transnr].atom == NULL) &&
1233		(state->trans[transnr].to >= 0)) {
1234		if (state->trans[transnr].to == statenr) {
1235		    state->trans[transnr].to = -1;
1236#ifdef DEBUG_REGEXP_GRAPH
1237		    printf("Removed loopback epsilon trans %d on %d\n",
1238			   transnr, statenr);
1239#endif
1240		} else if (state->trans[transnr].count < 0) {
1241		    int newto = state->trans[transnr].to;
1242
1243#ifdef DEBUG_REGEXP_GRAPH
1244		    printf("Found epsilon trans %d from %d to %d\n",
1245			   transnr, statenr, newto);
1246#endif
1247		    state->mark = XML_REGEXP_MARK_START;
1248		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
1249				      newto, state->trans[transnr].counter);
1250		    state->mark = XML_REGEXP_MARK_NORMAL;
1251#ifdef DEBUG_REGEXP_GRAPH
1252		} else {
1253		    printf("Found counted transition %d on %d\n",
1254			   transnr, statenr);
1255#endif
1256	        }
1257	    }
1258	}
1259    }
1260    /*
1261     * Eliminate the epsilon transitions
1262     */
1263    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
1264	state = ctxt->states[statenr];
1265	if (state == NULL)
1266	    continue;
1267	for (transnr = 0;transnr < state->nbTrans;transnr++) {
1268	    if ((state->trans[transnr].atom == NULL) &&
1269		(state->trans[transnr].count < 0) &&
1270		(state->trans[transnr].to >= 0)) {
1271		state->trans[transnr].to = -1;
1272	    }
1273	}
1274    }
1275}
1276
1277/************************************************************************
1278 * 									*
1279 *	Routines to check input against transition atoms		*
1280 * 									*
1281 ************************************************************************/
1282
1283static int
1284xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
1285	                  int start, int end, const xmlChar *blockName) {
1286    int ret = 0;
1287
1288    switch (type) {
1289        case XML_REGEXP_STRING:
1290        case XML_REGEXP_SUBREG:
1291        case XML_REGEXP_RANGES:
1292        case XML_REGEXP_EPSILON:
1293	    return(-1);
1294        case XML_REGEXP_ANYCHAR:
1295	    ret = ((codepoint != '\n') && (codepoint != '\r'));
1296	    break;
1297        case XML_REGEXP_CHARVAL:
1298	    ret = ((codepoint >= start) && (codepoint <= end));
1299	    break;
1300        case XML_REGEXP_NOTSPACE:
1301	    neg = !neg;
1302        case XML_REGEXP_ANYSPACE:
1303	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
1304		   (codepoint == '\t') || (codepoint == ' '));
1305	    break;
1306        case XML_REGEXP_NOTINITNAME:
1307	    neg = !neg;
1308        case XML_REGEXP_INITNAME:
1309	    ret = (xmlIsLetter(codepoint) ||
1310		   (codepoint == '_') || (codepoint == ':'));
1311	    break;
1312        case XML_REGEXP_NOTNAMECHAR:
1313	    neg = !neg;
1314        case XML_REGEXP_NAMECHAR:
1315	    ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
1316		   (codepoint == '.') || (codepoint == '-') ||
1317		   (codepoint == '_') || (codepoint == ':') ||
1318		   xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
1319	    break;
1320        case XML_REGEXP_NOTDECIMAL:
1321	    neg = !neg;
1322        case XML_REGEXP_DECIMAL:
1323	    ret = xmlUCSIsCatNd(codepoint);
1324	    break;
1325        case XML_REGEXP_REALCHAR:
1326	    neg = !neg;
1327        case XML_REGEXP_NOTREALCHAR:
1328	    ret = xmlUCSIsCatP(codepoint);
1329	    if (ret == 0)
1330		ret = xmlUCSIsCatZ(codepoint);
1331	    if (ret == 0)
1332		ret = xmlUCSIsCatC(codepoint);
1333	    break;
1334        case XML_REGEXP_LETTER:
1335	    ret = xmlUCSIsCatL(codepoint);
1336	    break;
1337        case XML_REGEXP_LETTER_UPPERCASE:
1338	    ret = xmlUCSIsCatLu(codepoint);
1339	    break;
1340        case XML_REGEXP_LETTER_LOWERCASE:
1341	    ret = xmlUCSIsCatLl(codepoint);
1342	    break;
1343        case XML_REGEXP_LETTER_TITLECASE:
1344	    ret = xmlUCSIsCatLt(codepoint);
1345	    break;
1346        case XML_REGEXP_LETTER_MODIFIER:
1347	    ret = xmlUCSIsCatLm(codepoint);
1348	    break;
1349        case XML_REGEXP_LETTER_OTHERS:
1350	    ret = xmlUCSIsCatLo(codepoint);
1351	    break;
1352        case XML_REGEXP_MARK:
1353	    ret = xmlUCSIsCatM(codepoint);
1354	    break;
1355        case XML_REGEXP_MARK_NONSPACING:
1356	    ret = xmlUCSIsCatMn(codepoint);
1357	    break;
1358        case XML_REGEXP_MARK_SPACECOMBINING:
1359	    ret = xmlUCSIsCatMc(codepoint);
1360	    break;
1361        case XML_REGEXP_MARK_ENCLOSING:
1362	    ret = xmlUCSIsCatMe(codepoint);
1363	    break;
1364        case XML_REGEXP_NUMBER:
1365	    ret = xmlUCSIsCatN(codepoint);
1366	    break;
1367        case XML_REGEXP_NUMBER_DECIMAL:
1368	    ret = xmlUCSIsCatNd(codepoint);
1369	    break;
1370        case XML_REGEXP_NUMBER_LETTER:
1371	    ret = xmlUCSIsCatNl(codepoint);
1372	    break;
1373        case XML_REGEXP_NUMBER_OTHERS:
1374	    ret = xmlUCSIsCatNo(codepoint);
1375	    break;
1376        case XML_REGEXP_PUNCT:
1377	    ret = xmlUCSIsCatP(codepoint);
1378	    break;
1379        case XML_REGEXP_PUNCT_CONNECTOR:
1380	    ret = xmlUCSIsCatPc(codepoint);
1381	    break;
1382        case XML_REGEXP_PUNCT_DASH:
1383	    ret = xmlUCSIsCatPd(codepoint);
1384	    break;
1385        case XML_REGEXP_PUNCT_OPEN:
1386	    ret = xmlUCSIsCatPs(codepoint);
1387	    break;
1388        case XML_REGEXP_PUNCT_CLOSE:
1389	    ret = xmlUCSIsCatPe(codepoint);
1390	    break;
1391        case XML_REGEXP_PUNCT_INITQUOTE:
1392	    ret = xmlUCSIsCatPi(codepoint);
1393	    break;
1394        case XML_REGEXP_PUNCT_FINQUOTE:
1395	    ret = xmlUCSIsCatPf(codepoint);
1396	    break;
1397        case XML_REGEXP_PUNCT_OTHERS:
1398	    ret = xmlUCSIsCatPo(codepoint);
1399	    break;
1400        case XML_REGEXP_SEPAR:
1401	    ret = xmlUCSIsCatZ(codepoint);
1402	    break;
1403        case XML_REGEXP_SEPAR_SPACE:
1404	    ret = xmlUCSIsCatZs(codepoint);
1405	    break;
1406        case XML_REGEXP_SEPAR_LINE:
1407	    ret = xmlUCSIsCatZl(codepoint);
1408	    break;
1409        case XML_REGEXP_SEPAR_PARA:
1410	    ret = xmlUCSIsCatZp(codepoint);
1411	    break;
1412        case XML_REGEXP_SYMBOL:
1413	    ret = xmlUCSIsCatS(codepoint);
1414	    break;
1415        case XML_REGEXP_SYMBOL_MATH:
1416	    ret = xmlUCSIsCatSm(codepoint);
1417	    break;
1418        case XML_REGEXP_SYMBOL_CURRENCY:
1419	    ret = xmlUCSIsCatSc(codepoint);
1420	    break;
1421        case XML_REGEXP_SYMBOL_MODIFIER:
1422	    ret = xmlUCSIsCatSk(codepoint);
1423	    break;
1424        case XML_REGEXP_SYMBOL_OTHERS:
1425	    ret = xmlUCSIsCatSo(codepoint);
1426	    break;
1427        case XML_REGEXP_OTHER:
1428	    ret = xmlUCSIsCatC(codepoint);
1429	    break;
1430        case XML_REGEXP_OTHER_CONTROL:
1431	    ret = xmlUCSIsCatCc(codepoint);
1432	    break;
1433        case XML_REGEXP_OTHER_FORMAT:
1434	    ret = xmlUCSIsCatCf(codepoint);
1435	    break;
1436        case XML_REGEXP_OTHER_PRIVATE:
1437	    ret = xmlUCSIsCatCo(codepoint);
1438	    break;
1439        case XML_REGEXP_OTHER_NA:
1440	    /* ret = xmlUCSIsCatCn(codepoint); */
1441	    /* Seems it doesn't exist anymore in recent Unicode releases */
1442	    ret = 0;
1443	    break;
1444        case XML_REGEXP_BLOCK_NAME:
1445	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
1446	    break;
1447    }
1448    if (neg)
1449	return(!ret);
1450    return(ret);
1451}
1452
1453static int
1454xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
1455    int i, ret = 0;
1456    xmlRegRangePtr range;
1457
1458    if ((atom == NULL) || (!xmlIsChar(codepoint)))
1459	return(-1);
1460
1461    switch (atom->type) {
1462        case XML_REGEXP_SUBREG:
1463        case XML_REGEXP_EPSILON:
1464	    return(-1);
1465        case XML_REGEXP_CHARVAL:
1466            return(codepoint == atom->codepoint);
1467        case XML_REGEXP_RANGES: {
1468	    int accept = 0;
1469	    for (i = 0;i < atom->nbRanges;i++) {
1470		range = atom->ranges[i];
1471		if (range->neg) {
1472		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1473						0, range->start, range->end,
1474						range->blockName);
1475		    if (ret != 0)
1476			return(0); /* excluded char */
1477		} else {
1478		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
1479						0, range->start, range->end,
1480						range->blockName);
1481		    if (ret != 0)
1482			accept = 1; /* might still be excluded */
1483		}
1484	    }
1485	    return(accept);
1486	}
1487        case XML_REGEXP_STRING:
1488	    printf("TODO: XML_REGEXP_STRING\n");
1489	    return(-1);
1490        case XML_REGEXP_ANYCHAR:
1491        case XML_REGEXP_ANYSPACE:
1492        case XML_REGEXP_NOTSPACE:
1493        case XML_REGEXP_INITNAME:
1494        case XML_REGEXP_NOTINITNAME:
1495        case XML_REGEXP_NAMECHAR:
1496        case XML_REGEXP_NOTNAMECHAR:
1497        case XML_REGEXP_DECIMAL:
1498        case XML_REGEXP_NOTDECIMAL:
1499        case XML_REGEXP_REALCHAR:
1500        case XML_REGEXP_NOTREALCHAR:
1501        case XML_REGEXP_LETTER:
1502        case XML_REGEXP_LETTER_UPPERCASE:
1503        case XML_REGEXP_LETTER_LOWERCASE:
1504        case XML_REGEXP_LETTER_TITLECASE:
1505        case XML_REGEXP_LETTER_MODIFIER:
1506        case XML_REGEXP_LETTER_OTHERS:
1507        case XML_REGEXP_MARK:
1508        case XML_REGEXP_MARK_NONSPACING:
1509        case XML_REGEXP_MARK_SPACECOMBINING:
1510        case XML_REGEXP_MARK_ENCLOSING:
1511        case XML_REGEXP_NUMBER:
1512        case XML_REGEXP_NUMBER_DECIMAL:
1513        case XML_REGEXP_NUMBER_LETTER:
1514        case XML_REGEXP_NUMBER_OTHERS:
1515        case XML_REGEXP_PUNCT:
1516        case XML_REGEXP_PUNCT_CONNECTOR:
1517        case XML_REGEXP_PUNCT_DASH:
1518        case XML_REGEXP_PUNCT_OPEN:
1519        case XML_REGEXP_PUNCT_CLOSE:
1520        case XML_REGEXP_PUNCT_INITQUOTE:
1521        case XML_REGEXP_PUNCT_FINQUOTE:
1522        case XML_REGEXP_PUNCT_OTHERS:
1523        case XML_REGEXP_SEPAR:
1524        case XML_REGEXP_SEPAR_SPACE:
1525        case XML_REGEXP_SEPAR_LINE:
1526        case XML_REGEXP_SEPAR_PARA:
1527        case XML_REGEXP_SYMBOL:
1528        case XML_REGEXP_SYMBOL_MATH:
1529        case XML_REGEXP_SYMBOL_CURRENCY:
1530        case XML_REGEXP_SYMBOL_MODIFIER:
1531        case XML_REGEXP_SYMBOL_OTHERS:
1532        case XML_REGEXP_OTHER:
1533        case XML_REGEXP_OTHER_CONTROL:
1534        case XML_REGEXP_OTHER_FORMAT:
1535        case XML_REGEXP_OTHER_PRIVATE:
1536        case XML_REGEXP_OTHER_NA:
1537	case XML_REGEXP_BLOCK_NAME:
1538	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
1539		                            (const xmlChar *)atom->valuep);
1540	    if (atom->neg)
1541		ret = !ret;
1542	    break;
1543    }
1544    return(ret);
1545}
1546
1547/************************************************************************
1548 * 									*
1549 *	Saving an restoring state of an execution context		*
1550 * 									*
1551 ************************************************************************/
1552
1553#ifdef DEBUG_REGEXP_EXEC
1554static void
1555xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
1556    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
1557    if (exec->inputStack != NULL) {
1558	int i;
1559	printf(": ");
1560	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
1561	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
1562    } else {
1563	printf(": %s", &(exec->inputString[exec->index]));
1564    }
1565    printf("\n");
1566}
1567#endif
1568
1569static void
1570xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
1571#ifdef DEBUG_REGEXP_EXEC
1572    printf("saving ");
1573    exec->transno++;
1574    xmlFARegDebugExec(exec);
1575    exec->transno--;
1576#endif
1577
1578    if (exec->maxRollbacks == 0) {
1579	exec->maxRollbacks = 4;
1580	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
1581		                             sizeof(xmlRegExecRollback));
1582	if (exec->rollbacks == NULL) {
1583	    fprintf(stderr, "exec save: allocation failed");
1584	    exec->maxRollbacks = 0;
1585	    return;
1586	}
1587	memset(exec->rollbacks, 0,
1588	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
1589    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
1590	xmlRegExecRollback *tmp;
1591	int len = exec->maxRollbacks;
1592
1593	exec->maxRollbacks *= 2;
1594	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
1595			exec->maxRollbacks * sizeof(xmlRegExecRollback));
1596	if (tmp == NULL) {
1597	    fprintf(stderr, "exec save: allocation failed");
1598	    exec->maxRollbacks /= 2;
1599	    return;
1600	}
1601	exec->rollbacks = tmp;
1602	tmp = &exec->rollbacks[len];
1603	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
1604    }
1605    exec->rollbacks[exec->nbRollbacks].state = exec->state;
1606    exec->rollbacks[exec->nbRollbacks].index = exec->index;
1607    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
1608    if (exec->comp->nbCounters > 0) {
1609	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
1610	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
1611		xmlMalloc(exec->comp->nbCounters * sizeof(int));
1612	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
1613		fprintf(stderr, "exec save: allocation failed");
1614		exec->status = -5;
1615		return;
1616	    }
1617	}
1618	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
1619	       exec->comp->nbCounters * sizeof(int));
1620    }
1621    exec->nbRollbacks++;
1622}
1623
1624static void
1625xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
1626    if (exec->nbRollbacks <= 0) {
1627	exec->status = -1;
1628#ifdef DEBUG_REGEXP_EXEC
1629	printf("rollback failed on empty stack\n");
1630#endif
1631	return;
1632    }
1633    exec->nbRollbacks--;
1634    exec->state = exec->rollbacks[exec->nbRollbacks].state;
1635    exec->index = exec->rollbacks[exec->nbRollbacks].index;
1636    exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
1637    if (exec->comp->nbCounters > 0) {
1638	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
1639	    fprintf(stderr, "exec save: allocation failed");
1640	    exec->status = -6;
1641	    return;
1642	}
1643	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
1644	       exec->comp->nbCounters * sizeof(int));
1645    }
1646
1647#ifdef DEBUG_REGEXP_EXEC
1648    printf("restored ");
1649    xmlFARegDebugExec(exec);
1650#endif
1651}
1652
1653/************************************************************************
1654 * 									*
1655 *	Verifyer, running an input against a compiled regexp		*
1656 * 									*
1657 ************************************************************************/
1658
1659static int
1660xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
1661    xmlRegExecCtxt execval;
1662    xmlRegExecCtxtPtr exec = &execval;
1663    int ret, codepoint, len;
1664
1665    exec->inputString = content;
1666    exec->index = 0;
1667    exec->determinist = 1;
1668    exec->maxRollbacks = 0;
1669    exec->nbRollbacks = 0;
1670    exec->rollbacks = NULL;
1671    exec->status = 0;
1672    exec->comp = comp;
1673    exec->state = comp->states[0];
1674    exec->transno = 0;
1675    exec->transcount = 0;
1676    if (comp->nbCounters > 0) {
1677	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
1678	if (exec->counts == NULL)
1679	    return(-1);
1680        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
1681    } else
1682	exec->counts = NULL;
1683    while ((exec->status == 0) &&
1684	   ((exec->inputString[exec->index] != 0) ||
1685	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
1686	xmlRegTransPtr trans;
1687	xmlRegAtomPtr atom;
1688
1689	/*
1690	 * End of input on non-terminal state, rollback, however we may
1691	 * still have epsilon like transition for counted transitions
1692	 * on counters, in that case don't break too early.
1693	 */
1694	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
1695	    goto rollback;
1696
1697	exec->transcount = 0;
1698	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
1699	    trans = &exec->state->trans[exec->transno];
1700	    if (trans->to < 0)
1701		continue;
1702	    atom = trans->atom;
1703	    ret = 0;
1704	    if (trans->count >= 0) {
1705		int count;
1706		xmlRegCounterPtr counter;
1707
1708		/*
1709		 * A counted transition.
1710		 */
1711
1712		count = exec->counts[trans->count];
1713		counter = &exec->comp->counters[trans->count];
1714#ifdef DEBUG_REGEXP_EXEC
1715		printf("testing count %d: val %d, min %d, max %d\n",
1716		       trans->count, count, counter->min,  counter->max);
1717#endif
1718		ret = ((count >= counter->min) && (count <= counter->max));
1719	    } else if (atom == NULL) {
1720		fprintf(stderr, "epsilon transition left at runtime\n");
1721		exec->status = -2;
1722		break;
1723	    } else if (exec->inputString[exec->index] != 0) {
1724                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
1725		ret = xmlRegCheckCharacter(atom, codepoint);
1726		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
1727		    xmlRegStatePtr to = comp->states[trans->to];
1728
1729		    /*
1730		     * this is a multiple input sequence
1731		     */
1732		    if (exec->state->nbTrans > exec->transno + 1) {
1733			xmlFARegExecSave(exec);
1734		    }
1735		    exec->transcount = 1;
1736		    do {
1737			/*
1738			 * Try to progress as much as possible on the input
1739			 */
1740			if (exec->transcount == atom->max) {
1741			    break;
1742			}
1743			exec->index += len;
1744			/*
1745			 * End of input: stop here
1746			 */
1747			if (exec->inputString[exec->index] == 0) {
1748			    exec->index -= len;
1749			    break;
1750			}
1751			if (exec->transcount >= atom->min) {
1752			    int transno = exec->transno;
1753			    xmlRegStatePtr state = exec->state;
1754
1755			    /*
1756			     * The transition is acceptable save it
1757			     */
1758			    exec->transno = -1; /* trick */
1759			    exec->state = to;
1760			    xmlFARegExecSave(exec);
1761			    exec->transno = transno;
1762			    exec->state = state;
1763			}
1764			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
1765				              len);
1766			ret = xmlRegCheckCharacter(atom, codepoint);
1767			exec->transcount++;
1768		    } while (ret == 1);
1769		    if (exec->transcount < atom->min)
1770			ret = 0;
1771
1772		    /*
1773		     * If the last check failed but one transition was found
1774		     * possible, rollback
1775		     */
1776		    if (ret < 0)
1777			ret = 0;
1778		    if (ret == 0) {
1779			goto rollback;
1780		    }
1781		}
1782	    }
1783	    if (ret == 1) {
1784		if (exec->state->nbTrans > exec->transno + 1) {
1785		    xmlFARegExecSave(exec);
1786		}
1787		if (trans->counter >= 0) {
1788#ifdef DEBUG_REGEXP_EXEC
1789		    printf("Increasing count %d\n", trans->counter);
1790#endif
1791		    exec->counts[trans->counter]++;
1792		}
1793#ifdef DEBUG_REGEXP_EXEC
1794		printf("entering state %d\n", trans->to);
1795#endif
1796		exec->state = comp->states[trans->to];
1797		exec->transno = 0;
1798		if (trans->atom != NULL) {
1799		    exec->index += len;
1800		}
1801		goto progress;
1802	    } else if (ret < 0) {
1803		exec->status = -4;
1804		break;
1805	    }
1806	}
1807	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
1808rollback:
1809	    /*
1810	     * Failed to find a way out
1811	     */
1812	    exec->determinist = 0;
1813	    xmlFARegExecRollBack(exec);
1814	}
1815progress:
1816	continue;
1817    }
1818    if (exec->rollbacks != NULL) {
1819	if (exec->counts != NULL) {
1820	    int i;
1821
1822	    for (i = 0;i < exec->maxRollbacks;i++)
1823		if (exec->rollbacks[i].counts != NULL)
1824		    xmlFree(exec->rollbacks[i].counts);
1825	}
1826	xmlFree(exec->rollbacks);
1827    }
1828    if (exec->counts != NULL)
1829	xmlFree(exec->counts);
1830    if (exec->status == 0)
1831	return(1);
1832    if (exec->status == -1)
1833	return(0);
1834    return(exec->status);
1835}
1836
1837/************************************************************************
1838 * 									*
1839 *	Progressive interface to the verifyer one atom at a time	*
1840 * 									*
1841 ************************************************************************/
1842
1843/**
1844 * xmlRegExecCtxtPtr:
1845 * @comp: a precompiled regular expression
1846 * @callback: a callback function used for handling progresses in the
1847 *            automata matching phase
1848 * @data: the context data associated to the callback in this context
1849 *
1850 * Build a context used for progressive evaluation of a regexp.
1851 */
1852xmlRegExecCtxtPtr
1853xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
1854    xmlRegExecCtxtPtr exec;
1855
1856    if (comp == NULL)
1857	return(NULL);
1858    exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
1859    if (exec == NULL) {
1860	return(NULL);
1861    }
1862    memset(exec, 0, sizeof(xmlRegExecCtxt));
1863    exec->inputString = NULL;
1864    exec->index = 0;
1865    exec->determinist = 1;
1866    exec->maxRollbacks = 0;
1867    exec->nbRollbacks = 0;
1868    exec->rollbacks = NULL;
1869    exec->status = 0;
1870    exec->comp = comp;
1871    exec->state = comp->states[0];
1872    exec->transno = 0;
1873    exec->transcount = 0;
1874    exec->callback = callback;
1875    exec->data = data;
1876    if (comp->nbCounters > 0) {
1877	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
1878	if (exec->counts == NULL) {
1879	    xmlFree(exec);
1880	    return(NULL);
1881	}
1882        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
1883    } else
1884	exec->counts = NULL;
1885    exec->inputStackMax = 0;
1886    exec->inputStackNr = 0;
1887    exec->inputStack = NULL;
1888    return(exec);
1889}
1890
1891/**
1892 * xmlRegFreeExecCtxt:
1893 * @exec: a regular expression evaulation context
1894 *
1895 * Free the structures associated to a regular expression evaulation context.
1896 */
1897void
1898xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
1899    if (exec == NULL)
1900	return;
1901
1902    if (exec->rollbacks != NULL) {
1903	if (exec->counts != NULL) {
1904	    int i;
1905
1906	    for (i = 0;i < exec->maxRollbacks;i++)
1907		if (exec->rollbacks[i].counts != NULL)
1908		    xmlFree(exec->rollbacks[i].counts);
1909	}
1910	xmlFree(exec->rollbacks);
1911    }
1912    if (exec->counts != NULL)
1913	xmlFree(exec->counts);
1914    if (exec->inputStack != NULL) {
1915	int i;
1916
1917	for (i = 0;i < exec->inputStackNr;i++)
1918	    xmlFree(exec->inputStack[i].value);
1919	xmlFree(exec->inputStack);
1920    }
1921    xmlFree(exec);
1922}
1923
1924static void
1925xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
1926	                    void *data) {
1927#ifdef DEBUG_PUSH
1928    printf("saving value: %d:%s\n", exec->inputStackNr, value);
1929#endif
1930    if (exec->inputStackMax == 0) {
1931	exec->inputStackMax = 4;
1932	exec->inputStack = (xmlRegInputTokenPtr)
1933	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
1934	if (exec->inputStack == NULL) {
1935	    fprintf(stderr, "push input: allocation failed");
1936	    exec->inputStackMax = 0;
1937	    return;
1938	}
1939    } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
1940	xmlRegInputTokenPtr tmp;
1941
1942	exec->inputStackMax *= 2;
1943	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
1944			exec->inputStackMax * sizeof(xmlRegInputToken));
1945	if (tmp == NULL) {
1946	    fprintf(stderr, "push input: allocation failed");
1947	    exec->inputStackMax /= 2;
1948	    return;
1949	}
1950	exec->inputStack = tmp;
1951    }
1952    exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
1953    exec->inputStack[exec->inputStackNr].data = data;
1954    exec->inputStackNr++;
1955    exec->inputStack[exec->inputStackNr].value = NULL;
1956    exec->inputStack[exec->inputStackNr].data = NULL;
1957}
1958
1959
1960/**
1961 * xmlRegExecPushString:
1962 * @exec: a regexp execution context
1963 * @value: a string token input
1964 * @data: data associated to the token to reuse in callbacks
1965 *
1966 * Push one input token in the execution context
1967 *
1968 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
1969 *     a negative value in case of error.
1970 */
1971int
1972xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
1973	             void *data) {
1974    xmlRegTransPtr trans;
1975    xmlRegAtomPtr atom;
1976    int ret;
1977    int final = 0;
1978
1979    if (exec == NULL)
1980	return(-1);
1981    if (exec->status != 0)
1982	return(exec->status);
1983
1984    if (value == NULL) {
1985        if (exec->state->type == XML_REGEXP_FINAL_STATE)
1986	    return(1);
1987	final = 1;
1988    }
1989
1990#ifdef DEBUG_PUSH
1991    printf("value pushed: %s\n", value);
1992#endif
1993    /*
1994     * If we have an active rollback stack push the new value there
1995     * and get back to where we were left
1996     */
1997    if ((value != NULL) && (exec->inputStackNr > 0)) {
1998	xmlFARegExecSaveInputString(exec, value, data);
1999	value = exec->inputStack[exec->index].value;
2000	data = exec->inputStack[exec->index].data;
2001#ifdef DEBUG_PUSH
2002	printf("value loaded: %s\n", value);
2003#endif
2004    }
2005
2006    while ((exec->status == 0) &&
2007	   ((value != NULL) ||
2008	    ((final == 1) &&
2009	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
2010
2011	/*
2012	 * End of input on non-terminal state, rollback, however we may
2013	 * still have epsilon like transition for counted transitions
2014	 * on counters, in that case don't break too early.
2015	 */
2016	if ((value == NULL) && (exec->counts == NULL))
2017	    goto rollback;
2018
2019	exec->transcount = 0;
2020	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2021	    trans = &exec->state->trans[exec->transno];
2022	    if (trans->to < 0)
2023		continue;
2024	    atom = trans->atom;
2025	    ret = 0;
2026	    if (trans->count >= 0) {
2027		int count;
2028		xmlRegCounterPtr counter;
2029
2030		/*
2031		 * A counted transition.
2032		 */
2033
2034		count = exec->counts[trans->count];
2035		counter = &exec->comp->counters[trans->count];
2036#ifdef DEBUG_PUSH
2037		printf("testing count %d: val %d, min %d, max %d\n",
2038		       trans->count, count, counter->min,  counter->max);
2039#endif
2040		ret = ((count >= counter->min) && (count <= counter->max));
2041	    } else if (atom == NULL) {
2042		fprintf(stderr, "epsilon transition left at runtime\n");
2043		exec->status = -2;
2044		break;
2045	    } else if (value != NULL) {
2046		ret = xmlStrEqual(value, atom->valuep);
2047		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2048		    xmlRegStatePtr to = exec->comp->states[trans->to];
2049
2050		    /*
2051		     * this is a multiple input sequence
2052		     */
2053		    if (exec->state->nbTrans > exec->transno + 1) {
2054			if (exec->inputStackNr <= 0) {
2055			    xmlFARegExecSaveInputString(exec, value, data);
2056			}
2057			xmlFARegExecSave(exec);
2058		    }
2059		    exec->transcount = 1;
2060		    do {
2061			/*
2062			 * Try to progress as much as possible on the input
2063			 */
2064			if (exec->transcount == atom->max) {
2065			    break;
2066			}
2067			exec->index++;
2068			value = exec->inputStack[exec->index].value;
2069			data = exec->inputStack[exec->index].data;
2070#ifdef DEBUG_PUSH
2071			printf("value loaded: %s\n", value);
2072#endif
2073
2074			/*
2075			 * End of input: stop here
2076			 */
2077			if (value == NULL) {
2078			    exec->index --;
2079			    break;
2080			}
2081			if (exec->transcount >= atom->min) {
2082			    int transno = exec->transno;
2083			    xmlRegStatePtr state = exec->state;
2084
2085			    /*
2086			     * The transition is acceptable save it
2087			     */
2088			    exec->transno = -1; /* trick */
2089			    exec->state = to;
2090			    if (exec->inputStackNr <= 0) {
2091				xmlFARegExecSaveInputString(exec, value, data);
2092			    }
2093			    xmlFARegExecSave(exec);
2094			    exec->transno = transno;
2095			    exec->state = state;
2096			}
2097			ret = xmlStrEqual(value, atom->valuep);
2098			exec->transcount++;
2099		    } while (ret == 1);
2100		    if (exec->transcount < atom->min)
2101			ret = 0;
2102
2103		    /*
2104		     * If the last check failed but one transition was found
2105		     * possible, rollback
2106		     */
2107		    if (ret < 0)
2108			ret = 0;
2109		    if (ret == 0) {
2110			goto rollback;
2111		    }
2112		}
2113	    }
2114	    if (ret == 1) {
2115		if ((exec->callback != NULL) && (atom != NULL)) {
2116		    exec->callback(exec->data, atom->valuep,
2117			           atom->data, data);
2118		}
2119		if (exec->state->nbTrans > exec->transno + 1) {
2120		    if (exec->inputStackNr <= 0) {
2121			xmlFARegExecSaveInputString(exec, value, data);
2122		    }
2123		    xmlFARegExecSave(exec);
2124		}
2125		if (trans->counter >= 0) {
2126#ifdef DEBUG_PUSH
2127		    printf("Increasing count %d\n", trans->counter);
2128#endif
2129		    exec->counts[trans->counter]++;
2130		}
2131#ifdef DEBUG_PUSH
2132		printf("entering state %d\n", trans->to);
2133#endif
2134		exec->state = exec->comp->states[trans->to];
2135		exec->transno = 0;
2136		if (trans->atom != NULL) {
2137		    if (exec->inputStack != NULL) {
2138			exec->index++;
2139			if (exec->index < exec->inputStackNr) {
2140			    value = exec->inputStack[exec->index].value;
2141			    data = exec->inputStack[exec->index].data;
2142#ifdef DEBUG_PUSH
2143			    printf("value loaded: %s\n", value);
2144#endif
2145			} else {
2146			    value = NULL;
2147			    data = NULL;
2148#ifdef DEBUG_PUSH
2149			    printf("end of input\n");
2150#endif
2151			}
2152		    } else {
2153			value = NULL;
2154			data = NULL;
2155#ifdef DEBUG_PUSH
2156			printf("end of input\n");
2157#endif
2158		    }
2159		}
2160		goto progress;
2161	    } else if (ret < 0) {
2162		exec->status = -4;
2163		break;
2164	    }
2165	}
2166	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2167rollback:
2168	    /*
2169	     * Failed to find a way out
2170	     */
2171	    exec->determinist = 0;
2172	    xmlFARegExecRollBack(exec);
2173	    if (exec->status == 0) {
2174		value = exec->inputStack[exec->index].value;
2175		data = exec->inputStack[exec->index].data;
2176#ifdef DEBUG_PUSH
2177		printf("value loaded: %s\n", value);
2178#endif
2179	    }
2180	}
2181progress:
2182	continue;
2183    }
2184    if (exec->status == 0) {
2185        return(exec->state->type == XML_REGEXP_FINAL_STATE);
2186    }
2187    return(exec->status);
2188}
2189
2190#if 0
2191static int
2192xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
2193    xmlRegTransPtr trans;
2194    xmlRegAtomPtr atom;
2195    int ret;
2196    int codepoint, len;
2197
2198    if (exec == NULL)
2199	return(-1);
2200    if (exec->status != 0)
2201	return(exec->status);
2202
2203    while ((exec->status == 0) &&
2204	   ((exec->inputString[exec->index] != 0) ||
2205	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
2206
2207	/*
2208	 * End of input on non-terminal state, rollback, however we may
2209	 * still have epsilon like transition for counted transitions
2210	 * on counters, in that case don't break too early.
2211	 */
2212	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
2213	    goto rollback;
2214
2215	exec->transcount = 0;
2216	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
2217	    trans = &exec->state->trans[exec->transno];
2218	    if (trans->to < 0)
2219		continue;
2220	    atom = trans->atom;
2221	    ret = 0;
2222	    if (trans->count >= 0) {
2223		int count;
2224		xmlRegCounterPtr counter;
2225
2226		/*
2227		 * A counted transition.
2228		 */
2229
2230		count = exec->counts[trans->count];
2231		counter = &exec->comp->counters[trans->count];
2232#ifdef DEBUG_REGEXP_EXEC
2233		printf("testing count %d: val %d, min %d, max %d\n",
2234		       trans->count, count, counter->min,  counter->max);
2235#endif
2236		ret = ((count >= counter->min) && (count <= counter->max));
2237	    } else if (atom == NULL) {
2238		fprintf(stderr, "epsilon transition left at runtime\n");
2239		exec->status = -2;
2240		break;
2241	    } else if (exec->inputString[exec->index] != 0) {
2242                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
2243		ret = xmlRegCheckCharacter(atom, codepoint);
2244		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
2245		    xmlRegStatePtr to = exec->comp->states[trans->to];
2246
2247		    /*
2248		     * this is a multiple input sequence
2249		     */
2250		    if (exec->state->nbTrans > exec->transno + 1) {
2251			xmlFARegExecSave(exec);
2252		    }
2253		    exec->transcount = 1;
2254		    do {
2255			/*
2256			 * Try to progress as much as possible on the input
2257			 */
2258			if (exec->transcount == atom->max) {
2259			    break;
2260			}
2261			exec->index += len;
2262			/*
2263			 * End of input: stop here
2264			 */
2265			if (exec->inputString[exec->index] == 0) {
2266			    exec->index -= len;
2267			    break;
2268			}
2269			if (exec->transcount >= atom->min) {
2270			    int transno = exec->transno;
2271			    xmlRegStatePtr state = exec->state;
2272
2273			    /*
2274			     * The transition is acceptable save it
2275			     */
2276			    exec->transno = -1; /* trick */
2277			    exec->state = to;
2278			    xmlFARegExecSave(exec);
2279			    exec->transno = transno;
2280			    exec->state = state;
2281			}
2282			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
2283				              len);
2284			ret = xmlRegCheckCharacter(atom, codepoint);
2285			exec->transcount++;
2286		    } while (ret == 1);
2287		    if (exec->transcount < atom->min)
2288			ret = 0;
2289
2290		    /*
2291		     * If the last check failed but one transition was found
2292		     * possible, rollback
2293		     */
2294		    if (ret < 0)
2295			ret = 0;
2296		    if (ret == 0) {
2297			goto rollback;
2298		    }
2299		}
2300	    }
2301	    if (ret == 1) {
2302		if (exec->state->nbTrans > exec->transno + 1) {
2303		    xmlFARegExecSave(exec);
2304		}
2305		if (trans->counter >= 0) {
2306#ifdef DEBUG_REGEXP_EXEC
2307		    printf("Increasing count %d\n", trans->counter);
2308#endif
2309		    exec->counts[trans->counter]++;
2310		}
2311#ifdef DEBUG_REGEXP_EXEC
2312		printf("entering state %d\n", trans->to);
2313#endif
2314		exec->state = exec->comp->states[trans->to];
2315		exec->transno = 0;
2316		if (trans->atom != NULL) {
2317		    exec->index += len;
2318		}
2319		goto progress;
2320	    } else if (ret < 0) {
2321		exec->status = -4;
2322		break;
2323	    }
2324	}
2325	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
2326rollback:
2327	    /*
2328	     * Failed to find a way out
2329	     */
2330	    exec->determinist = 0;
2331	    xmlFARegExecRollBack(exec);
2332	}
2333progress:
2334	continue;
2335    }
2336}
2337#endif
2338/************************************************************************
2339 * 									*
2340 *	Parser for the Shemas Datatype Regular Expressions		*
2341 *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
2342 * 									*
2343 ************************************************************************/
2344
2345/**
2346 * xmlFAIsChar:
2347 * ctxt:  a regexp parser context
2348 *
2349 * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
2350 */
2351static int
2352xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
2353    int cur;
2354    int len;
2355
2356    cur = CUR_SCHAR(ctxt->cur, len);
2357    if ((cur == '.') || (cur == '\\') || (cur == '?') ||
2358	(cur == '*') || (cur == '+') || (cur == '(') ||
2359	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
2360	(cur == 0x5D) || (cur == 0))
2361	return(-1);
2362    return(cur);
2363}
2364
2365/**
2366 * xmlFAParseCharProp:
2367 * ctxt:  a regexp parser context
2368 *
2369 * [27]   charProp   ::=   IsCategory | IsBlock
2370 * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
2371 *                       Separators | Symbols | Others
2372 * [29]   Letters   ::=   'L' [ultmo]?
2373 * [30]   Marks   ::=   'M' [nce]?
2374 * [31]   Numbers   ::=   'N' [dlo]?
2375 * [32]   Punctuation   ::=   'P' [cdseifo]?
2376 * [33]   Separators   ::=   'Z' [slp]?
2377 * [34]   Symbols   ::=   'S' [mcko]?
2378 * [35]   Others   ::=   'C' [cfon]?
2379 * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
2380 */
2381static void
2382xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
2383    int cur;
2384    xmlRegAtomType type = 0;
2385    xmlChar *blockName = NULL;
2386
2387    cur = CUR;
2388    if (cur == 'L') {
2389	NEXT;
2390	cur = CUR;
2391	if (cur == 'u') {
2392	    NEXT;
2393	    type = XML_REGEXP_LETTER_UPPERCASE;
2394	} else if (cur == 'l') {
2395	    NEXT;
2396	    type = XML_REGEXP_LETTER_LOWERCASE;
2397	} else if (cur == 't') {
2398	    NEXT;
2399	    type = XML_REGEXP_LETTER_TITLECASE;
2400	} else if (cur == 'm') {
2401	    NEXT;
2402	    type = XML_REGEXP_LETTER_MODIFIER;
2403	} else if (cur == 'o') {
2404	    NEXT;
2405	    type = XML_REGEXP_LETTER_OTHERS;
2406	} else {
2407	    type = XML_REGEXP_LETTER;
2408	}
2409    } else if (cur == 'M') {
2410	NEXT;
2411	cur = CUR;
2412	if (cur == 'n') {
2413	    NEXT;
2414	    /* nonspacing */
2415	    type = XML_REGEXP_MARK_NONSPACING;
2416	} else if (cur == 'c') {
2417	    NEXT;
2418	    /* spacing combining */
2419	    type = XML_REGEXP_MARK_SPACECOMBINING;
2420	} else if (cur == 'e') {
2421	    NEXT;
2422	    /* enclosing */
2423	    type = XML_REGEXP_MARK_ENCLOSING;
2424	} else {
2425	    /* all marks */
2426	    type = XML_REGEXP_MARK;
2427	}
2428    } else if (cur == 'N') {
2429	NEXT;
2430	cur = CUR;
2431	if (cur == 'd') {
2432	    NEXT;
2433	    /* digital */
2434	    type = XML_REGEXP_NUMBER_DECIMAL;
2435	} else if (cur == 'l') {
2436	    NEXT;
2437	    /* letter */
2438	    type = XML_REGEXP_NUMBER_LETTER;
2439	} else if (cur == 'o') {
2440	    NEXT;
2441	    /* other */
2442	    type = XML_REGEXP_NUMBER_OTHERS;
2443	} else {
2444	    /* all numbers */
2445	    type = XML_REGEXP_NUMBER;
2446	}
2447    } else if (cur == 'P') {
2448	NEXT;
2449	cur = CUR;
2450	if (cur == 'c') {
2451	    NEXT;
2452	    /* connector */
2453	    type = XML_REGEXP_PUNCT_CONNECTOR;
2454	} else if (cur == 'd') {
2455	    NEXT;
2456	    /* dash */
2457	    type = XML_REGEXP_PUNCT_DASH;
2458	} else if (cur == 's') {
2459	    NEXT;
2460	    /* open */
2461	    type = XML_REGEXP_PUNCT_OPEN;
2462	} else if (cur == 'e') {
2463	    NEXT;
2464	    /* close */
2465	    type = XML_REGEXP_PUNCT_CLOSE;
2466	} else if (cur == 'i') {
2467	    NEXT;
2468	    /* initial quote */
2469	    type = XML_REGEXP_PUNCT_INITQUOTE;
2470	} else if (cur == 'f') {
2471	    NEXT;
2472	    /* final quote */
2473	    type = XML_REGEXP_PUNCT_FINQUOTE;
2474	} else if (cur == 'o') {
2475	    NEXT;
2476	    /* other */
2477	    type = XML_REGEXP_PUNCT_OTHERS;
2478	} else {
2479	    /* all punctuation */
2480	    type = XML_REGEXP_PUNCT;
2481	}
2482    } else if (cur == 'Z') {
2483	NEXT;
2484	cur = CUR;
2485	if (cur == 's') {
2486	    NEXT;
2487	    /* space */
2488	    type = XML_REGEXP_SEPAR_SPACE;
2489	} else if (cur == 'l') {
2490	    NEXT;
2491	    /* line */
2492	    type = XML_REGEXP_SEPAR_LINE;
2493	} else if (cur == 'p') {
2494	    NEXT;
2495	    /* paragraph */
2496	    type = XML_REGEXP_SEPAR_PARA;
2497	} else {
2498	    /* all separators */
2499	    type = XML_REGEXP_SEPAR;
2500	}
2501    } else if (cur == 'S') {
2502	NEXT;
2503	cur = CUR;
2504	if (cur == 'm') {
2505	    NEXT;
2506	    type = XML_REGEXP_SYMBOL_MATH;
2507	    /* math */
2508	} else if (cur == 'c') {
2509	    NEXT;
2510	    type = XML_REGEXP_SYMBOL_CURRENCY;
2511	    /* currency */
2512	} else if (cur == 'k') {
2513	    NEXT;
2514	    type = XML_REGEXP_SYMBOL_MODIFIER;
2515	    /* modifiers */
2516	} else if (cur == 'o') {
2517	    NEXT;
2518	    type = XML_REGEXP_SYMBOL_OTHERS;
2519	    /* other */
2520	} else {
2521	    /* all symbols */
2522	    type = XML_REGEXP_SYMBOL;
2523	}
2524    } else if (cur == 'C') {
2525	NEXT;
2526	cur = CUR;
2527	if (cur == 'c') {
2528	    NEXT;
2529	    /* control */
2530	    type = XML_REGEXP_OTHER_CONTROL;
2531	} else if (cur == 'f') {
2532	    NEXT;
2533	    /* format */
2534	    type = XML_REGEXP_OTHER_FORMAT;
2535	} else if (cur == 'o') {
2536	    NEXT;
2537	    /* private use */
2538	    type = XML_REGEXP_OTHER_PRIVATE;
2539	} else if (cur == 'n') {
2540	    NEXT;
2541	    /* not assigned */
2542	    type = XML_REGEXP_OTHER_NA;
2543	} else {
2544	    /* all others */
2545	    type = XML_REGEXP_OTHER;
2546	}
2547    } else if (cur == 'I') {
2548	const xmlChar *start;
2549	NEXT;
2550	cur = CUR;
2551	if (cur != 's') {
2552	    ERROR("IsXXXX expected");
2553	    return;
2554	}
2555	NEXT;
2556	start = ctxt->cur;
2557	cur = CUR;
2558	if (((cur >= 'a') && (cur <= 'z')) ||
2559	    ((cur >= 'A') && (cur <= 'Z')) ||
2560	    ((cur >= '0') && (cur <= '9')) ||
2561	    (cur == 0x2D)) {
2562	    NEXT;
2563	    cur = CUR;
2564	    while (((cur >= 'a') && (cur <= 'z')) ||
2565		((cur >= 'A') && (cur <= 'Z')) ||
2566		((cur >= '0') && (cur <= '9')) ||
2567		(cur == 0x2D)) {
2568		NEXT;
2569		cur = CUR;
2570	    }
2571	}
2572	type = XML_REGEXP_BLOCK_NAME;
2573	blockName = xmlStrndup(start, ctxt->cur - start);
2574    } else {
2575	ERROR("Unknown char property");
2576	return;
2577    }
2578    if (ctxt->atom == NULL) {
2579	ctxt->atom = xmlRegNewAtom(ctxt, type);
2580	if (ctxt->atom != NULL)
2581	    ctxt->atom->valuep = blockName;
2582    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
2583        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2584		           type, 0, 0, blockName);
2585    }
2586}
2587
2588/**
2589 * xmlFAParseCharClassEsc:
2590 * ctxt:  a regexp parser context
2591 *
2592 * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
2593 * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
2594 * [25] catEsc   ::=   '\p{' charProp '}'
2595 * [26] complEsc ::=   '\P{' charProp '}'
2596 * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
2597 */
2598static void
2599xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
2600    int cur;
2601
2602    if (CUR == '.') {
2603	if (ctxt->atom == NULL) {
2604	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
2605	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
2606	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2607			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
2608	}
2609	NEXT;
2610	return;
2611    }
2612    if (CUR != '\\') {
2613	ERROR("Escaped sequence: expecting \\");
2614	return;
2615    }
2616    NEXT;
2617    cur = CUR;
2618    if (cur == 'p') {
2619	NEXT;
2620	if (CUR != '{') {
2621	    ERROR("Expecting '{'");
2622	    return;
2623	}
2624	NEXT;
2625	xmlFAParseCharProp(ctxt);
2626	if (CUR != '}') {
2627	    ERROR("Expecting '}'");
2628	    return;
2629	}
2630	NEXT;
2631    } else if (cur == 'P') {
2632	NEXT;
2633	if (CUR != '{') {
2634	    ERROR("Expecting '{'");
2635	    return;
2636	}
2637	NEXT;
2638	xmlFAParseCharProp(ctxt);
2639	ctxt->atom->neg = 1;
2640	if (CUR != '}') {
2641	    ERROR("Expecting '}'");
2642	    return;
2643	}
2644	NEXT;
2645    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
2646	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
2647	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
2648	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
2649	(cur == 0x5E)) {
2650	if (ctxt->atom == NULL) {
2651	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
2652	    if (ctxt->atom != NULL)
2653		ctxt->atom->codepoint = cur;
2654	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
2655	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2656			       XML_REGEXP_CHARVAL, cur, cur, NULL);
2657	}
2658	NEXT;
2659    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
2660	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
2661	(cur == 'w') || (cur == 'W')) {
2662	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
2663
2664	switch (cur) {
2665	    case 's':
2666		type = XML_REGEXP_ANYSPACE;
2667		break;
2668	    case 'S':
2669		type = XML_REGEXP_NOTSPACE;
2670		break;
2671	    case 'i':
2672		type = XML_REGEXP_INITNAME;
2673		break;
2674	    case 'I':
2675		type = XML_REGEXP_NOTINITNAME;
2676		break;
2677	    case 'c':
2678		type = XML_REGEXP_NAMECHAR;
2679		break;
2680	    case 'C':
2681		type = XML_REGEXP_NOTNAMECHAR;
2682		break;
2683	    case 'd':
2684		type = XML_REGEXP_DECIMAL;
2685		break;
2686	    case 'D':
2687		type = XML_REGEXP_NOTDECIMAL;
2688		break;
2689	    case 'w':
2690		type = XML_REGEXP_REALCHAR;
2691		break;
2692	    case 'W':
2693		type = XML_REGEXP_NOTREALCHAR;
2694		break;
2695	}
2696	NEXT;
2697	if (ctxt->atom == NULL) {
2698	    ctxt->atom = xmlRegNewAtom(ctxt, type);
2699	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
2700	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2701			       type, 0, 0, NULL);
2702	}
2703    }
2704}
2705
2706/**
2707 * xmlFAParseCharRef:
2708 * ctxt:  a regexp parser context
2709 *
2710 * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
2711 */
2712static int
2713xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
2714    int ret = 0, cur;
2715
2716    if ((CUR != '&') || (NXT(1) != '#'))
2717	return(-1);
2718    NEXT;
2719    NEXT;
2720    cur = CUR;
2721    if (cur == 'x') {
2722	NEXT;
2723	cur = CUR;
2724	if (((cur >= '0') && (cur <= '9')) ||
2725	    ((cur >= 'a') && (cur <= 'f')) ||
2726	    ((cur >= 'A') && (cur <= 'F'))) {
2727	    while (((cur >= '0') && (cur <= '9')) ||
2728		   ((cur >= 'A') && (cur <= 'F'))) {
2729		if ((cur >= '0') && (cur <= '9'))
2730		    ret = ret * 16 + cur - '0';
2731		else if ((cur >= 'a') && (cur <= 'f'))
2732		    ret = ret * 16 + 10 + (cur - 'a');
2733		else
2734		    ret = ret * 16 + 10 + (cur - 'A');
2735		NEXT;
2736		cur = CUR;
2737	    }
2738	} else {
2739	    ERROR("Char ref: expecting [0-9A-F]");
2740	    return(-1);
2741	}
2742    } else {
2743	if ((cur >= '0') && (cur <= '9')) {
2744	    while ((cur >= '0') && (cur <= '9')) {
2745		ret = ret * 10 + cur - '0';
2746		NEXT;
2747		cur = CUR;
2748	    }
2749	} else {
2750	    ERROR("Char ref: expecting [0-9]");
2751	    return(-1);
2752	}
2753    }
2754    if (cur != ';') {
2755	ERROR("Char ref: expecting ';'");
2756	return(-1);
2757    } else {
2758	NEXT;
2759    }
2760    return(ret);
2761}
2762
2763/**
2764 * xmlFAParseCharRange:
2765 * ctxt:  a regexp parser context
2766 *
2767 * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
2768 * [18]   seRange   ::=   charOrEsc '-' charOrEsc
2769 * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
2770 * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
2771 * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
2772 */
2773static void
2774xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
2775    int cur;
2776    int start = -1;
2777    int end = -1;
2778
2779    if ((CUR == '&') && (NXT(1) == '#')) {
2780	end = start = xmlFAParseCharRef(ctxt);
2781        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2782	                   XML_REGEXP_CHARVAL, start, end, NULL);
2783	return;
2784    }
2785    cur = CUR;
2786    if (cur == '\\') {
2787	NEXT;
2788	cur = CUR;
2789	switch (cur) {
2790	    case 'n': start = 0xA; break;
2791	    case 'r': start = 0xD; break;
2792	    case 't': start = 0x9; break;
2793	    case '\\': case '|': case '.': case '-': case '^': case '?':
2794	    case '*': case '+': case '{': case '}': case '(': case ')':
2795	    case '[': case ']':
2796		start = cur; break;
2797	    default:
2798		ERROR("Invalid escape value");
2799		return;
2800	}
2801	end = start;
2802    } else if ((cur != 0x5B) && (cur != 0x5D)) {
2803	end = start = cur;
2804    } else {
2805	ERROR("Expecting a char range");
2806	return;
2807    }
2808    NEXT;
2809    if (start == '-') {
2810	return;
2811    }
2812    cur = CUR;
2813    if (cur != '-') {
2814        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2815		              XML_REGEXP_CHARVAL, start, end, NULL);
2816	return;
2817    }
2818    NEXT;
2819    cur = CUR;
2820    if (cur == '\\') {
2821	NEXT;
2822	cur = CUR;
2823	switch (cur) {
2824	    case 'n': end = 0xA; break;
2825	    case 'r': end = 0xD; break;
2826	    case 't': end = 0x9; break;
2827	    case '\\': case '|': case '.': case '-': case '^': case '?':
2828	    case '*': case '+': case '{': case '}': case '(': case ')':
2829	    case '[': case ']':
2830		end = cur; break;
2831	    default:
2832		ERROR("Invalid escape value");
2833		return;
2834	}
2835    } else if ((cur != 0x5B) && (cur != 0x5D)) {
2836	end = cur;
2837    } else {
2838	ERROR("Expecting the end of a char range");
2839	return;
2840    }
2841    NEXT;
2842    /* TODO check that the values are acceptable character ranges for XML */
2843    if (end < start) {
2844	ERROR("End of range is before start of range");
2845    } else {
2846        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
2847		           XML_REGEXP_CHARVAL, start, end, NULL);
2848    }
2849    return;
2850}
2851
2852/**
2853 * xmlFAParsePosCharGroup:
2854 * ctxt:  a regexp parser context
2855 *
2856 * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
2857 */
2858static void
2859xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
2860    do {
2861	if ((CUR == '\\') || (CUR == '.')) {
2862	    xmlFAParseCharClassEsc(ctxt);
2863	} else {
2864	    xmlFAParseCharRange(ctxt);
2865	}
2866    } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
2867	     (ctxt->error == 0));
2868}
2869
2870/**
2871 * xmlFAParseCharGroup:
2872 * ctxt:  a regexp parser context
2873 *
2874 * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
2875 * [15]   negCharGroup ::= '^' posCharGroup
2876 * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
2877 * [12]   charClassExpr ::= '[' charGroup ']'
2878 */
2879static void
2880xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
2881    int n = ctxt->neg;
2882    while ((CUR != ']') && (ctxt->error == 0)) {
2883	if (CUR == '^') {
2884	    int neg = ctxt->neg;
2885
2886	    NEXT;
2887	    ctxt->neg = !ctxt->neg;
2888	    xmlFAParsePosCharGroup(ctxt);
2889	    ctxt->neg = neg;
2890	} else if (CUR == '-') {
2891	    NEXT;
2892	    ctxt->neg = !ctxt->neg;
2893	    if (CUR != '[') {
2894		ERROR("charClassExpr: '[' expected");
2895		break;
2896	    }
2897	    NEXT;
2898	    xmlFAParseCharGroup(ctxt);
2899	    if (CUR == ']') {
2900		NEXT;
2901	    } else {
2902		ERROR("charClassExpr: ']' expected");
2903		break;
2904	    }
2905	    break;
2906	} else if (CUR != ']') {
2907	    xmlFAParsePosCharGroup(ctxt);
2908	}
2909    }
2910    ctxt->neg = n;
2911}
2912
2913/**
2914 * xmlFAParseCharClass:
2915 * ctxt:  a regexp parser context
2916 *
2917 * [11]   charClass   ::=     charClassEsc | charClassExpr
2918 * [12]   charClassExpr   ::=   '[' charGroup ']'
2919 */
2920static void
2921xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
2922    if (CUR == '[') {
2923	NEXT;
2924	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
2925	if (ctxt->atom == NULL)
2926	    return;
2927	xmlFAParseCharGroup(ctxt);
2928	if (CUR == ']') {
2929	    NEXT;
2930	} else {
2931	    ERROR("xmlFAParseCharClass: ']' expected");
2932	}
2933    } else {
2934	xmlFAParseCharClassEsc(ctxt);
2935    }
2936}
2937
2938/**
2939 * xmlFAParseQuantExact:
2940 * ctxt:  a regexp parser context
2941 *
2942 * [8]   QuantExact   ::=   [0-9]+
2943 */
2944static int
2945xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
2946    int ret = 0;
2947    int ok = 0;
2948
2949    while ((CUR >= '0') && (CUR <= '9')) {
2950	ret = ret * 10 + (CUR - '0');
2951	ok = 1;
2952	NEXT;
2953    }
2954    if (ok != 1) {
2955	return(-1);
2956    }
2957    return(ret);
2958}
2959
2960/**
2961 * xmlFAParseQuantifier:
2962 * ctxt:  a regexp parser context
2963 *
2964 * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
2965 * [5]   quantity   ::=   quantRange | quantMin | QuantExact
2966 * [6]   quantRange   ::=   QuantExact ',' QuantExact
2967 * [7]   quantMin   ::=   QuantExact ','
2968 * [8]   QuantExact   ::=   [0-9]+
2969 */
2970static int
2971xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
2972    int cur;
2973
2974    cur = CUR;
2975    if ((cur == '?') || (cur == '*') || (cur == '+')) {
2976	if (ctxt->atom != NULL) {
2977	    if (cur == '?')
2978		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
2979	    else if (cur == '*')
2980		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
2981	    else if (cur == '+')
2982		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
2983	}
2984	NEXT;
2985	return(1);
2986    }
2987    if (cur == '{') {
2988	int min = 0, max = 0;
2989
2990	NEXT;
2991	cur = xmlFAParseQuantExact(ctxt);
2992	if (cur >= 0)
2993	    min = cur;
2994	if (CUR == ',') {
2995	    NEXT;
2996	    cur = xmlFAParseQuantExact(ctxt);
2997	    if (cur >= 0)
2998		max = cur;
2999	}
3000	if (CUR == '}') {
3001	    NEXT;
3002	} else {
3003	    ERROR("Unterminated quantifier");
3004	}
3005	if (max == 0)
3006	    max = min;
3007	if (ctxt->atom != NULL) {
3008	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
3009	    ctxt->atom->min = min;
3010	    ctxt->atom->max = max;
3011	}
3012	return(1);
3013    }
3014    return(0);
3015}
3016
3017/**
3018 * xmlFAParseAtom:
3019 * ctxt:  a regexp parser context
3020 *
3021 * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
3022 */
3023static int
3024xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
3025    int codepoint, len;
3026
3027    codepoint = xmlFAIsChar(ctxt);
3028    if (codepoint > 0) {
3029	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
3030	if (ctxt->atom == NULL)
3031	    return(-1);
3032	codepoint = CUR_SCHAR(ctxt->cur, len);
3033	ctxt->atom->codepoint = codepoint;
3034	NEXTL(len);
3035	return(1);
3036    } else if (CUR == '|') {
3037	return(0);
3038    } else if (CUR == 0) {
3039	return(0);
3040    } else if (CUR == ')') {
3041	return(0);
3042    } else if (CUR == '(') {
3043	xmlRegStatePtr start, oldend;
3044
3045	NEXT;
3046	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
3047	start = ctxt->state;
3048	oldend = ctxt->end;
3049	ctxt->end = NULL;
3050	ctxt->atom = NULL;
3051	xmlFAParseRegExp(ctxt, 0);
3052	if (CUR == ')') {
3053	    NEXT;
3054	} else {
3055	    ERROR("xmlFAParseAtom: expecting ')'");
3056	}
3057	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
3058	if (ctxt->atom == NULL)
3059	    return(-1);
3060	ctxt->atom->start = start;
3061	ctxt->atom->stop = ctxt->state;
3062	ctxt->end = oldend;
3063	return(1);
3064    } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
3065	xmlFAParseCharClass(ctxt);
3066	return(1);
3067    }
3068    return(0);
3069}
3070
3071/**
3072 * xmlFAParsePiece:
3073 * ctxt:  a regexp parser context
3074 *
3075 * [3]   piece   ::=   atom quantifier?
3076 */
3077static int
3078xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
3079    int ret;
3080
3081    ctxt->atom = NULL;
3082    ret = xmlFAParseAtom(ctxt);
3083    if (ret == 0)
3084	return(0);
3085    if (ctxt->atom == NULL) {
3086	ERROR("internal: no atom generated");
3087    }
3088    xmlFAParseQuantifier(ctxt);
3089    return(1);
3090}
3091
3092/**
3093 * xmlFAParseBranch:
3094 * ctxt:  a regexp parser context
3095 * first:  is taht the first
3096 *
3097 * [2]   branch   ::=   piece*
3098 */
3099static void
3100xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) {
3101    xmlRegStatePtr previous;
3102    xmlRegAtomPtr prevatom = NULL;
3103    int ret;
3104
3105    previous = ctxt->state;
3106    ret = xmlFAParsePiece(ctxt);
3107    if (ret != 0) {
3108	if (first) {
3109	    xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3110	    previous = ctxt->state;
3111	} else {
3112	    prevatom = ctxt->atom;
3113	}
3114	ctxt->atom = NULL;
3115    }
3116    while ((ret != 0) && (ctxt->error == 0)) {
3117	ret = xmlFAParsePiece(ctxt);
3118	if (ret != 0) {
3119	    if (first) {
3120		xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
3121	    } else {
3122		xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom);
3123		prevatom = ctxt->atom;
3124	    }
3125	    previous = ctxt->state;
3126	    ctxt->atom = NULL;
3127	}
3128    }
3129    if (!first) {
3130	xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom);
3131    }
3132}
3133
3134/**
3135 * xmlFAParseRegExp:
3136 * ctxt:  a regexp parser context
3137 * top:  is that the top-level expressions ?
3138 *
3139 * [1]   regExp   ::=     branch  ( '|' branch )*
3140 */
3141static void
3142xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
3143    xmlRegStatePtr start, end, oldend;
3144
3145    oldend = ctxt->end;
3146
3147    start = ctxt->state;
3148    xmlFAParseBranch(ctxt, (ctxt->end == NULL));
3149    if (CUR != '|') {
3150	ctxt->end = ctxt->state;
3151	return;
3152    }
3153    end = ctxt->state;
3154    while ((CUR == '|') && (ctxt->error == 0)) {
3155	NEXT;
3156	ctxt->state = start;
3157	ctxt->end = end;
3158	xmlFAParseBranch(ctxt, 0);
3159    }
3160    if (!top)
3161	ctxt->end = oldend;
3162}
3163
3164/************************************************************************
3165 * 									*
3166 * 			The basic API					*
3167 * 									*
3168 ************************************************************************/
3169
3170/**
3171 * xmlRegexpPrint:
3172 * @output: the file for the output debug
3173 * @regexp: the compiled regexp
3174 *
3175 * Print the content of the compiled regular expression
3176 */
3177void
3178xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
3179    int i;
3180
3181    fprintf(output, " regexp: ");
3182    if (regexp == NULL) {
3183	fprintf(output, "NULL\n");
3184	return;
3185    }
3186    fprintf(output, "'%s' ", regexp->string);
3187    fprintf(output, "\n");
3188    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
3189    for (i = 0;i < regexp->nbAtoms; i++) {
3190	fprintf(output, " %02d ", i);
3191	xmlRegPrintAtom(output, regexp->atoms[i]);
3192    }
3193    fprintf(output, "%d states:", regexp->nbStates);
3194    fprintf(output, "\n");
3195    for (i = 0;i < regexp->nbStates; i++) {
3196	xmlRegPrintState(output, regexp->states[i]);
3197    }
3198    fprintf(output, "%d counters:\n", regexp->nbCounters);
3199    for (i = 0;i < regexp->nbCounters; i++) {
3200	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
3201		                                regexp->counters[i].max);
3202    }
3203}
3204
3205/**
3206 * xmlRegexpCompile:
3207 * @regexp:  a regular expression string
3208 *
3209 * Parses a regular expression conforming to XML Schemas Part 2 Datatype
3210 * Appendix F and build an automata suitable for testing strings against
3211 * that regular expression
3212 *
3213 * Returns the compiled expression or NULL in case of error
3214 */
3215xmlRegexpPtr
3216xmlRegexpCompile(const xmlChar *regexp) {
3217    xmlRegexpPtr ret;
3218    xmlRegParserCtxtPtr ctxt;
3219
3220    ctxt = xmlRegNewParserCtxt(regexp);
3221    if (ctxt == NULL)
3222	return(NULL);
3223
3224    /* initialize the parser */
3225    ctxt->end = NULL;
3226    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3227    xmlRegStatePush(ctxt, ctxt->start);
3228
3229    /* parse the expression building an automata */
3230    xmlFAParseRegExp(ctxt, 1);
3231    if (CUR != 0) {
3232	ERROR("xmlFAParseRegExp: extra characters");
3233    }
3234    ctxt->end = ctxt->state;
3235    ctxt->start->type = XML_REGEXP_START_STATE;
3236    ctxt->end->type = XML_REGEXP_FINAL_STATE;
3237
3238    /* remove the Epsilon except for counted transitions */
3239    xmlFAEliminateEpsilonTransitions(ctxt);
3240
3241
3242    if (ctxt->error != 0) {
3243	xmlRegFreeParserCtxt(ctxt);
3244	return(NULL);
3245    }
3246    ret = xmlRegEpxFromParse(ctxt);
3247    xmlRegFreeParserCtxt(ctxt);
3248    return(ret);
3249}
3250
3251/**
3252 * xmlRegexpExec:
3253 * @comp:  the compiled regular expression
3254 * @content:  the value to check against the regular expression
3255 *
3256 * Check if the regular expression generate the value
3257 *
3258 * Returns 1 if it matches, 0 if not and a negativa value in case of error
3259 */
3260int
3261xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
3262    if ((comp == NULL) || (content == NULL))
3263	return(-1);
3264    return(xmlFARegExec(comp, content));
3265}
3266
3267/**
3268 * xmlRegFreeRegexp:
3269 * @regexp:  the regexp
3270 *
3271 * Free a regexp
3272 */
3273void
3274xmlRegFreeRegexp(xmlRegexpPtr regexp) {
3275    int i;
3276    if (regexp == NULL)
3277	return;
3278
3279    if (regexp->string != NULL)
3280	xmlFree(regexp->string);
3281    if (regexp->states != NULL) {
3282	for (i = 0;i < regexp->nbStates;i++)
3283	    xmlRegFreeState(regexp->states[i]);
3284	xmlFree(regexp->states);
3285    }
3286    if (regexp->atoms != NULL) {
3287	for (i = 0;i < regexp->nbAtoms;i++)
3288	    xmlRegFreeAtom(regexp->atoms[i]);
3289	xmlFree(regexp->atoms);
3290    }
3291    if (regexp->counters != NULL)
3292	xmlFree(regexp->counters);
3293    xmlFree(regexp);
3294}
3295
3296#ifdef LIBXML_AUTOMATA_ENABLED
3297/************************************************************************
3298 * 									*
3299 * 			The Automata interface				*
3300 * 									*
3301 ************************************************************************/
3302
3303/**
3304 * xmlNewAutomata:
3305 *
3306 * Create a new automata
3307 *
3308 * Returns the new object or NULL in case of failure
3309 */
3310xmlAutomataPtr
3311xmlNewAutomata(void) {
3312    xmlAutomataPtr ctxt;
3313
3314    ctxt = xmlRegNewParserCtxt(NULL);
3315    if (ctxt == NULL)
3316	return(NULL);
3317
3318    /* initialize the parser */
3319    ctxt->end = NULL;
3320    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
3321    xmlRegStatePush(ctxt, ctxt->start);
3322
3323    return(ctxt);
3324}
3325
3326/**
3327 * xmlFreeAutomata:
3328 * @am: an automata
3329 *
3330 * Free an automata
3331 */
3332void
3333xmlFreeAutomata(xmlAutomataPtr am) {
3334    if (am == NULL)
3335	return;
3336    xmlRegFreeParserCtxt(am);
3337}
3338
3339/**
3340 * xmlAutomataGetInitState:
3341 * @am: an automata
3342 *
3343 * Returns the initial state of the automata
3344 */
3345xmlAutomataStatePtr
3346xmlAutomataGetInitState(xmlAutomataPtr am) {
3347    if (am == NULL)
3348	return(NULL);
3349    return(am->start);
3350}
3351
3352/**
3353 * xmlAutomataSetFinalState:
3354 * @am: an automata
3355 * @state: a state in this automata
3356 *
3357 * Makes that state a final state
3358 *
3359 * Returns 0 or -1 in case of error
3360 */
3361int
3362xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
3363    if ((am == NULL) || (state == NULL))
3364	return(-1);
3365    state->type = XML_REGEXP_FINAL_STATE;
3366    return(0);
3367}
3368
3369/**
3370 * xmlAutomataNewTransition:
3371 * @am: an automata
3372 * @from: the starting point of the transition
3373 * @to: the target point of the transition or NULL
3374 * @token: the input string associated to that transition
3375 * @data: data passed to the callback function if the transition is activated
3376 *
3377 * If @to is NULL, this create first a new target state in the automata
3378 * and then adds a transition from the @from state to the target state
3379 * activated by the value of @token
3380 *
3381 * Returns the target state or NULL in case of error
3382 */
3383xmlAutomataStatePtr
3384xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
3385			 xmlAutomataStatePtr to, const xmlChar *token,
3386			 void *data) {
3387    xmlRegAtomPtr atom;
3388
3389    if ((am == NULL) || (from == NULL) || (token == NULL))
3390	return(NULL);
3391    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
3392    atom->data = data;
3393    if (atom == NULL)
3394	return(NULL);
3395    atom->valuep = xmlStrdup(token);
3396
3397    xmlFAGenerateTransitions(am, from, to, atom);
3398    if (to == NULL)
3399	return(am->state);
3400    return(to);
3401}
3402
3403/**
3404 * xmlAutomataNewCountTrans:
3405 * @am: an automata
3406 * @from: the starting point of the transition
3407 * @to: the target point of the transition or NULL
3408 * @token: the input string associated to that transition
3409 * @min:  the minimum successive occurences of token
3410 * @min:  the maximum successive occurences of token
3411 *
3412 * If @to is NULL, this create first a new target state in the automata
3413 * and then adds a transition from the @from state to the target state
3414 * activated by a succession of input of value @token and whose number
3415 * is between @min and @max
3416 *
3417 * Returns the target state or NULL in case of error
3418 */
3419xmlAutomataStatePtr
3420xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
3421			 xmlAutomataStatePtr to, const xmlChar *token,
3422			 int min, int max, void *data) {
3423    xmlRegAtomPtr atom;
3424
3425    if ((am == NULL) || (from == NULL) || (token == NULL))
3426	return(NULL);
3427    if (min < 0)
3428	return(NULL);
3429    if ((max < min) || (max < 1))
3430	return(NULL);
3431    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
3432    if (atom == NULL)
3433	return(NULL);
3434    atom->valuep = xmlStrdup(token);
3435    atom->data = data;
3436    if (min == 0)
3437	atom->min = 1;
3438    else
3439	atom->min = min;
3440    atom->max = max;
3441
3442    xmlFAGenerateTransitions(am, from, to, atom);
3443    if (to == NULL)
3444	to = am->state;
3445    if (to == NULL)
3446	return(NULL);
3447    if (min == 0)
3448	xmlFAGenerateEpsilonTransition(am, from, to);
3449    return(to);
3450}
3451
3452/**
3453 * xmlAutomataNewOnceTrans:
3454 * @am: an automata
3455 * @from: the starting point of the transition
3456 * @to: the target point of the transition or NULL
3457 * @token: the input string associated to that transition
3458 * @min:  the minimum successive occurences of token
3459 * @min:  the maximum successive occurences of token
3460 *
3461 * If @to is NULL, this create first a new target state in the automata
3462 * and then adds a transition from the @from state to the target state
3463 * activated by a succession of input of value @token and whose number
3464 * is between @min and @max, moreover that transistion can only be crossed
3465 * once.
3466 *
3467 * Returns the target state or NULL in case of error
3468 */
3469xmlAutomataStatePtr
3470xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
3471			 xmlAutomataStatePtr to, const xmlChar *token,
3472			 int min, int max, void *data) {
3473    xmlRegAtomPtr atom;
3474    int counter;
3475
3476    if ((am == NULL) || (from == NULL) || (token == NULL))
3477	return(NULL);
3478    if (min < 1)
3479	return(NULL);
3480    if ((max < min) || (max < 1))
3481	return(NULL);
3482    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
3483    if (atom == NULL)
3484	return(NULL);
3485    atom->valuep = xmlStrdup(token);
3486    atom->data = data;
3487    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
3488    if (min == 0)
3489	atom->min = 1;
3490    else
3491	atom->min = min;
3492    atom->max = max;
3493    /*
3494     * associate a counter to the transition.
3495     */
3496    counter = xmlRegGetCounter(am);
3497    am->counters[counter].min = 1;
3498    am->counters[counter].max = 1;
3499
3500    /* xmlFAGenerateTransitions(am, from, to, atom); */
3501    if (to == NULL) {
3502	to = xmlRegNewState(am);
3503	xmlRegStatePush(am, to);
3504    }
3505    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
3506    xmlRegAtomPush(am, atom);
3507    am->state = to;
3508    if (to == NULL)
3509	to = am->state;
3510    if (to == NULL)
3511	return(NULL);
3512    return(to);
3513}
3514
3515/**
3516 * xmlAutomataNewState:
3517 * @am: an automata
3518 *
3519 * Create a new disconnected state in the automata
3520 *
3521 * Returns the new state or NULL in case of error
3522 */
3523xmlAutomataStatePtr
3524xmlAutomataNewState(xmlAutomataPtr am) {
3525    xmlAutomataStatePtr to;
3526
3527    if (am == NULL)
3528	return(NULL);
3529    to = xmlRegNewState(am);
3530    xmlRegStatePush(am, to);
3531    return(to);
3532}
3533
3534/**
3535 * xmlAutomataNewTransition:
3536 * @am: an automata
3537 * @from: the starting point of the transition
3538 * @to: the target point of the transition or NULL
3539 *
3540 * If @to is NULL, this create first a new target state in the automata
3541 * and then adds a an epsilon transition from the @from state to the
3542 * target state
3543 *
3544 * Returns the target state or NULL in case of error
3545 */
3546xmlAutomataStatePtr
3547xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
3548		      xmlAutomataStatePtr to) {
3549    if ((am == NULL) || (from == NULL))
3550	return(NULL);
3551    xmlFAGenerateEpsilonTransition(am, from, to);
3552    if (to == NULL)
3553	return(am->state);
3554    return(to);
3555}
3556
3557/**
3558 * xmlAutomataNewAllTrans:
3559 * @am: an automata
3560 * @from: the starting point of the transition
3561 * @to: the target point of the transition or NULL
3562 *
3563 * If @to is NULL, this create first a new target state in the automata
3564 * and then adds a an ALL transition from the @from state to the
3565 * target state. That transition is an epsilon transition allowed only when
3566 * all transitions from the @from node have been activated.
3567 *
3568 * Returns the target state or NULL in case of error
3569 */
3570xmlAutomataStatePtr
3571xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
3572		      xmlAutomataStatePtr to) {
3573    if ((am == NULL) || (from == NULL))
3574	return(NULL);
3575    xmlFAGenerateAllTransition(am, from, to);
3576    if (to == NULL)
3577	return(am->state);
3578    return(to);
3579}
3580
3581/**
3582 * xmlAutomataNewCounter:
3583 * @am: an automata
3584 * @min:  the minimal value on the counter
3585 * @max:  the maximal value on the counter
3586 *
3587 * Create a new counter
3588 *
3589 * Returns the counter number or -1 in case of error
3590 */
3591int
3592xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
3593    int ret;
3594
3595    if (am == NULL)
3596	return(-1);
3597
3598    ret = xmlRegGetCounter(am);
3599    if (ret < 0)
3600	return(-1);
3601    am->counters[ret].min = min;
3602    am->counters[ret].max = max;
3603    return(ret);
3604}
3605
3606/**
3607 * xmlAutomataNewCountedTrans:
3608 * @am: an automata
3609 * @from: the starting point of the transition
3610 * @to: the target point of the transition or NULL
3611 * @counter: the counter associated to that transition
3612 *
3613 * If @to is NULL, this create first a new target state in the automata
3614 * and then adds an epsilon transition from the @from state to the target state
3615 * which will increment the counter provided
3616 *
3617 * Returns the target state or NULL in case of error
3618 */
3619xmlAutomataStatePtr
3620xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
3621		xmlAutomataStatePtr to, int counter) {
3622    if ((am == NULL) || (from == NULL) || (counter < 0))
3623	return(NULL);
3624    xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
3625    if (to == NULL)
3626	return(am->state);
3627    return(to);
3628}
3629
3630/**
3631 * xmlAutomataNewCounterTrans:
3632 * @am: an automata
3633 * @from: the starting point of the transition
3634 * @to: the target point of the transition or NULL
3635 * @counter: the counter associated to that transition
3636 *
3637 * If @to is NULL, this create first a new target state in the automata
3638 * and then adds an epsilon transition from the @from state to the target state
3639 * which will be allowed only if the counter is within the right range.
3640 *
3641 * Returns the target state or NULL in case of error
3642 */
3643xmlAutomataStatePtr
3644xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
3645		xmlAutomataStatePtr to, int counter) {
3646    if ((am == NULL) || (from == NULL) || (counter < 0))
3647	return(NULL);
3648    xmlFAGenerateCountedTransition(am, from, to, counter);
3649    if (to == NULL)
3650	return(am->state);
3651    return(to);
3652}
3653
3654/**
3655 * xmlAutomataCompile:
3656 * @am: an automata
3657 *
3658 * Compile the automata into a Reg Exp ready for being executed.
3659 * The automata should be free after this point.
3660 *
3661 * Returns the compiled regexp or NULL in case of error
3662 */
3663xmlRegexpPtr
3664xmlAutomataCompile(xmlAutomataPtr am) {
3665    xmlRegexpPtr ret;
3666
3667    xmlFAEliminateEpsilonTransitions(am);
3668    ret = xmlRegEpxFromParse(am);
3669
3670    return(ret);
3671}
3672#endif /* LIBXML_AUTOMATA_ENABLED */
3673#endif /* LIBXML_REGEXP_ENABLED */
3674