xmlregexp.c revision 6e65e15777ebb281aec362fa2aba51e2cb5aa87f
/*
 * regexp.c: generic and extensible Regular Expression engine
 *
 * Basically designed with the purpose of compiling regexps for
 * the variety of validation/schemas mechanisms now available in
 * XML related specifications; these include:
 *    - XML-1.0 DTD validation
 *    - XML Schemas structure part 1
 *    - XML Schemas Datatypes part 2 especially Appendix F
 *    - RELAX-NG/TREX i.e. the counter proposal
 *
 * See Copyright for the status of this software.
134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Daniel Veillard <veillard@redhat.com> 154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#define IN_LIBXML 184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include "libxml.h" 194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef LIBXML_REGEXP_ENABLED 214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 22cee2b3a5f124e19db46109132c22e1b8faec1c87Daniel Veillard/* #define DEBUG_ERR */ 23fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <stdio.h> 254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <string.h> 26ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#ifdef HAVE_LIMITS_H 27ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#include <limits.h> 28ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#endif 29ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard 304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <libxml/tree.h> 314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <libxml/parserInternals.h> 324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <libxml/xmlregexp.h> 334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <libxml/xmlautomata.h> 344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#include <libxml/xmlunicode.h> 354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 36ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#ifndef INT_MAX 37ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#define INT_MAX 123456789 /* easy to flag and big enough for our needs */ 38ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard#endif 
/*
 * Compile-time debugging switches; uncomment one to enable the
 * corresponding traces.
 */
/* #define DEBUG_REGEXP_GRAPH */
/* #define DEBUG_REGEXP_EXEC */
/* #define DEBUG_PUSH */
/* #define DEBUG_COMPACTION */

/*
 * Parser helper macros. ERROR, NEXT, CUR, NXT and NEXTL all expand to
 * expressions on a variable named `ctxt`, which must be in scope where
 * the macro is used.
 *
 * NOTE: ERROR expands to two statements and already ends with ';' (as do
 * NEXTL and TODO). Never use it as the unbraced body of an if/else/loop:
 * only the first statement would be controlled by the condition.
 */
#define ERROR(str)							\
    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
    xmlRegexpErrCompile(ctxt, str);
#define NEXT ctxt->cur++
#define CUR (*(ctxt->cur))
#define NXT(index) (ctxt->cur[index])

#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
#define NEXTL(l) ctxt->cur += l;
#define XML_REG_STRING_SEPARATOR '|'

/**
 * TODO:
 *
 * macro to flag unimplemented blocks
 */
#define TODO								\
    xmlGenericError(xmlGenericErrorContext,				\
	    "Unimplemented block at %s:%d\n",				\
            __FILE__, __LINE__);

/************************************************************************
 *									*
 *			Datatypes and structures			*
 *									*
 ************************************************************************/

/*
 * xmlRegAtomType:
 *
 * Kind of an atom or of a character range. Values are numbered
 * consecutively from 1; the order below must not change.
 *
 * The escape letters quoted in the comments are the XML Schema
 * multi-character escapes (Datatypes part 2, Appendix F).
 * NOTE(review): the XML_REGEXP_LETTER ... XML_REGEXP_OTHER_NA group
 * presumably mirrors the Unicode general categories and
 * XML_REGEXP_BLOCK_NAME a Unicode block -- confirm against the
 * character property parser.
 */
typedef enum {
    XML_REGEXP_EPSILON = 1,
    XML_REGEXP_CHARVAL,
    XML_REGEXP_RANGES,
    XML_REGEXP_SUBREG,
    XML_REGEXP_STRING,
    XML_REGEXP_ANYCHAR,		/* . */
    XML_REGEXP_ANYSPACE,	/* \s */
    XML_REGEXP_NOTSPACE,	/* \S */
    XML_REGEXP_INITNAME,	/* \i */
    XML_REGEXP_NOTINITNAME,	/* \I */
    XML_REGEXP_NAMECHAR,	/* \c */
    XML_REGEXP_NOTNAMECHAR,	/* \C */
    XML_REGEXP_DECIMAL,		/* \d */
    XML_REGEXP_NOTDECIMAL,	/* \D */
    XML_REGEXP_REALCHAR,	/* \w */
    XML_REGEXP_NOTREALCHAR,	/* \W */
    XML_REGEXP_LETTER,
    XML_REGEXP_LETTER_UPPERCASE,
    XML_REGEXP_LETTER_LOWERCASE,
    XML_REGEXP_LETTER_TITLECASE,
    XML_REGEXP_LETTER_MODIFIER,
    XML_REGEXP_LETTER_OTHERS,
    XML_REGEXP_MARK,
    XML_REGEXP_MARK_NONSPACING,
    XML_REGEXP_MARK_SPACECOMBINING,
    XML_REGEXP_MARK_ENCLOSING,
    XML_REGEXP_NUMBER,
    XML_REGEXP_NUMBER_DECIMAL,
    XML_REGEXP_NUMBER_LETTER,
    XML_REGEXP_NUMBER_OTHERS,
    XML_REGEXP_PUNCT,
    XML_REGEXP_PUNCT_CONNECTOR,
    XML_REGEXP_PUNCT_DASH,
    XML_REGEXP_PUNCT_OPEN,
    XML_REGEXP_PUNCT_CLOSE,
    XML_REGEXP_PUNCT_INITQUOTE,
    XML_REGEXP_PUNCT_FINQUOTE,
    XML_REGEXP_PUNCT_OTHERS,
    XML_REGEXP_SEPAR,
    XML_REGEXP_SEPAR_SPACE,
    XML_REGEXP_SEPAR_LINE,
    XML_REGEXP_SEPAR_PARA,
    XML_REGEXP_SYMBOL,
    XML_REGEXP_SYMBOL_MATH,
    XML_REGEXP_SYMBOL_CURRENCY,
    XML_REGEXP_SYMBOL_MODIFIER,
    XML_REGEXP_SYMBOL_OTHERS,
    XML_REGEXP_OTHER,
    XML_REGEXP_OTHER_CONTROL,
    XML_REGEXP_OTHER_FORMAT,
    XML_REGEXP_OTHER_PRIVATE,
    XML_REGEXP_OTHER_NA,
    XML_REGEXP_BLOCK_NAME
} xmlRegAtomType;

/*
 * xmlRegQuantType:
 *
 * Quantifier attached to an atom, numbered consecutively from 1.
 */
typedef enum {
    XML_REGEXP_QUANT_EPSILON = 1,
    XML_REGEXP_QUANT_ONCE,
    XML_REGEXP_QUANT_OPT,
    XML_REGEXP_QUANT_MULT,
    XML_REGEXP_QUANT_PLUS,
    XML_REGEXP_QUANT_ONCEONLY,
    XML_REGEXP_QUANT_ALL,
    XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;

/*
 * xmlRegStateType:
 *
 * Role of a state in the automaton, numbered consecutively from 1.
 */
typedef enum {
    XML_REGEXP_START_STATE = 1,
    XML_REGEXP_FINAL_STATE,
    XML_REGEXP_TRANS_STATE,
    XML_REGEXP_SINK_STATE
} xmlRegStateType;

/*
 * xmlRegMarkedType:
 *
 * Marker stored in the `mark` and `reached` fields of a state;
 * XML_REGEXP_MARK_NORMAL is 0, so zero-filled states start out unmarked.
 */
typedef enum {
    XML_REGEXP_MARK_NORMAL = 0,
    XML_REGEXP_MARK_START,
    XML_REGEXP_MARK_VISITED
} xmlRegMarkedType;
1514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegRange xmlRegRange; 1534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegRange *xmlRegRangePtr; 1544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegRange { 156f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard int neg; /* 0 normal, 1 not, 2 exclude */ 1574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomType type; 1584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int start; 1594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int end; 1604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *blockName; 1614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 1624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegAtom xmlRegAtom; 1644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegAtom *xmlRegAtomPtr; 1654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlAutomataState xmlRegState; 1674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegState *xmlRegStatePtr; 1684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegAtom { 1704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int no; 1714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomType type; 1724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegQuantType quant; 1734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int min; 1744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int max; 1754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *valuep; 
177a646cfdb14097f72d3b0ce9b0f43126934d8efd3Daniel Veillard void *valuep2; 1784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int neg; 1794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int codepoint; 1804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr start; 1814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr stop; 1824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxRanges; 1834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbRanges; 1844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegRangePtr *ranges; 1854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *data; 1864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 1874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegCounter xmlRegCounter; 1894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegCounter *xmlRegCounterPtr; 1904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegCounter { 1924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int min; 1934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int max; 1944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 1954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegTrans xmlRegTrans; 1974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegTrans *xmlRegTransPtr; 1984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegTrans { 2004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 2014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int to; 2024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int counter; 2034255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard int count; 2044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlAutomataState { 2074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStateType type; 2084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegMarkedType mark; 20923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegMarkedType reached; 2104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int no; 2114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxTrans; 2124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbTrans; 2134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegTrans *trans; 214db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard /* knowing states ponting to us can speed things up */ 215db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int maxTransTo; 216db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int nbTransTo; 217db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int *transTo; 2184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlAutomata xmlRegParserCtxt; 2214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegParserCtxt *xmlRegParserCtxtPtr; 2224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlAutomata { 2244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *string; 2254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *cur; 2264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int error; 2284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int neg; 2294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
2304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr start; 2314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr end; 2324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state; 2334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 2354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxAtoms; 2374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbAtoms; 2384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr *atoms; 2394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxStates; 2414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbStates; 2424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr *states; 2434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxCounters; 2454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbCounters; 2464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegCounter *counters; 247e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 248e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int determinist; 2496e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard int negs; 2504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegexp { 2534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *string; 2544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbStates; 2554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr *states; 2564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbAtoms; 
2574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr *atoms; 2584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbCounters; 2594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegCounter *counters; 260e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int determinist; 26123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 26223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * That's the compact form for determinists automatas 26323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 26423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int nbstates; 26523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int *compact; 266118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard void **transdata; 26723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int nbstrings; 26823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlChar **stringMap; 2694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegExecRollback xmlRegExecRollback; 2724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegExecRollback *xmlRegExecRollbackPtr; 2734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegExecRollback { 2754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state;/* the current state */ 2764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int index; /* the index in the input stack */ 2774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nextbranch; /* the next transition to explore in that state */ 278ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack int *counts; /* save the automata state if it has some */ 2794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef struct _xmlRegInputToken xmlRegInputToken; 2824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardtypedef xmlRegInputToken *xmlRegInputTokenPtr; 2834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegInputToken { 2854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *value; 2864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *data; 2874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 2884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstruct _xmlRegExecCtxt { 2904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int status; /* execution status != 0 indicate an error */ 291ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack int determinist; /* did we find an indeterministic behaviour */ 2924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegexpPtr comp; /* the compiled regexp */ 2934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecCallbacks callback; 2944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *data; 2954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state;/* the current state */ 2974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int transno; /* the current transition on that state */ 298ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack int transcount; /* the number of chars in char counted transitions */ 2994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 3004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 3014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * A stack of rollback states 3024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 3034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int maxRollbacks; 3044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int nbRollbacks; 3054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecRollback *rollbacks; 3064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 3074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 3084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The state of the automata if any 3094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 3104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int *counts; 3114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 3124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 3134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The input stack 3144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 3154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int inputStackMax; 3164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int inputStackNr; 3174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int index; 3184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int *charStack; 3194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard const xmlChar *inputString; /* when operating on characters */ 3204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegInputTokenPtr inputStack;/* when operating on strings */ 3214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 3227bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard /* 3237bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * error handling 3247bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 
*/ 3257bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int errStateNo; /* the error state number */ 3267bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlRegStatePtr errState; /* the error state */ 3277bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlChar *errString; /* the string raising the error */ 3287bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int *errCounts; /* counters at the error state */ 3294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard}; 3304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 331441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard#define REGEXP_ALL_COUNTER 0x123456 332441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard#define REGEXP_ALL_LAX_COUNTER 0x123457 3337646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 3344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); 33523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillardstatic void xmlRegFreeState(xmlRegStatePtr state); 33623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillardstatic void xmlRegFreeAtom(xmlRegAtomPtr atom); 3379efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillardstatic int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); 3384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 3394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 340ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * * 341ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * Regexp memory error handler * 342ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * * 343ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard ************************************************************************/ 344ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard/** 345ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * xmlRegexpErrMemory: 
346ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * @extra: extra information 347ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * 348ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * Handle an out of memory condition 349ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard */ 350ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillardstatic void 351ff46a0443e6b999297e52c160b88536b8089ec56Daniel VeillardxmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) 352ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard{ 353ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard const char *regexp = NULL; 354ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard if (ctxt != NULL) { 355ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard regexp = (const char *) ctxt->string; 356ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard ctxt->error = XML_ERR_NO_MEMORY; 357ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard } 358659e71ec2476d24bfca0d6432a69ef9a49a62be4Daniel Veillard __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 359ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 360ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard regexp, NULL, 0, 0, 361ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard "Memory allocation failed : %s\n", extra); 362ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard} 363ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard 364ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard/** 365ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * xmlRegexpErrCompile: 366ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * @extra: extra information 367ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard * 368ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Handle a compilation failure 369ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard */ 370ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillardstatic void 371ff46a0443e6b999297e52c160b88536b8089ec56Daniel VeillardxmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra) 372ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard{ 373ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard const char *regexp = NULL; 374ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard int idx = 0; 375ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard 376ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard if (ctxt != NULL) { 377ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard regexp = (const char *) ctxt->string; 378ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard idx = ctxt->cur - ctxt->string; 379ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard ctxt->error = XML_REGEXP_COMPILE_ERROR; 380ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard } 381659e71ec2476d24bfca0d6432a69ef9a49a62be4Daniel Veillard __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP, 382ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra, 383ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard regexp, NULL, idx, 0, 384ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard "failed to compile: %s\n", extra); 385ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard} 386ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard 387ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard/************************************************************************ 3884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 3894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Allocation/Deallocation * 3904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 3914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
************************************************************************/ 3924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 39323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillardstatic int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); 3944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 3954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegEpxFromParse: 3964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @ctxt: the parser context used to build it 3974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 398ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Allocate a new regexp and fill it with the result from the parser 3994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 4004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new regexp or NULL in case of error 4014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 4024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic xmlRegexpPtr 4034255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { 4044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegexpPtr ret; 4054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 4064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); 407a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (ret == NULL) { 408ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "compiling regexp"); 4094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 410a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 4114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(ret, 0, sizeof(xmlRegexp)); 4124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->string = ctxt->string; 4134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->nbStates = ctxt->nbStates; 4144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
ret->states = ctxt->states; 4154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->nbAtoms = ctxt->nbAtoms; 4164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->atoms = ctxt->atoms; 4174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->nbCounters = ctxt->nbCounters; 4184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->counters = ctxt->counters; 419e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret->determinist = ctxt->determinist; 42023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 42123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((ret->determinist != 0) && 42223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard (ret->nbCounters == 0) && 4236e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard (ctxt->negs == 0) && 424118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard (ret->atoms != NULL) && 42523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard (ret->atoms[0] != NULL) && 42623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard (ret->atoms[0]->type == XML_REGEXP_STRING)) { 42723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int i, j, nbstates = 0, nbatoms = 0; 42823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int *stateRemap; 42923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int *stringRemap; 43023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int *transitions; 431118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard void **transdata; 43223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlChar **stringMap; 43323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlChar *value; 43423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 43523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 43623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Switch to a compact representation 43723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 1/ counting the effective number of states left 
438ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 2/ counting the unique number of atoms, and check that 43923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * they are all of the string type 44023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 3/ build a table state x atom for the transitions 44123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 44223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 44323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stateRemap = xmlMalloc(ret->nbStates * sizeof(int)); 444a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (stateRemap == NULL) { 445ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "compiling regexp"); 446a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 447a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 448a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 44923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < ret->nbStates;i++) { 45023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (ret->states[i] != NULL) { 45123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stateRemap[i] = nbstates; 45223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard nbstates++; 45323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } else { 45423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stateRemap[i] = -1; 45523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 45623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 45723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_COMPACTION 45823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("Final: %d states\n", nbstates); 45923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 46023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *)); 461a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (stringMap == 
NULL) { 462ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "compiling regexp"); 463a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stateRemap); 464a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 465a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 466a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 46723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int)); 468a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (stringRemap == NULL) { 469ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "compiling regexp"); 470a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringMap); 471a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stateRemap); 472a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 473a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 474a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 47523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < ret->nbAtoms;i++) { 47623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((ret->atoms[i]->type == XML_REGEXP_STRING) && 47723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { 47823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard value = ret->atoms[i]->valuep; 47923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (j = 0;j < nbatoms;j++) { 48023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (xmlStrEqual(stringMap[j], value)) { 48123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stringRemap[i] = j; 48223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard break; 48323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 48423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 48523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (j >= 
nbatoms) { 48623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stringRemap[i] = nbatoms; 48723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stringMap[nbatoms] = xmlStrdup(value); 488a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (stringMap[nbatoms] == NULL) { 489a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard for (i = 0;i < nbatoms;i++) 490a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringMap[i]); 491a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringRemap); 492a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringMap); 493a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stateRemap); 494a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 495a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 496a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 49723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard nbatoms++; 49823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 49923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } else { 50023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stateRemap); 50123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringRemap); 50223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < nbatoms;i++) 50323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringMap[i]); 50423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringMap); 505a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 506a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 50723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 50823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 50923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_COMPACTION 51023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("Final: %d atoms\n", nbatoms); 
51123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 512a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard transitions = (int *) xmlMalloc((nbstates + 1) * 513a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard (nbatoms + 1) * sizeof(int)); 514a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (transitions == NULL) { 515a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stateRemap); 516a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringRemap); 517a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(stringMap); 518a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFree(ret); 519a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 520a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 521a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int)); 52223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 52323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 52423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Allocate the transition table. The first entry for each 525ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * state corresponds to the state type. 
52623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 527118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard transdata = NULL; 52823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 52923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < ret->nbStates;i++) { 53023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int stateno, atomno, targetno, prev; 53123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegStatePtr state; 53223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegTransPtr trans; 53323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 53423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard stateno = stateRemap[i]; 53523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (stateno == -1) 53623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard continue; 53723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = ret->states[i]; 53823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 53923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard transitions[stateno * (nbatoms + 1)] = state->type; 54023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 54123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (j = 0;j < state->nbTrans;j++) { 54223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard trans = &(state->trans[j]); 54323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((trans->to == -1) || (trans->atom == NULL)) 54423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard continue; 54523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard atomno = stringRemap[trans->atom->no]; 546118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if ((trans->atom->data != NULL) && (transdata == NULL)) { 547118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard transdata = (void **) xmlMalloc(nbstates * nbatoms * 548118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard sizeof(void *)); 549118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if (transdata != NULL) 
550118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard memset(transdata, 0, 551118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard nbstates * nbatoms * sizeof(void *)); 552a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard else { 553ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "compiling regexp"); 554a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard break; 555a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 556118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard } 55723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard targetno = stateRemap[trans->to]; 55823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 559ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * if the same atom can generate transitions to 2 different 56023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * states then it means the automata is not determinist and 56123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * the compact form can't be used ! 
56223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 56323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; 56423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (prev != 0) { 56523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (prev != targetno + 1) { 56623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->determinist = 0; 56723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_COMPACTION 56823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", 56923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard i, j, trans->atom->no, trans->to, atomno, targetno); 57023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf(" previous to is %d\n", prev); 57123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 57223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->determinist = 0; 573118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if (transdata != NULL) 574118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard xmlFree(transdata); 57523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(transitions); 57623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stateRemap); 57723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringRemap); 57823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < nbatoms;i++) 57923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringMap[i]); 58023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringMap); 581a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard goto not_determ; 58223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 58323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } else { 58423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#if 0 58523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("State %d trans %d: atom %d to 
%d : %d to %d\n", 58623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard i, j, trans->atom->no, trans->to, atomno, targetno); 58723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 58823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard transitions[stateno * (nbatoms + 1) + atomno + 1] = 589118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard targetno + 1; /* to avoid 0 */ 590118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if (transdata != NULL) 591118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard transdata[stateno * nbatoms + atomno] = 592118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard trans->atom->data; 59323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 59423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 59523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 59623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->determinist = 1; 59723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_COMPACTION 59823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 59923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Debug 60023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 60123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < nbstates;i++) { 60223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (j = 0;j < nbatoms + 1;j++) { 60323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("%02d ", transitions[i * (nbatoms + 1) + j]); 60423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 60523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("\n"); 60623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 60723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("\n"); 60823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 60923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 61023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Cleanup of the old data 
61123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 61223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (ret->states != NULL) { 61323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < ret->nbStates;i++) 61423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegFreeState(ret->states[i]); 61523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(ret->states); 61623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 61723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->states = NULL; 61823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->nbStates = 0; 61923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (ret->atoms != NULL) { 62023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < ret->nbAtoms;i++) 62123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegFreeAtom(ret->atoms[i]); 62223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(ret->atoms); 62323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 62423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->atoms = NULL; 62523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->nbAtoms = 0; 62623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 62723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->compact = transitions; 628118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard ret->transdata = transdata; 62923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->stringMap = stringMap; 63023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->nbstrings = nbatoms; 63123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret->nbstates = nbstates; 63223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stateRemap); 63323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(stringRemap); 63423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 635a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillardnot_determ: 
636a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->string = NULL; 637a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->nbStates = 0; 638a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->states = NULL; 639a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->nbAtoms = 0; 640a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->atoms = NULL; 641a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->nbCounters = 0; 642a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard ctxt->counters = NULL; 6434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 6444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 6454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 6464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 6474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegNewParserCtxt: 6484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @string: the string to parse 6494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 6504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Allocate a new regexp parser context 6514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 6524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new context or NULL in case of error 6534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 6544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic xmlRegParserCtxtPtr 6554255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegNewParserCtxt(const xmlChar *string) { 6564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegParserCtxtPtr ret; 6574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 6584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); 6594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == NULL) 6604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 
6614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(ret, 0, sizeof(xmlRegParserCtxt)); 6624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (string != NULL) 6634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->string = xmlStrdup(string); 6644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->cur = ret->string; 6654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->neg = 0; 6666e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard ret->negs = 0; 6674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->error = 0; 668e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret->determinist = -1; 6694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 6704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 6714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 6724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 6734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegNewRange: 6744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @ctxt: the regexp parser context 6754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @neg: is that negative 6764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @type: the type of range 6774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @start: the start codepoint 6784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @end: the end codepoint 6794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 6804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Allocate a new regexp range 6814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 6824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new range or NULL in case of error 6834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 6844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic xmlRegRangePtr 6854255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegNewRange(xmlRegParserCtxtPtr ctxt, 
6864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int neg, xmlRegAtomType type, int start, int end) { 6874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegRangePtr ret; 6884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 6894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); 6904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == NULL) { 691ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating range"); 6924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 6934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 6944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->neg = neg; 6954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->type = type; 6964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->start = start; 6974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->end = end; 6984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 6994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 7004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 7024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeRange: 7034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @range: the regexp range 7044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 7054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free a regexp range 7064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 7074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 7084255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeRange(xmlRegRangePtr range) { 7094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (range == NULL) 7104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 7114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
7124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (range->blockName != NULL) 7134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(range->blockName); 7144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(range); 7154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 7164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 7184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegNewAtom: 7194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @ctxt: the regexp parser context 7204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @type: the type of atom 7214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 7224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Allocate a new regexp range 7234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 7244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new atom or NULL in case of error 7254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 7264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic xmlRegAtomPtr 7274255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { 7284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr ret; 7294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); 7314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == NULL) { 732ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating atom"); 7334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 7344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 7354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(ret, 0, sizeof(xmlRegAtom)); 7364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->type = 
type; 7374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->quant = XML_REGEXP_QUANT_ONCE; 7384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->min = 0; 7394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->max = 0; 7404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 7414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 7424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 7444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeAtom: 7454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @atom: the regexp atom 7464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 7474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free a regexp atom 7484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 7494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 7504255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeAtom(xmlRegAtomPtr atom) { 7514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 7524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) 7544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 7554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < atom->nbRanges;i++) 7574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeRange(atom->ranges[i]); 7584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->ranges != NULL) 7594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(atom->ranges); 760de0e498b426e8493ab9e7b22a2f88501e96db000Daniel Veillard if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL)) 761de0e498b426e8493ab9e7b22a2f88501e96db000Daniel Veillard xmlFree(atom->valuep); 76277005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard if ((atom->type == 
XML_REGEXP_STRING) && (atom->valuep2 != NULL)) 76377005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard xmlFree(atom->valuep2); 764de0e498b426e8493ab9e7b22a2f88501e96db000Daniel Veillard if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL)) 7654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(atom->valuep); 7664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(atom); 7674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 7684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic xmlRegStatePtr 7704255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegNewState(xmlRegParserCtxtPtr ctxt) { 7714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr ret; 7724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); 7744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == NULL) { 775ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating state"); 7764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 7774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 7784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(ret, 0, sizeof(xmlRegState)); 7794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->type = XML_REGEXP_TRANS_STATE; 7804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret->mark = XML_REGEXP_MARK_NORMAL; 7814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 7824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 7834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 7854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeState: 7864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @state: the regexp state 
7874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 7884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free a regexp state 7894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 7904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 7914255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeState(xmlRegStatePtr state) { 7924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state == NULL) 7934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 7944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 7954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->trans != NULL) 7964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(state->trans); 797db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state->transTo != NULL) 798db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlFree(state->transTo); 7994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(state); 8004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 8014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 8024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 8034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeParserCtxt: 8044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @ctxt: the regexp parser context 8054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 8064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free a regexp parser context 8074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 8084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 8094255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { 8104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 8114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt == NULL) 8124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 8134255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard 8144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->string != NULL) 8154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(ctxt->string); 8164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->states != NULL) { 8174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < ctxt->nbStates;i++) 8184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeState(ctxt->states[i]); 8194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(ctxt->states); 8204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 8214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atoms != NULL) { 8224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < ctxt->nbAtoms;i++) 8234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeAtom(ctxt->atoms[i]); 8244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(ctxt->atoms); 8254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 8264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->counters != NULL) 8274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(ctxt->counters); 8284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(ctxt); 8294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 8304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 8314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 8324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 8334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Display of Data structures * 8344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 8354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 8364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 8374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 
8384255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { 8394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (type) { 8404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_EPSILON: 8414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "epsilon "); break; 8424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_CHARVAL: 8434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "charval "); break; 8444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_RANGES: 8454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "ranges "); break; 8464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SUBREG: 8474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "subexpr "); break; 8484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_STRING: 8494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "string "); break; 8504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYCHAR: 8514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "anychar "); break; 8524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYSPACE: 8534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "anyspace "); break; 8544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTSPACE: 8554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "notspace "); break; 8564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_INITNAME: 8574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "initname "); break; 8584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTINITNAME: 8594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "notinitname "); break; 
8604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NAMECHAR: 8614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "namechar "); break; 8624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTNAMECHAR: 8634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "notnamechar "); break; 8644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_DECIMAL: 8654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "decimal "); break; 8664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTDECIMAL: 8674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "notdecimal "); break; 8684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_REALCHAR: 8694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "realchar "); break; 8704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTREALCHAR: 8714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "notrealchar "); break; 8724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER: 8734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER "); break; 8744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_UPPERCASE: 8754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER_UPPERCASE "); break; 8764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_LOWERCASE: 8774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER_LOWERCASE "); break; 8784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_TITLECASE: 8794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER_TITLECASE "); break; 8804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_MODIFIER: 
8814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER_MODIFIER "); break; 8824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_OTHERS: 8834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "LETTER_OTHERS "); break; 8844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK: 8854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "MARK "); break; 8864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_NONSPACING: 8874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "MARK_NONSPACING "); break; 8884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_SPACECOMBINING: 8894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "MARK_SPACECOMBINING "); break; 8904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_ENCLOSING: 8914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "MARK_ENCLOSING "); break; 8924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER: 8934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NUMBER "); break; 8944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_DECIMAL: 8954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NUMBER_DECIMAL "); break; 8964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_LETTER: 8974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NUMBER_LETTER "); break; 8984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_OTHERS: 8994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NUMBER_OTHERS "); break; 9004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT: 9014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT "); break; 
9024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CONNECTOR: 9034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_CONNECTOR "); break; 9044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_DASH: 9054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_DASH "); break; 9064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OPEN: 9074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_OPEN "); break; 9084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CLOSE: 9094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_CLOSE "); break; 9104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_INITQUOTE: 9114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_INITQUOTE "); break; 9124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_FINQUOTE: 9134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_FINQUOTE "); break; 9144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OTHERS: 9154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "PUNCT_OTHERS "); break; 9164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR: 9174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SEPAR "); break; 9184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_SPACE: 9194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SEPAR_SPACE "); break; 9204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_LINE: 9214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SEPAR_LINE "); break; 9224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_PARA: 
9234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SEPAR_PARA "); break; 9244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL: 9254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SYMBOL "); break; 9264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MATH: 9274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SYMBOL_MATH "); break; 9284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_CURRENCY: 9294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SYMBOL_CURRENCY "); break; 9304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MODIFIER: 9314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SYMBOL_MODIFIER "); break; 9324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_OTHERS: 9334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "SYMBOL_OTHERS "); break; 9344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER: 9354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "OTHER "); break; 9364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_CONTROL: 9374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "OTHER_CONTROL "); break; 9384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_FORMAT: 9394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "OTHER_FORMAT "); break; 9404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_PRIVATE: 9414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "OTHER_PRIVATE "); break; 9424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_NA: 9434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "OTHER_NA "); break; 
9444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_BLOCK_NAME: 9454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "BLOCK "); break; 9464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 9474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 9484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 9494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 9504255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { 9514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (type) { 9524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_EPSILON: 9534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "epsilon "); break; 9544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_ONCE: 9554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "once "); break; 9564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_OPT: 9574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "? 
"); break; 9584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_MULT: 9594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "* "); break; 9604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_PLUS: 9614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "+ "); break; 9624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_RANGE: 9634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "range "); break; 9647646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard case XML_REGEXP_QUANT_ONCEONLY: 9657646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard fprintf(output, "onceonly "); break; 9667646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard case XML_REGEXP_QUANT_ALL: 9677646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard fprintf(output, "all "); break; 9684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 9694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 9704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 9714255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintRange(FILE *output, xmlRegRangePtr range) { 9724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " range: "); 9734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (range->neg) 9744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "negative "); 9754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintAtomType(output, range->type); 9764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%c - %c\n", range->start, range->end); 9774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 9784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 9794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 9804255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { 
9814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " atom: "); 9824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) { 9834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NULL\n"); 9844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 9854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 9869efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if (atom->neg) 9879efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard fprintf(output, "not "); 9884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintAtomType(output, atom->type); 9894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintQuantType(output, atom->quant); 9904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->quant == XML_REGEXP_QUANT_RANGE) 9914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d-%d ", atom->min, atom->max); 9924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->type == XML_REGEXP_STRING) 9934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "'%s' ", (char *) atom->valuep); 9944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->type == XML_REGEXP_CHARVAL) 9954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "char %c\n", atom->codepoint); 9964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if (atom->type == XML_REGEXP_RANGES) { 9974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 9984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d entries\n", atom->nbRanges); 9994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0; i < atom->nbRanges;i++) 10004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintRange(output, atom->ranges[i]); 10014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (atom->type == XML_REGEXP_SUBREG) { 10024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); 10034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 10044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "\n"); 10054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 10074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 10094255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { 10104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " trans: "); 10114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans == NULL) { 10124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NULL\n"); 10134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 10144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->to < 0) { 10164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "removed\n"); 10174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 10184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->counter >= 0) { 10204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "counted %d, ", trans->counter); 10214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10228a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard if (trans->count == REGEXP_ALL_COUNTER) { 10238a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard fprintf(output, "all transition, "); 10248a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard } else if (trans->count >= 0) { 10254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "count based %d, ", trans->count); 10264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
} 10274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->atom == NULL) { 10284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "epsilon to %d\n", trans->to); 10294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 10304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->atom->type == XML_REGEXP_CHARVAL) 10324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "char %c ", trans->atom->codepoint); 10334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); 10344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 10354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 10374255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintState(FILE *output, xmlRegStatePtr state) { 10384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 10394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " state: "); 10414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state == NULL) { 10424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NULL\n"); 10434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 10444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->type == XML_REGEXP_START_STATE) 10464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "START "); 10474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->type == XML_REGEXP_FINAL_STATE) 10484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "FINAL "); 10494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); 10514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < state->nbTrans; i++) { 10524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintTrans(output, &(state->trans[i])); 10534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 10554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 105623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 10574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 10584255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { 10594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 10604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " ctxt: "); 10624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt == NULL) { 10634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NULL\n"); 10644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 10654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "'%s' ", ctxt->string); 10674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->error) 10684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "error "); 10694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->neg) 10704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "neg "); 10714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "\n"); 10724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d atoms:\n", ctxt->nbAtoms); 10734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < ctxt->nbAtoms; i++) { 
10744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " %02d ", i); 10754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintAtom(output, ctxt->atoms[i]); 10764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom != NULL) { 10784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "current atom:\n"); 10794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintAtom(output, ctxt->atom); 10804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d states:", ctxt->nbStates); 10824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->start != NULL) 10834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " start: %d", ctxt->start->no); 10844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->end != NULL) 10854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " end: %d", ctxt->end->no); 10864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "\n"); 10874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < ctxt->nbStates; i++) { 10884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintState(output, ctxt->states[i]); 10894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d counters:\n", ctxt->nbCounters); 10914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < ctxt->nbCounters; i++) { 10924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, 10934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters[i].max); 10944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 10954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 
109623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 10974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 10984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 10994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 11004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Finite Automata structures manipulations * 11014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 11024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 11034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 11044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 11054255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, 11064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int neg, xmlRegAtomType type, int start, int end, 11074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *blockName) { 11084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegRangePtr range; 11094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 11104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) { 11114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("add range: atom is NULL"); 11124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 11134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->type != XML_REGEXP_RANGES) { 11154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("add range: atom is not ranges"); 11164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 11174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->maxRanges == 0) { 11194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
atom->maxRanges = 4; 11204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * 11214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegRangePtr)); 11224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->ranges == NULL) { 1123ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding ranges"); 11244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->maxRanges = 0; 11254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 11264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (atom->nbRanges >= atom->maxRanges) { 11284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegRangePtr *tmp; 11294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->maxRanges *= 2; 11304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * 11314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegRangePtr)); 11324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 1133ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding ranges"); 11344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->maxRanges /= 2; 11354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 11364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->ranges = tmp; 11384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard range = xmlRegNewRange(ctxt, neg, type, start, end); 11404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (range == NULL) 11414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 11424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard range->blockName = blockName; 
11434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->ranges[atom->nbRanges++] = range; 11444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 11454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 11464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 11474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 11484255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { 11494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->maxCounters == 0) { 11504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxCounters = 4; 11514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * 11524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegCounter)); 11534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->counters == NULL) { 1154ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating counter"); 11554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxCounters = 0; 11564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 11574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->nbCounters >= ctxt->maxCounters) { 11594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegCounter *tmp; 11604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxCounters *= 2; 11614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * 11624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegCounter)); 11634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 1164ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating counter"); 11654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
ctxt->maxCounters /= 2; 11664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 11674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters = tmp; 11694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters[ctxt->nbCounters].min = -1; 11714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters[ctxt->nbCounters].max = -1; 11724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ctxt->nbCounters++); 11734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 11744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1175a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillardstatic int 11764255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { 11774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) { 11784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("atom push: atom is NULL"); 1179a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 11804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 11814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->maxAtoms == 0) { 11824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxAtoms = 4; 11834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * 11844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegAtomPtr)); 11854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atoms == NULL) { 1186ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "pushing atom"); 11874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxAtoms = 0; 1188a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 11894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
11904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { 11914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr *tmp; 11924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxAtoms *= 2; 11934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * 11944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegAtomPtr)); 11954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 1196ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "allocating counter"); 11974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxAtoms /= 2; 1198a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 11994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atoms = tmp; 12014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->no = ctxt->nbAtoms; 12034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atoms[ctxt->nbAtoms++] = atom; 1204a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(0); 12054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 12064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 12074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 1208db68b74dc7ec531361a736de7769a3e8ce881f79Daniel VeillardxmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, 1209db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int from) { 1210db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (target->maxTransTo == 0) { 1211db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->maxTransTo = 8; 1212db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->transTo = (int *) xmlMalloc(target->maxTransTo * 
1213db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard sizeof(int)); 1214db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (target->transTo == NULL) { 1215db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegexpErrMemory(ctxt, "adding transition"); 1216db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->maxTransTo = 0; 1217db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard return; 1218db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1219db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } else if (target->nbTransTo >= target->maxTransTo) { 1220db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int *tmp; 1221db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->maxTransTo *= 2; 1222db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo * 1223db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard sizeof(int)); 1224db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (tmp == NULL) { 1225db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegexpErrMemory(ctxt, "adding transition"); 1226db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->maxTransTo /= 2; 1227db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard return; 1228db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1229db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->transTo = tmp; 1230db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1231db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->transTo[target->nbTransTo] = from; 1232db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard target->nbTransTo++; 1233db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard} 1234db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1235db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillardstatic void 12364255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr 
state, 12374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom, xmlRegStatePtr target, 1238db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int counter, int count, int nchk) { 1239f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack 1240f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack int nrtrans; 1241f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack 12424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state == NULL) { 12434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("add state: state is NULL"); 12444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 12454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (target == NULL) { 12474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("add state: target is NULL"); 12484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 12494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1250f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack /* 1251f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack * Other routines follow the philosophy 'When in doubt, add a transition' 1252f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack * so we check here whether such a transition is already present and, if 1253f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack * so, silently ignore this request. 1254f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack */ 1255f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. 
Brack 1256db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (nchk == 0) { 1257db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) { 1258db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegTransPtr trans = &(state->trans[nrtrans]); 1259db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if ((trans->atom == atom) && 1260db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (trans->to == target->no) && 1261db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (trans->counter == counter) && 1262db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (trans->count == count)) { 1263f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack#ifdef DEBUG_REGEXP_GRAPH 1264db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard printf("Ignoring duplicate transition from %d to %d\n", 1265db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard state->no, target->no); 1266f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack#endif 1267db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard return; 1268db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1269db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1270f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. Brack } 1271f9b5fa2dec22e88683d96d0a2782c244df2ca766William M. 
Brack 12724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->maxTrans == 0) { 1273db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard state->maxTrans = 8; 12744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * 12754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegTrans)); 12764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->trans == NULL) { 1277ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding transition"); 12784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->maxTrans = 0; 12794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 12804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (state->nbTrans >= state->maxTrans) { 12824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegTrans *tmp; 12834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->maxTrans *= 2; 12844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * 12854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegTrans)); 12864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 1287ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding transition"); 12884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->maxTrans /= 2; 12894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 12904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans = tmp; 12924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 12934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 12944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Add trans from %d to %d ", state->no, 
target->no); 12958a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard if (count == REGEXP_ALL_COUNTER) 12962cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("all transition\n"); 12974402ab420f68d9384bd6c2066c1676848d98d27cDaniel Veillard else if (count >= 0) 12982cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("count based %d\n", count); 12994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if (counter >= 0) 13002cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("counted %d\n", counter); 13014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if (atom == NULL) 13022cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("epsilon transition\n"); 13032cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard else if (atom != NULL) 13042cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard xmlRegPrintAtom(stdout, atom); 13054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 13064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 13074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans[state->nbTrans].atom = atom; 13084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans[state->nbTrans].to = target->no; 13094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans[state->nbTrans].counter = counter; 13104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans[state->nbTrans].count = count; 13114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->nbTrans++; 1312db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTransTo(ctxt, target, state->no); 13134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 13144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1315a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillardstatic int 13164255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { 1317a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel 
Veillard if (state == NULL) return(-1); 13184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->maxStates == 0) { 13194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxStates = 4; 13204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * 13214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegStatePtr)); 13224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->states == NULL) { 1323ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding state"); 13244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxStates = 0; 1325a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 13264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 13274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->nbStates >= ctxt->maxStates) { 13284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr *tmp; 13294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxStates *= 2; 13304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * 13314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegStatePtr)); 13324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 1333ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(ctxt, "adding state"); 13344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->maxStates /= 2; 1335a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 13364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 13374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->states = tmp; 13384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 13394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->no = ctxt->nbStates; 13404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
ctxt->states[ctxt->nbStates++] = state; 1341a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(0); 13424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 13434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 13444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 13457646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * xmlFAGenerateAllTransition: 1346441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 1347441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @from: the from state 1348441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @to: the target state or NULL for building a new one 1349441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @lax: 13507646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * 13517646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard */ 13527646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillardstatic void 13537646b18d64b6c739d04ca453493070e88c4aab13Daniel VeillardxmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, 1354441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlRegStatePtr from, xmlRegStatePtr to, 1355441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard int lax) { 13567646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (to == NULL) { 13577646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard to = xmlRegNewState(ctxt); 13587646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard xmlRegStatePush(ctxt, to); 13597646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard ctxt->state = to; 13607646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard } 1361441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if (lax) 1362db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER, 0); 1363441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard else 1364db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, to, -1, 
REGEXP_ALL_COUNTER, 0); 13657646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard} 13667646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 13677646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard/** 13684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAGenerateEpsilonTransition: 1369441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 1370441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @from: the from state 1371441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @to: the target state or NULL for building a new one 13724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 13734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 13744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 13754255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, 13764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr from, xmlRegStatePtr to) { 13774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) { 13784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = xmlRegNewState(ctxt); 13794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(ctxt, to); 13804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = to; 13814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1382db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1, 0); 13834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 13844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 13854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 13864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAGenerateCountedEpsilonTransition: 1387441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 1388441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @from: the from state 
1389441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @to: the target state or NULL for building a new one 13904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * counter: the counter for that transition 13914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 13924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 13934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 13944255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, 13954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 13964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) { 13974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = xmlRegNewState(ctxt); 13984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(ctxt, to); 13994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = to; 14004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1401db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1, 0); 14024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 14034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 14044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 14054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAGenerateCountedTransition: 1406441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 1407441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @from: the from state 1408441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @to: the target state or NULL for building a new one 14094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * counter: the counter for that transition 14104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 14114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 
14124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 14134255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, 14144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr from, xmlRegStatePtr to, int counter) { 14154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) { 14164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = xmlRegNewState(ctxt); 14174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(ctxt, to); 14184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = to; 14194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1420db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter, 0); 14214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 14224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 14234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 14244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAGenerateTransitions: 1425441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 1426441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @from: the from state 1427441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @to: the target state or NULL for building a new one 1428441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @atom: the atom generating the transition 14294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 1430ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Returns 0 if success and -1 in case of error. 
14314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 1432a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillardstatic int 14334255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, 14344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr to, xmlRegAtomPtr atom) { 14354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) { 14364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("genrate transition: atom == NULL"); 1437a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 14384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 14394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->type == XML_REGEXP_SUBREG) { 14404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 14414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * this is a subexpression handling one should not need to 1442ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * create a new node except for XML_REGEXP_QUANT_RANGE. 
14434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 1444a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (xmlRegAtomPush(ctxt, atom) < 0) { 1445a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 1446a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 14474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((to != NULL) && (atom->stop != to) && 14484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (atom->quant != XML_REGEXP_QUANT_RANGE)) { 14494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 14504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Generate an epsilon transition to link to the target 14514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 14524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); 14534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 14544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (atom->quant) { 14554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_OPT: 14564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 14574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); 14584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 14594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_MULT: 14604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 14614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); 14624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 14634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 14644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_PLUS: 
14654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 14664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); 14674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 14684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_RANGE: { 14694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int counter; 14704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr newstate; 14714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 14724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 14734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * This one is nasty: 1474ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 1/ if range has minOccurs == 0, create a new state 1475ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * and create epsilon transitions from atom->start 1476ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * to atom->stop, as well as atom->start to the new 1477ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * state 1478ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 2/ register a new counter 1479ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 3/ register an epsilon transition associated to 14804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * this counter going from atom->stop to atom->start 1481ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 4/ create a new state 1482ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * 5/ generate a counted transition from atom->stop to 14834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * that state 14844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 1485ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack if (atom->min == 0) { 1486ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack xmlFAGenerateEpsilonTransition(ctxt, atom->start, 1487ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack atom->stop); 1488ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack newstate = xmlRegNewState(ctxt); 1489ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack xmlRegStatePush(ctxt, newstate); 1490ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack ctxt->state = newstate; 1491ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack xmlFAGenerateEpsilonTransition(ctxt, atom->start, 1492ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack newstate); 1493ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack } 14944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard counter = xmlRegGetCounter(ctxt); 14954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters[counter].min = atom->min - 1; 14964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->counters[counter].max = atom->max - 1; 14974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->min = 0; 14984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->max = 0; 14994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 15004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, 15014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->start, counter); 15024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to != NULL) { 15034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard newstate = to; 15044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 15054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard newstate = xmlRegNewState(ctxt); 15064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(ctxt, newstate); 15074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = newstate; 15084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
15094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateCountedTransition(ctxt, atom->stop, 15104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard newstate, counter); 15114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 15124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard default: 15134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 15144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1515a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(0); 151699c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard } else if ((atom->min == 0) && (atom->max == 0) && 151799c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard (atom->quant == XML_REGEXP_QUANT_RANGE)) { 151899c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard /* 151999c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard * we can discard the atom and generate an epsilon transition instead 152099c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard */ 152199c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard if (to == NULL) { 152299c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard to = xmlRegNewState(ctxt); 152399c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard if (to != NULL) 152499c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard xmlRegStatePush(ctxt, to); 152599c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard else { 152699c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard return(-1); 152799c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard } 152899c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard } 152999c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, from, to); 153099c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard ctxt->state = to; 153199c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard xmlRegFreeAtom(atom); 153299c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard return(0); 15334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 
15344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) { 15354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = xmlRegNewState(ctxt); 1536a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (to != NULL) 1537a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlRegStatePush(ctxt, to); 1538a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard else { 1539a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 1540a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 1541a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 1542a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (xmlRegAtomPush(ctxt, atom) < 0) { 1543a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 15444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1545db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1, 0); 15464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = to; 15474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 15484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (atom->quant) { 15494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_OPT: 15504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 15514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, from, to); 15524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 15534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_MULT: 15544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 15554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, from, to); 1556db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0); 15574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 
15584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_QUANT_PLUS: 15594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCE; 1560db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0); 15614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 15624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard default: 15634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 15644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 1565a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(0); 15664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 15674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 15684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 15694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAReduceEpsilonTransitions: 1570441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 15714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @fromnr: the from state 15724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @tonr: the to state 1573ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * @counter: should that transition be associated to a counted 15744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 15754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 15764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 15774255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, 15784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int tonr, int counter) { 15794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int transnr; 15804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr from; 15814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr to; 15824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 15834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 15844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); 15854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 15864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard from = ctxt->states[fromnr]; 15874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (from == NULL) 15884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 15894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = ctxt->states[tonr]; 15904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) 15914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 15924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((to->mark == XML_REGEXP_MARK_START) || 15934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (to->mark == XML_REGEXP_MARK_VISITED)) 15944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 15954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 15964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to->mark = XML_REGEXP_MARK_VISITED; 15974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if 
(to->type == XML_REGEXP_FINAL_STATE) { 15984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 15994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("State %d is final, so %d becomes final\n", tonr, fromnr); 16004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 16014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard from->type = XML_REGEXP_FINAL_STATE; 16024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 16034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (transnr = 0;transnr < to->nbTrans;transnr++) { 1604db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (to->trans[transnr].to < 0) 1605db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard continue; 16064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to->trans[transnr].atom == NULL) { 16074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 16084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Don't remove counted transitions 16094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Don't loop either 16104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 1611b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to->trans[transnr].to != fromnr) { 1612b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to->trans[transnr].count >= 0) { 1613b509f1543df71549969eeac076349e05d2f78044Daniel Veillard int newto = to->trans[transnr].to; 1614b509f1543df71549969eeac076349e05d2f78044Daniel Veillard 1615b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlRegStateAddTrans(ctxt, from, NULL, 1616b509f1543df71549969eeac076349e05d2f78044Daniel Veillard ctxt->states[newto], 1617db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard -1, to->trans[transnr].count, 0); 1618b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } else { 16194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 1620b509f1543df71549969eeac076349e05d2f78044Daniel Veillard printf("Found 
epsilon trans %d from %d to %d\n", 1621b509f1543df71549969eeac076349e05d2f78044Daniel Veillard transnr, tonr, to->trans[transnr].to); 16224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 1623b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to->trans[transnr].counter >= 0) { 1624b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1625b509f1543df71549969eeac076349e05d2f78044Daniel Veillard to->trans[transnr].to, 1626b509f1543df71549969eeac076349e05d2f78044Daniel Veillard to->trans[transnr].counter); 1627b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } else { 1628b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlFAReduceEpsilonTransitions(ctxt, fromnr, 1629b509f1543df71549969eeac076349e05d2f78044Daniel Veillard to->trans[transnr].to, 1630b509f1543df71549969eeac076349e05d2f78044Daniel Veillard counter); 1631b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } 1632b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } 16334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 16344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 16354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int newto = to->trans[transnr].to; 16364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1637b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to->trans[transnr].counter >= 0) { 1638b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1639b509f1543df71549969eeac076349e05d2f78044Daniel Veillard ctxt->states[newto], 1640db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard to->trans[transnr].counter, -1, 1); 1641b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } else { 1642b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 1643db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard ctxt->states[newto], counter, -1, 1); 
1644b509f1543df71549969eeac076349e05d2f78044Daniel Veillard } 16454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 16464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 16474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to->mark = XML_REGEXP_MARK_NORMAL; 16484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 16494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 16504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 1651db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * xmlFAEliminateSimpleEpsilonTransitions: 1652db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * @ctxt: a regexp parser context 1653db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * 1654db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * Eliminating general epsilon transitions can get costly in the general 1655db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * algorithm due to the large amount of generated new transitions and 1656db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * associated comparisons. However for simple epsilon transition used just 1657db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * to separate building blocks when generating the automata this can be 1658db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * reduced to state elimination: 1659db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * - if there exists an epsilon from X to Y 1660db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * - if there is no other transition from X 1661db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * then X and Y are semantically equivalent and X can be eliminated 1662db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * If X is the start state then make Y the start state, else replace the 1663db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard * target of all transitions to X by transitions to Y. 
1664db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard */ 1665db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillardstatic void 1666db68b74dc7ec531361a736de7769a3e8ce881f79Daniel VeillardxmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 1667db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int statenr, i, j, newto; 1668db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStatePtr state, tmp; 1669db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1670db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1671db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard state = ctxt->states[statenr]; 1672db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state == NULL) 1673db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard continue; 1674db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state->nbTrans != 1) 1675db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard continue; 1676db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard /* is the only transition out a basic transition */ 1677db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if ((state->trans[0].atom == NULL) && 1678db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (state->trans[0].to >= 0) && 1679db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (state->trans[0].to != statenr) && 1680db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (state->trans[0].counter < 0) && 1681db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (state->trans[0].count < 0)) { 1682db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard newto = state->trans[0].to; 1683db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1684db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state->type == XML_REGEXP_START_STATE) { 1685db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 1686db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard printf("Found simple 
epsilon trans from start %d to %d\n", 1687db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard statenr, newto); 1688db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#endif 1689db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } else { 1690db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 1691db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard printf("Found simple epsilon trans from %d to %d\n", 1692db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard statenr, newto); 1693db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#endif 1694db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (i = 0;i < state->nbTransTo;i++) { 1695db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp = ctxt->states[state->transTo[i]]; 1696db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (j = 0;j < tmp->nbTrans;j++) { 1697db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (tmp->trans[j].to == statenr) { 1698db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp->trans[j].to = newto; 1699db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 1700db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard printf("Changed transition %d on %d to go to %d\n", 1701db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard j, tmp->no, newto); 1702db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#endif 1703db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTransTo(ctxt, ctxt->states[newto], 1704db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp->no); 1705db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1706db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1707db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1708db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#if 0 1709db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (i = 0;i < ctxt->nbStates;i++) { 1710db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp 
= ctxt->states[i]; 1711db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (j = 0;j < tmp->nbTrans;j++) { 1712db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (tmp->trans[j].to == statenr) { 1713db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard tmp->trans[j].to = newto; 1714db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 1715db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard printf("Changed transition %d on %d to go to %d\n", 1716db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard j, tmp->no, newto); 1717db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#endif 1718db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1719db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1720db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1721db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard#endif 1722db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state->type == XML_REGEXP_FINAL_STATE) 1723db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; 1724db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard /* eliminate the transition completely */ 1725db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard state->nbTrans = 0; 1726db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1727db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1728db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1729db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1730db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1731db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 1732db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard} 1733db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard/** 17344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAEliminateEpsilonTransitions: 1735441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 
17364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 17374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 17384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 17394255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { 17404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int statenr, transnr; 17414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state; 1742db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard int has_epsilon; 17434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1744a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (ctxt->states == NULL) return; 1745a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard 1746db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlFAEliminateSimpleEpsilonTransitions(ctxt); 1747db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard 1748db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard has_epsilon = 0; 1749a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard 17504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 17514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * build the completed transitions bypassing the epsilons 17524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Use a marking algorithm to avoid loops 1753cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * mark sink states too. 
17544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 17554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 17564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state = ctxt->states[statenr]; 17574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state == NULL) 17584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard continue; 1759cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((state->nbTrans == 0) && 1760cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (state->type != XML_REGEXP_FINAL_STATE)) { 1761cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard state->type = XML_REGEXP_SINK_STATE; 1762cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 17634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (transnr = 0;transnr < state->nbTrans;transnr++) { 17644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((state->trans[transnr].atom == NULL) && 17654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (state->trans[transnr].to >= 0)) { 17664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (state->trans[transnr].to == statenr) { 17674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->trans[transnr].to = -1; 17684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 17694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Removed loopback epsilon trans %d on %d\n", 17704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard transnr, statenr); 17714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 17724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (state->trans[transnr].count < 0) { 17734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int newto = state->trans[transnr].to; 17744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 17754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 
17764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Found epsilon trans %d from %d to %d\n", 17774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard transnr, statenr, newto); 17784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 17794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->mark = XML_REGEXP_MARK_START; 1780db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard has_epsilon = 1; 17814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAReduceEpsilonTransitions(ctxt, statenr, 17824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard newto, state->trans[transnr].counter); 17834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->mark = XML_REGEXP_MARK_NORMAL; 17844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 17854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 17864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Found counted transition %d on %d\n", 17874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard transnr, statenr); 17884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 17894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 17904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 17914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 17924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 17934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 17944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Eliminate the epsilon transitions 17954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 1796db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (has_epsilon) { 1797db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1798db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard state = ctxt->states[statenr]; 1799db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if (state == NULL) 
1800db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard continue; 1801db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard for (transnr = 0;transnr < state->nbTrans;transnr++) { 1802db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegTransPtr trans = &(state->trans[transnr]); 1803db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard if ((trans->atom == NULL) && 1804db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (trans->count < 0) && 1805db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard (trans->to >= 0)) { 1806db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard trans->to = -1; 1807db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard } 18084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 18094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 18104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 181123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 181223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 181323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Use this pass to detect unreachable states too 181423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 181523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 181623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = ctxt->states[statenr]; 181723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (state != NULL) 1818779af00750fa86045e94422287d67a2cf5723f65William M. Brack state->reached = XML_REGEXP_MARK_NORMAL; 181923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 182023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = ctxt->states[0]; 182123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (state != NULL) 1822779af00750fa86045e94422287d67a2cf5723f65William M. 
Brack state->reached = XML_REGEXP_MARK_START; 182323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard while (state != NULL) { 182423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegStatePtr target = NULL; 1825779af00750fa86045e94422287d67a2cf5723f65William M. Brack state->reached = XML_REGEXP_MARK_VISITED; 182623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 1827ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Mark all states reachable from the current reachable state 182823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 182923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (transnr = 0;transnr < state->nbTrans;transnr++) { 183023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((state->trans[transnr].to >= 0) && 183123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ((state->trans[transnr].atom != NULL) || 183223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard (state->trans[transnr].count >= 0))) { 183323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int newto = state->trans[transnr].to; 183423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 183523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (ctxt->states[newto] == NULL) 183623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard continue; 1837779af00750fa86045e94422287d67a2cf5723f65William M. Brack if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { 1838779af00750fa86045e94422287d67a2cf5723f65William M. 
Brack ctxt->states[newto]->reached = XML_REGEXP_MARK_START; 183923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard target = ctxt->states[newto]; 184023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 184123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 184223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 1843cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard 184423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 184523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * find the next accessible state not explored 184623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 184723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (target == NULL) { 184823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (statenr = 1;statenr < ctxt->nbStates;statenr++) { 184923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = ctxt->states[statenr]; 1850779af00750fa86045e94422287d67a2cf5723f65William M. Brack if ((state != NULL) && (state->reached == 1851779af00750fa86045e94422287d67a2cf5723f65William M. Brack XML_REGEXP_MARK_START)) { 185223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard target = state; 185323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard break; 185423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 185523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 185623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 185723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = target; 185823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 185923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 186023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard state = ctxt->states[statenr]; 1861779af00750fa86045e94422287d67a2cf5723f65William M. 
Brack if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) { 186223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 186323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("Removed unreachable state %d\n", statenr); 186423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 186523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegFreeState(state); 186623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ctxt->states[statenr] = NULL; 186723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 186823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 186923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 18704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 18714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 1872e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard/** 1873e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * xmlFACompareAtoms: 1874e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * @atom1: an atom 1875e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * @atom2: an atom 1876e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1877ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Compares two atoms to check whether they are equivalents 1878e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1879e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Returns 1 if yes and 0 otherwise 1880e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1881e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillardstatic int 1882e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel VeillardxmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) { 18839efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard int ret; 18849efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 1885e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (atom1 == atom2) 1886e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(1); 1887e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if ((atom1 == NULL) || (atom2 == NULL)) 1888e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 1889e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 1890e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (atom1->type != atom2->type) 1891e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 1892e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard switch (atom1->type) { 1893e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard case XML_REGEXP_STRING: 18949efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep, 18959efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard (xmlChar *)atom2->valuep); 18969efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard break; 1897e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard case XML_REGEXP_EPSILON: 1898e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(1); 1899e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard case XML_REGEXP_CHARVAL: 19009efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard ret = atom1->codepoint == atom2->codepoint; 19019efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard break; 
1902e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard case XML_REGEXP_RANGES: 1903e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard TODO; 1904e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 1905e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard default: 19069efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(1); 1907e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 19086e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard if (atom1->neg != atom2->neg) { 19099efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard ret = !ret; 19106e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard } 19119efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(ret); 1912e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard} 1913e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 1914e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard/** 1915e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * xmlFARecurseDeterminism: 1916e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * @ctxt: a regexp parser context 1917e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1918e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Check whether the associated regexp is determinist, 1919e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * should be called after xmlFAEliminateEpsilonTransitions() 1920e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1921e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1922e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillardstatic int 1923e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel VeillardxmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 1924e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int to, xmlRegAtomPtr atom) { 1925e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int ret = 1; 1926e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int transnr; 1927e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel 
Veillard xmlRegTransPtr t1; 1928e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 1929e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (state == NULL) 1930e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(ret); 1931e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard for (transnr = 0;transnr < state->nbTrans;transnr++) { 1932e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard t1 = &(state->trans[transnr]); 1933e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard /* 1934e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * check transitions conflicting with the one looked at 1935e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1936e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->atom == NULL) { 1937e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->to == -1) 1938e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1939e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 1940e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard to, atom); 1941e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (ret == 0) 1942e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 1943e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1944e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 1945e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->to != to) 1946e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1947e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (xmlFACompareAtoms(t1->atom, atom)) 1948e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 1949e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 1950e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(ret); 1951e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard} 1952e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 
1953e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard/** 1954e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * xmlFAComputesDeterminism: 1955e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * @ctxt: a regexp parser context 1956e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1957e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Check whether the associated regexp is determinist, 1958e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * should be called after xmlFAEliminateEpsilonTransitions() 1959e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 1960e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1961e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillardstatic int 1962e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel VeillardxmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { 1963e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int statenr, transnr; 1964e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard xmlRegStatePtr state; 1965e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard xmlRegTransPtr t1, t2; 1966e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int i; 1967e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int ret = 1; 1968e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 19694402ab420f68d9384bd6c2066c1676848d98d27cDaniel Veillard#ifdef DEBUG_REGEXP_GRAPH 19704402ab420f68d9384bd6c2066c1676848d98d27cDaniel Veillard printf("xmlFAComputesDeterminism\n"); 19714402ab420f68d9384bd6c2066c1676848d98d27cDaniel Veillard xmlRegPrintCtxt(stdout, ctxt); 19724402ab420f68d9384bd6c2066c1676848d98d27cDaniel Veillard#endif 1973e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (ctxt->determinist != -1) 1974e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(ctxt->determinist); 1975e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 1976e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard /* 1977ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Check for all states that there aren't 2 transitions 1978e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * with the same atom and a different target. 1979e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1980e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard for (statenr = 0;statenr < ctxt->nbStates;statenr++) { 1981e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard state = ctxt->states[statenr]; 1982e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (state == NULL) 1983e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1984e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard for (transnr = 0;transnr < state->nbTrans;transnr++) { 1985e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard t1 = &(state->trans[transnr]); 1986e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard /* 1987e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Determinism checks in case of counted or all transitions 1988e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * will have to be handled separately 1989e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 1990e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->atom == NULL) 1991e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1992e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->to == -1) /* eliminated */ 1993e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1994e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard for (i = 0;i < transnr;i++) { 1995e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard t2 = &(state->trans[i]); 1996e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t2->to == -1) /* eliminated */ 1997e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard continue; 1998e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t2->atom != NULL) { 1999e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (t1->to == t2->to) { 
2000e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (xmlFACompareAtoms(t1->atom, t2->atom)) 2001ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack t2->to = -1; /* eliminated */ 2002e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } else { 2003e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard /* not determinist ! */ 2004e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (xmlFACompareAtoms(t1->atom, t2->atom)) 2005e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret = 0; 2006e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 2007e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } else if (t1->to != -1) { 2008e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard /* 2009e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * do the closure in case of remaining specific 2010e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * epsilon transitions like choices or all 2011e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 2012e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 2013e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard t2->to, t2->atom); 2014e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (ret == 0) 2015e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(0); 2016e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 2017e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 2018e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (ret == 0) 2019e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard break; 2020e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 2021e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (ret == 0) 2022e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard break; 2023e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard } 2024e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ctxt->determinist = ret; 
2025e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(ret); 2026e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard} 2027e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 20284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 20294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 20304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Routines to check input against transition atoms * 20314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 20324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 20334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 20344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 20354255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, 20364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int start, int end, const xmlChar *blockName) { 20374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret = 0; 20384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 20394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (type) { 20404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_STRING: 20414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SUBREG: 20424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_RANGES: 20434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_EPSILON: 20444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 20454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYCHAR: 20464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ((codepoint != '\n') && (codepoint != '\r')); 20474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 
20484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_CHARVAL: 20494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ((codepoint >= start) && (codepoint <= end)); 20504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTSPACE: 20524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard neg = !neg; 20534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYSPACE: 20544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ((codepoint == '\n') || (codepoint == '\r') || 20554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (codepoint == '\t') || (codepoint == ' ')); 20564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTINITNAME: 20584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard neg = !neg; 20594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_INITNAME: 2060871611bb0325095b30559ff1edc1fdaa2ad5fd2fWilliam M. Brack ret = (IS_LETTER(codepoint) || 20614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (codepoint == '_') || (codepoint == ':')); 20624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTNAMECHAR: 20644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard neg = !neg; 20654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NAMECHAR: 2066871611bb0325095b30559ff1edc1fdaa2ad5fd2fWilliam M. Brack ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) || 20674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (codepoint == '.') || (codepoint == '-') || 20684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (codepoint == '_') || (codepoint == ':') || 2069871611bb0325095b30559ff1edc1fdaa2ad5fd2fWilliam M. 
Brack IS_COMBINING(codepoint) || IS_EXTENDER(codepoint)); 20704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTDECIMAL: 20724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard neg = !neg; 20734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_DECIMAL: 20744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatNd(codepoint); 20754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_REALCHAR: 20774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard neg = !neg; 20784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTREALCHAR: 20794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatP(codepoint); 20804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 0) 20814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatZ(codepoint); 20824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 0) 20834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatC(codepoint); 20844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER: 20864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatL(codepoint); 20874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_UPPERCASE: 20894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatLu(codepoint); 20904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_LOWERCASE: 20924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatLl(codepoint); 20934255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard break; 20944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_TITLECASE: 20954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatLt(codepoint); 20964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 20974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_MODIFIER: 20984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatLm(codepoint); 20994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_OTHERS: 21014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatLo(codepoint); 21024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK: 21044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatM(codepoint); 21054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_NONSPACING: 21074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatMn(codepoint); 21084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_SPACECOMBINING: 21104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatMc(codepoint); 21114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_ENCLOSING: 21134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatMe(codepoint); 21144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER: 21164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatN(codepoint); 21174255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard break; 21184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_DECIMAL: 21194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatNd(codepoint); 21204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_LETTER: 21224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatNl(codepoint); 21234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_OTHERS: 21254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatNo(codepoint); 21264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT: 21284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatP(codepoint); 21294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CONNECTOR: 21314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPc(codepoint); 21324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_DASH: 21344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPd(codepoint); 21354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OPEN: 21374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPs(codepoint); 21384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CLOSE: 21404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPe(codepoint); 21414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
break; 21424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_INITQUOTE: 21434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPi(codepoint); 21444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_FINQUOTE: 21464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPf(codepoint); 21474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OTHERS: 21494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatPo(codepoint); 21504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR: 21524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatZ(codepoint); 21534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_SPACE: 21554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatZs(codepoint); 21564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_LINE: 21584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatZl(codepoint); 21594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_PARA: 21614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatZp(codepoint); 21624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL: 21644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatS(codepoint); 21654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 
21664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MATH: 21674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatSm(codepoint); 21684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_CURRENCY: 21704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatSc(codepoint); 21714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MODIFIER: 21734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatSk(codepoint); 21744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_OTHERS: 21764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatSo(codepoint); 21774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER: 21794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatC(codepoint); 21804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_CONTROL: 21824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatCc(codepoint); 21834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_FORMAT: 21854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatCf(codepoint); 21864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_PRIVATE: 21884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsCatCo(codepoint); 21894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 
21904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_NA: 21914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* ret = xmlUCSIsCatCn(codepoint); */ 21924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* Seems it doesn't exist anymore in recent Unicode releases */ 21934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 21944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_BLOCK_NAME: 21964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlUCSIsBlock(codepoint, (const char *) blockName); 21974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 21984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 21994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (neg) 22004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(!ret); 22014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 22024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 22034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 22044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 22054255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { 22064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i, ret = 0; 22074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegRangePtr range; 22084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2209871611bb0325095b30559ff1edc1fdaa2ad5fd2fWilliam M. 
Brack if ((atom == NULL) || (!IS_CHAR(codepoint))) 22104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 22114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 22124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (atom->type) { 22134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SUBREG: 22144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_EPSILON: 22154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 22164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_CHARVAL: 22174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(codepoint == atom->codepoint); 22184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_RANGES: { 22194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int accept = 0; 2220f2a1283564df8ded1942b231c60c3a7ae4ff748aDaniel Veillard 22214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < atom->nbRanges;i++) { 22224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard range = atom->ranges[i]; 2223f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard if (range->neg == 2) { 22244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegCheckCharacterRange(range->type, codepoint, 22254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 0, range->start, range->end, 22264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard range->blockName); 22274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret != 0) 22284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); /* excluded char */ 2229f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard } else if (range->neg) { 2230f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard ret = xmlRegCheckCharacterRange(range->type, codepoint, 2231f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard 0, range->start, range->end, 2232f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard range->blockName); 
2233f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard if (ret == 0) 2234f2a1283564df8ded1942b231c60c3a7ae4ff748aDaniel Veillard accept = 1; 2235f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard else 2236f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard return(0); 22374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 22384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegCheckCharacterRange(range->type, codepoint, 22394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 0, range->start, range->end, 22404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard range->blockName); 22414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret != 0) 22424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard accept = 1; /* might still be excluded */ 22434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 22444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 22454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(accept); 22464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 22474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_STRING: 22484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("TODO: XML_REGEXP_STRING\n"); 22494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 22504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYCHAR: 22514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_ANYSPACE: 22524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTSPACE: 22534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_INITNAME: 22544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTINITNAME: 22554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NAMECHAR: 22564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTNAMECHAR: 22574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 
XML_REGEXP_DECIMAL: 22584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTDECIMAL: 22594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_REALCHAR: 22604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NOTREALCHAR: 22614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER: 22624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_UPPERCASE: 22634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_LOWERCASE: 22644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_TITLECASE: 22654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_MODIFIER: 22664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_LETTER_OTHERS: 22674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK: 22684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_NONSPACING: 22694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_SPACECOMBINING: 22704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_MARK_ENCLOSING: 22714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER: 22724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_DECIMAL: 22734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_LETTER: 22744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_NUMBER_OTHERS: 22754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT: 22764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CONNECTOR: 22774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_DASH: 22784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OPEN: 22794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_CLOSE: 
22804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_INITQUOTE: 22814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_FINQUOTE: 22824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_PUNCT_OTHERS: 22834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR: 22844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_SPACE: 22854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_LINE: 22864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SEPAR_PARA: 22874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL: 22884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MATH: 22894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_CURRENCY: 22904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_MODIFIER: 22914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_SYMBOL_OTHERS: 22924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER: 22934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_CONTROL: 22944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_FORMAT: 22954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_PRIVATE: 22964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_OTHER_NA: 22974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case XML_REGEXP_BLOCK_NAME: 22984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, 22994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (const xmlChar *)atom->valuep); 23004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom->neg) 23014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = !ret; 
23024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 23034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 23054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 23064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 23084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 2309ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Saving and restoring state of an execution context * 23104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 23114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 23124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 23144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 23154255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFARegDebugExec(xmlRegExecCtxtPtr exec) { 23164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); 23174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStack != NULL) { 23184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 23194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf(": "); 23204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) 23214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); 23224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 23234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf(": %s", &(exec->inputString[exec->index])); 23244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
23254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("\n"); 23264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 23274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 23284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 23304255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFARegExecSave(xmlRegExecCtxtPtr exec) { 23314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 23324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("saving "); 23334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno++; 23344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegDebugExec(exec); 23354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno--; 23364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 23374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->maxRollbacks == 0) { 23394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks = 4; 23404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * 23414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard sizeof(xmlRegExecRollback)); 23424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks == NULL) { 2343ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "saving regexp"); 23444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks = 0; 23454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 23464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(exec->rollbacks, 0, 23484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks * sizeof(xmlRegExecRollback)); 
23494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (exec->nbRollbacks >= exec->maxRollbacks) { 23504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecRollback *tmp; 23514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int len = exec->maxRollbacks; 23524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks *= 2; 23544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, 23554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks * sizeof(xmlRegExecRollback)); 23564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 2357ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "saving regexp"); 23584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks /= 2; 23594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 23604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks = tmp; 23624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = &exec->rollbacks[len]; 23634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); 23644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks[exec->nbRollbacks].state = exec->state; 23664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks[exec->nbRollbacks].index = exec->index; 23674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; 23684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->comp->nbCounters > 0) { 23694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 
23704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks[exec->nbRollbacks].counts = (int *) 23714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlMalloc(exec->comp->nbCounters * sizeof(int)); 23724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 2373ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "saving regexp"); 23744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -5; 23754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 23764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, 23794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->comp->nbCounters * sizeof(int)); 23804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->nbRollbacks++; 23824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 23834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 23844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 23854255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { 23864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->nbRollbacks <= 0) { 23874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -1; 23884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 23894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("rollback failed on empty stack\n"); 23904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 23914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 23924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 23934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
exec->nbRollbacks--; 23944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = exec->rollbacks[exec->nbRollbacks].state; 23954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index = exec->rollbacks[exec->nbRollbacks].index; 23964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; 23974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->comp->nbCounters > 0) { 23984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { 23994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(stderr, "exec save: allocation failed"); 24004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -6; 24014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 24024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 24034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, 24044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->comp->nbCounters * sizeof(int)); 24054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 24064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 24084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("restored "); 24094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegDebugExec(exec); 24104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 24114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 24124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 24144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 2415ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Verifier, running an input against a compiled regexp * 24164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 24174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 24184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 24204255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { 24214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecCtxt execval; 24224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecCtxtPtr exec = &execval; 24237bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int ret, codepoint = 0, len; 24244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputString = content; 24264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index = 0; 24274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->determinist = 1; 24284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks = 0; 24294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->nbRollbacks = 0; 24304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks = NULL; 24314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = 0; 24324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->comp = comp; 24334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = comp->states[0]; 24344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = 0; 24354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 0; 2436f2a1283564df8ded1942b231c60c3a7ae4ff748aDaniel Veillard exec->inputStack = NULL; 2437f2a1283564df8ded1942b231c60c3a7ae4ff748aDaniel Veillard exec->inputStackMax = 0; 24384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (comp->nbCounters > 
0) { 24394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); 2440ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard if (exec->counts == NULL) { 2441ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "running regexp"); 24424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 2443ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard } 24444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(exec->counts, 0, comp->nbCounters * sizeof(int)); 24454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else 24464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->counts = NULL; 24474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((exec->status == 0) && 24484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((exec->inputString[exec->index] != 0) || 24494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (exec->state->type != XML_REGEXP_FINAL_STATE))) { 24504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegTransPtr trans; 24514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 24524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 24540e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * If end of input on non-terminal state, rollback, however we may 24554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * still have epsilon like transition for counted transitions 24560e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * on counters, in that case don't break too early. Additionally, 24570e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * if we are working on a range like "AB{0,2}", where B is not present, 24580e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * we don't want to break. 
24594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 24600e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { 2461ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack /* 2462ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * if there is a transition, we must check if 2463ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * atom allows minOccurs of 0 2464ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack */ 2465ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack if (exec->transno < exec->state->nbTrans) { 24660e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack trans = &exec->state->trans[exec->transno]; 24670e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack if (trans->to >=0) { 24680e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack atom = trans->atom; 24690e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack if (!((atom->min == 0) && (atom->max > 0))) 24700e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack goto rollback; 24710e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack } 24720e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack } else 24730e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack goto rollback; 24740e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. 
Brack } 24754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 0; 24774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (;exec->transno < exec->state->nbTrans;exec->transno++) { 24784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard trans = &exec->state->trans[exec->transno]; 24794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->to < 0) 24804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard continue; 24814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom = trans->atom; 24824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 24834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->count >= 0) { 24844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int count; 24854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegCounterPtr counter; 24864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 24884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * A counted transition. 
24894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 24904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 24914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard count = exec->counts[trans->count]; 24924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard counter = &exec->comp->counters[trans->count]; 24934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 24944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("testing count %d: val %d, min %d, max %d\n", 24954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard trans->count, count, counter->min, counter->max); 24964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 24974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ((count >= counter->min) && (count <= counter->max)); 24984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (atom == NULL) { 24994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(stderr, "epsilon transition left at runtime\n"); 25004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -2; 25014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 25024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (exec->inputString[exec->index] != 0) { 25034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); 25044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegCheckCharacter(atom, codepoint); 25050e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. 
Brack if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { 25064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr to = comp->states[trans->to]; 25074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 25084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 25094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * this is a multiple input sequence 25104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 25114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->state->nbTrans > exec->transno + 1) { 25124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 25134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 1; 25154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard do { 25164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 25174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Try to progress as much as possible on the input 25184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 25194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->transcount == atom->max) { 25204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 25214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index += len; 25234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 25244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * End of input: stop here 25254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 25264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputString[exec->index] == 0) { 25274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index -= len; 25284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 25294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->transcount 
>= atom->min) { 25314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int transno = exec->transno; 25324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state = exec->state; 25334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 25344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 25354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The transition is acceptable save it 25364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 25374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = -1; /* trick */ 25384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = to; 25394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 25404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = transno; 25414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = state; 25424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), 25444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard len); 25454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegCheckCharacter(atom, codepoint); 25464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount++; 25474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } while (ret == 1); 25484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->transcount < atom->min) 25494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 25504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 25514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 25524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * If the last check failed but one transition was found 25534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * possible, rollback 25544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 
25554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret < 0) 25564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 25574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 0) { 25584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard goto rollback; 25594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25600e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) { 25610e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack /* 25620e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * we don't match on the codepoint, but minOccurs of 0 25630e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * says that's ok. Setting len to 0 inhibits stepping 25640e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack * over the codepoint. 25650e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack */ 25660e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack exec->transcount = 1; 25670e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack len = 0; 25680e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack ret = 1; 25694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25700e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack } else if ((atom->min == 0) && (atom->max > 0)) { 25710e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack /* another spot to match when minOccurs is 0 */ 25720e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack exec->transcount = 1; 25730e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. Brack len = 0; 25740e00b28db65e0ad7f56e22874286682e90ffe9fbWilliam M. 
Brack ret = 1; 25754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 1) { 25774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->state->nbTrans > exec->transno + 1) { 25784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 25794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 25804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->counter >= 0) { 25814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 25824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Increasing count %d\n", trans->counter); 25834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 25844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->counts[trans->counter]++; 25854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 258610752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard if ((trans->count >= 0) && 258710752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard (trans->count < REGEXP_ALL_COUNTER)) { 258810752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard#ifdef DEBUG_REGEXP_EXEC 258910752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard printf("resetting count %d on transition\n", 259010752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard trans->count); 259110752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard#endif 259210752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard exec->counts[trans->count] = 0; 259310752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard } 25944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_REGEXP_EXEC 25954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("entering state %d\n", trans->to); 25964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 25974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = comp->states[trans->to]; 25984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
exec->transno = 0; 25994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->atom != NULL) { 26004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index += len; 26014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard goto progress; 26034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ret < 0) { 26044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -4; 26054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 26064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 26094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardrollback: 26104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 26114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Failed to find a way out 26124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 26134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->determinist = 0; 26144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecRollBack(exec); 26154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardprogress: 26174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard continue; 26184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks != NULL) { 26204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->counts != NULL) { 26214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 26224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 26234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < exec->maxRollbacks;i++) 26244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if 
(exec->rollbacks[i].counts != NULL) 26254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->rollbacks[i].counts); 26264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->rollbacks); 26284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->counts != NULL) 26304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->counts); 26314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->status == 0) 26324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 26334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->status == -1) 26344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 26354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(exec->status); 26364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 26374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 26384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 26394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 2640ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Progressive interface to the verifier one atom at a time * 26414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 26424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 26437bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#ifdef DEBUG_ERR 26447bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillardstatic void testerr(xmlRegExecCtxtPtr exec); 26457bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#endif 26464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 26474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 264801c13b5be2d249ef66d86585adee87901bb8efa2Daniel Veillard * xmlRegNewExecCtxt: 26494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @comp: a precompiled regular expression 26504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @callback: a callback function used for handling progresses in the 26514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * automata matching phase 26524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @data: the context data associated to the callback in this context 26534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 26544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Build a context used for progressive evaluation of a regexp. 
265501c13b5be2d249ef66d86585adee87901bb8efa2Daniel Veillard * 265601c13b5be2d249ef66d86585adee87901bb8efa2Daniel Veillard * Returns the new context 26574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 26584255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegExecCtxtPtr 26594255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { 26604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegExecCtxtPtr exec; 26614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 26624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (comp == NULL) 26634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 2664a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if ((comp->compact == NULL) && (comp->states == NULL)) 2665a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 26664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); 26674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec == NULL) { 2668ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "creating execution context"); 26694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 26704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard memset(exec, 0, sizeof(xmlRegExecCtxt)); 26724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputString = NULL; 26734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index = 0; 26744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->determinist = 1; 26754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->maxRollbacks = 0; 26764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->nbRollbacks = 0; 26774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->rollbacks = NULL; 
26784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = 0; 26794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->comp = comp; 268023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (comp->compact == NULL) 268123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard exec->state = comp->states[0]; 26824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = 0; 26834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 0; 26844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->callback = callback; 26854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->data = data; 26864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (comp->nbCounters > 0) { 26877bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard /* 26887bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * For error handling, exec->counts is allocated twice the size 26897bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * the second half is used to store the data in case of rollback 26907bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard */ 26917bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int) 26927bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * 2); 26934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->counts == NULL) { 2694ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "creating execution context"); 26954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec); 26964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 26974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 26987bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2); 26997bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errCounts = &exec->counts[comp->nbCounters]; 
27007bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } else { 27014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->counts = NULL; 27027bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errCounts = NULL; 27037bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 27044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax = 0; 27054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackNr = 0; 27064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack = NULL; 27077bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errStateNo = -1; 27087bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errString = NULL; 27094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(exec); 27104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 27114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 27134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeExecCtxt: 27144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @exec: a regular expression evaulation context 27154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 27164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free the structures associated to a regular expression evaulation context. 
27174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 27184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardvoid 27194255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { 27204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec == NULL) 27214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 27224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks != NULL) { 27244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->counts != NULL) { 27254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 27264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < exec->maxRollbacks;i++) 27284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->rollbacks[i].counts != NULL) 27294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->rollbacks[i].counts); 27304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->rollbacks); 27324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->counts != NULL) 27344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->counts); 27354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStack != NULL) { 27364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 27374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27383237023d6a8036dff817c3d46485ce6495d8ae21Daniel Veillard for (i = 0;i < exec->inputStackNr;i++) { 27393237023d6a8036dff817c3d46485ce6495d8ae21Daniel Veillard if (exec->inputStack[i].value != NULL) 27403237023d6a8036dff817c3d46485ce6495d8ae21Daniel Veillard xmlFree(exec->inputStack[i].value); 27413237023d6a8036dff817c3d46485ce6495d8ae21Daniel Veillard } 
27424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec->inputStack); 27434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27447bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if (exec->errString != NULL) 27457bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlFree(exec->errString); 27464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(exec); 27474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 27484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 27504255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, 27514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *data) { 27524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 27534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("saving value: %d:%s\n", exec->inputStackNr, value); 27544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 27554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStackMax == 0) { 27564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax = 4; 27574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack = (xmlRegInputTokenPtr) 27584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); 27594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStack == NULL) { 2760ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "pushing input string"); 27614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax = 0; 27624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 27634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { 
27654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegInputTokenPtr tmp; 27664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 27674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax *= 2; 27684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, 27694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax * sizeof(xmlRegInputToken)); 27704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (tmp == NULL) { 2771ff46a0443e6b999297e52c160b88536b8089ec56Daniel Veillard xmlRegexpErrMemory(NULL, "pushing input string"); 27724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackMax /= 2; 27734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 27744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack = tmp; 27764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 27774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); 27784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack[exec->inputStackNr].data = data; 27794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStackNr++; 27804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack[exec->inputStackNr].value = NULL; 27814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->inputStack[exec->inputStackNr].data = NULL; 27824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 27834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 2784c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard/** 2785c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * xmlRegStrEqualWildcard: 2786c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * @expStr: the string to be evaluated 2787c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * @valStr: the validation 
string 2788c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * 2789c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * Checks if both strings are equal or have the same content. "*" 2790c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * can be used as a wildcard in @valStr; "|" is used as a seperator of 2791c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * substrings in both @expStr and @valStr. 2792c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * 2793c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * Returns 1 if the comparison is satisfied and the number of substrings 2794c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * is equal, 0 otherwise. 2795c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard */ 2796c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard 2797c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillardstatic int 2798c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel VeillardxmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { 2799c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (expStr == valStr) return(1); 2800c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (expStr == NULL) return(0); 2801c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (valStr == NULL) return(0); 2802c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard do { 2803c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard /* 2804c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard * Eval if we have a wildcard for the current item. 
2805c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard */ 2806c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (*expStr != *valStr) { 2807c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) { 2808c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard do { 2809c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (*valStr == XML_REG_STRING_SEPARATOR) 2810c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard break; 2811c0e833f0024d46f50772620f24e6cf9d93748017Kasimier T. Buchcik valStr++; 2812c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard } while (*valStr != 0); 2813c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard continue; 2814c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard } else 2815c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard return(0); 2816c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard } 2817c0e833f0024d46f50772620f24e6cf9d93748017Kasimier T. Buchcik expStr++; 2818c0e833f0024d46f50772620f24e6cf9d93748017Kasimier T. 
Buchcik valStr++; 2819c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard } while (*valStr != 0); 2820c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (*expStr != 0) 2821c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard return (0); 2822c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard else 2823c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard return (1); 2824c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard} 28254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 28264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 282723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * xmlRegCompactPushString: 282823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * @exec: a regexp execution context 282923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * @comp: the precompiled exec with a compact table 283023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * @value: a string token input 283123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * @data: data associated to the token to reuse in callbacks 283223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 283323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Push one input token in the execution context 283423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 283523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Returns: 1 if the regexp reached a final state, 0 if non-final, and 283623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * a negative value in case of error. 
283723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 283823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillardstatic int 283923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel VeillardxmlRegCompactPushString(xmlRegExecCtxtPtr exec, 284023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlRegexpPtr comp, 284123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard const xmlChar *value, 284223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard void *data) { 284323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int state = exec->index; 284423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int i, target; 284523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 284623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) 284723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(-1); 284823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 284923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (value == NULL) { 285023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 285123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * are we at a final state ? 
285223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 285323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (comp->compact[state * (comp->nbstrings + 1)] == 285423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard XML_REGEXP_FINAL_STATE) 285523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(1); 285623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(0); 285723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 285823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 285923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_PUSH 286023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("value pushed: %s\n", value); 286123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 286223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 286323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 2864ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Examine all outside transitions from current state 286523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 286623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0;i < comp->nbstrings;i++) { 286723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 286823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if ((target > 0) && (target <= comp->nbstates)) { 2869c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard target--; /* to avoid 0 */ 2870c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) { 2871c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard exec->index = target; 2872118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if ((exec->callback != NULL) && (comp->transdata != NULL)) { 2873118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard exec->callback(exec->data, value, 2874118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard comp->transdata[state * comp->nbstrings + 
i], data); 2875118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard } 287623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_PUSH 287723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("entering state %d\n", target); 287823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 287923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (comp->compact[target * (comp->nbstrings + 1)] == 2880cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard XML_REGEXP_SINK_STATE) 2881cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard goto error; 2882cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard 2883cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if (comp->compact[target * (comp->nbstrings + 1)] == 288423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard XML_REGEXP_FINAL_STATE) 288523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(1); 288623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(0); 288723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 288823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 288923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 289023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* 289123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Failed to find an exit transition out from current state for the 289223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * current token 289323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 289423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#ifdef DEBUG_PUSH 289523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard printf("failed to find a transition for %s on state %d\n", value, state); 289623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard#endif 2897cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillarderror: 28987bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if (exec->errString != NULL) 28997bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 
xmlFree(exec->errString); 29007bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errString = xmlStrdup(value); 29017bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard exec->errStateNo = state; 290223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard exec->status = -1; 29037bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#ifdef DEBUG_ERR 29047bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard testerr(exec); 29057bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#endif 290623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(-1); 290723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard} 290823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 290923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard/** 29106e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * xmlRegExecPushStringInternal: 2911ea7751d53bf497e873dca39b2c305e300e2574f9Daniel Veillard * @exec: a regexp execution context or NULL to indicate the end 29124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @value: a string token input 29134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @data: data associated to the token to reuse in callbacks 29146e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * @compound: value was assembled from 2 strings 29154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 29164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Push one input token in the execution context 29174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 29184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns: 1 if the regexp reached a final state, 0 if non-final, and 29194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * a negative value in case of error. 
29204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 29216e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillardstatic int 29226e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel VeillardxmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value, 29236e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard void *data, int compound) { 29244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegTransPtr trans; 29254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 29264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret; 29274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int final = 0; 29289070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard int progress = 1; 29294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 29304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec == NULL) 29314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 293223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (exec->comp == NULL) 293323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(-1); 29344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->status != 0) 29354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(exec->status); 29364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 293723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (exec->comp->compact != NULL) 293823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(xmlRegCompactPushString(exec, exec->comp, value, data)); 293923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 29404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (value == NULL) { 29414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->state->type == XML_REGEXP_FINAL_STATE) 29424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 29434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard final = 1; 29444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
} 29454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 29464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 29474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("value pushed: %s\n", value); 29484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 29494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 29504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * If we have an active rollback stack push the new value there 29514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * and get back to where we were left 29524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 29534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((value != NULL) && (exec->inputStackNr > 0)) { 29544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSaveInputString(exec, value, data); 29554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = exec->inputStack[exec->index].value; 29564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = exec->inputStack[exec->index].data; 29574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 29584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("value loaded: %s\n", value); 29594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 29604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 29614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 29624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((exec->status == 0) && 29634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((value != NULL) || 29644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((final == 1) && 29654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (exec->state->type != XML_REGEXP_FINAL_STATE)))) { 29664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 29674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 29684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * End of input on 
non-terminal state, rollback, however we may 29694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * still have epsilon like transition for counted transitions 29704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * on counters, in that case don't break too early. 29714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 2972b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if ((value == NULL) && (exec->counts == NULL)) 29734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard goto rollback; 29744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 29754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 0; 29764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (;exec->transno < exec->state->nbTrans;exec->transno++) { 29774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard trans = &exec->state->trans[exec->transno]; 29784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->to < 0) 29794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard continue; 29804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom = trans->atom; 29814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 2982441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if (trans->count == REGEXP_ALL_LAX_COUNTER) { 2983441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard int i; 2984441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard int count; 2985441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlRegTransPtr t; 2986441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlRegCounterPtr counter; 2987441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard 2988441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard ret = 0; 2989441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard 2990441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard#ifdef DEBUG_PUSH 2991441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard printf("testing all lax %d\n", trans->count); 
2992441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard#endif 2993441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard /* 2994441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * Check all counted transitions from the current state 2995441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard */ 2996441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if ((value == NULL) && (final)) { 2997441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard ret = 1; 2998441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } else if (value != NULL) { 2999441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard for (i = 0;i < exec->state->nbTrans;i++) { 3000441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard t = &exec->state->trans[i]; 3001441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if ((t->counter < 0) || (t == trans)) 3002441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard continue; 3003441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard counter = &exec->comp->counters[t->counter]; 3004441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard count = exec->counts[t->counter]; 3005441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if ((count < counter->max) && 3006441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard (t->atom != NULL) && 3007441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard (xmlStrEqual(value, t->atom->valuep))) { 3008441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard ret = 0; 3009441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard break; 3010441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } 3011441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if ((count >= counter->min) && 3012441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard (count < counter->max) && 3013441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard (xmlStrEqual(value, t->atom->valuep))) { 3014441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard ret = 1; 3015441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard break; 
3016441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } 3017441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } 3018441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } 3019441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } else if (trans->count == REGEXP_ALL_COUNTER) { 30208a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard int i; 30218a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard int count; 30228a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard xmlRegTransPtr t; 30238a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard xmlRegCounterPtr counter; 30248a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard 30258a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard ret = 1; 30268a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard 30278a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard#ifdef DEBUG_PUSH 30288a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard printf("testing all %d\n", trans->count); 30298a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard#endif 30308a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard /* 30318a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard * Check all counted transitions from the current state 30328a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard */ 30338a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard for (i = 0;i < exec->state->nbTrans;i++) { 30348a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard t = &exec->state->trans[i]; 30358a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard if ((t->counter < 0) || (t == trans)) 30368a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard continue; 30378a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard counter = &exec->comp->counters[t->counter]; 30388a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard count = exec->counts[t->counter]; 30398a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard if ((count < counter->min) || (count > counter->max)) { 30408a001f62c195f956c7655df7464ff753b28bc957Daniel 
Veillard ret = 0; 30418a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard break; 30428a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard } 30438a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard } 30448a001f62c195f956c7655df7464ff753b28bc957Daniel Veillard } else if (trans->count >= 0) { 30454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int count; 30464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegCounterPtr counter; 30474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 30484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 30494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * A counted transition. 30504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 30514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 30524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard count = exec->counts[trans->count]; 30534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard counter = &exec->comp->counters[trans->count]; 30544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 30554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("testing count %d: val %d, min %d, max %d\n", 30564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard trans->count, count, counter->min, counter->max); 30574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 30584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ((count >= counter->min) && (count <= counter->max)); 30594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (atom == NULL) { 30604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(stderr, "epsilon transition left at runtime\n"); 30614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -2; 30624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 30634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (value != NULL) { 3064c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard ret = 
xmlRegStrEqualWildcard(atom->valuep, value); 30656e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard if (atom->neg) { 30669efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard ret = !ret; 30676e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard if (!compound) 30686e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard ret = 0; 30696e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard } 3070441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if ((ret == 1) && (trans->counter >= 0)) { 3071441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlRegCounterPtr counter; 3072441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard int count; 3073441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard 3074441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard count = exec->counts[trans->counter]; 3075441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard counter = &exec->comp->counters[trans->counter]; 3076441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard if (count >= counter->max) 3077441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard ret = 0; 3078441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard } 3079441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard 30804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { 30814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr to = exec->comp->states[trans->to]; 30824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 30834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 30844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * this is a multiple input sequence 30854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 30864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->state->nbTrans > exec->transno + 1) { 30874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStackNr <= 0) { 30884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
xmlFARegExecSaveInputString(exec, value, data); 30894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 30904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 30914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 30924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount = 1; 30934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard do { 30944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 30954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Try to progress as much as possible on the input 30964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 30974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->transcount == atom->max) { 30984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 30994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index++; 31014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = exec->inputStack[exec->index].value; 31024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = exec->inputStack[exec->index].data; 31034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 31044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("value loaded: %s\n", value); 31054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 31064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 31074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 31084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * End of input: stop here 31094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 31104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (value == NULL) { 31114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index --; 31124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 31134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31144255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard if (exec->transcount >= atom->min) { 31154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int transno = exec->transno; 31164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr state = exec->state; 31174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 31184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 31194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The transition is acceptable save it 31204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 31214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = -1; /* trick */ 31224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = to; 31234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStackNr <= 0) { 31244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSaveInputString(exec, value, data); 31254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 31274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = transno; 31284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = state; 31294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlStrEqual(value, atom->valuep); 31314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transcount++; 31324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } while (ret == 1); 31334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->transcount < atom->min) 31344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 31354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 31364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 31374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * If the last check failed but one transition was found 31384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * possible, 
rollback 31394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 31404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret < 0) 31414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = 0; 31424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 0) { 31434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard goto rollback; 31444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 1) { 31489887395b556af391306245b52487c215337c4054William M. Brack if ((exec->callback != NULL) && (atom != NULL) && 31499887395b556af391306245b52487c215337c4054William M. Brack (data != NULL)) { 31504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->callback(exec->data, atom->valuep, 31514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->data, data); 31524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->state->nbTrans > exec->transno + 1) { 31544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStackNr <= 0) { 31554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSaveInputString(exec, value, data); 31564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecSave(exec); 31584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 31594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->counter >= 0) { 31604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 31614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("Increasing count %d\n", trans->counter); 31624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 31634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
exec->counts[trans->counter]++; 31644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 316510752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard if ((trans->count >= 0) && 316610752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard (trans->count < REGEXP_ALL_COUNTER)) { 316710752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard#ifdef DEBUG_REGEXP_EXEC 316810752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard printf("resetting count %d on transition\n", 316910752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard trans->count); 317010752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard#endif 317110752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard exec->counts[trans->count] = 0; 317210752284e3e3401725a2ab49fee1367201eeff6aDaniel Veillard } 31734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 31744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("entering state %d\n", trans->to); 31754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 3176cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((exec->comp->states[trans->to] != NULL) && 3177cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (exec->comp->states[trans->to]->type == 3178cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard XML_REGEXP_SINK_STATE)) { 3179cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard /* 3180cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * entering a sink state, save the current state as error 3181cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * state. 
3182cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard */ 3183cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if (exec->errString != NULL) 3184cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard xmlFree(exec->errString); 3185cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard exec->errString = xmlStrdup(value); 3186cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard exec->errState = exec->state; 3187cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard memcpy(exec->errCounts, exec->counts, 3188cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard exec->comp->nbCounters * sizeof(int)); 3189cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 31904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->state = exec->comp->states[trans->to]; 31914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->transno = 0; 31924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (trans->atom != NULL) { 31934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->inputStack != NULL) { 31944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->index++; 31954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->index < exec->inputStackNr) { 31964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = exec->inputStack[exec->index].value; 31974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = exec->inputStack[exec->index].data; 31984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 31994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("value loaded: %s\n", value); 32004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 32014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 32024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = NULL; 32034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = NULL; 32044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 
32054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("end of input\n"); 32064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 32074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 32094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = NULL; 32104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = NULL; 32114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 32124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("end of input\n"); 32134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 32144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard goto progress; 32174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ret < 0) { 32184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->status = -4; 32194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 32204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { 32234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardrollback: 32249070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard /* 3225cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * if we didn't yet rollback on the current input 3226cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * store the current state as the error state. 
32279070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard */ 3228cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((progress) && (exec->state != NULL) && 3229cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (exec->state->type != XML_REGEXP_SINK_STATE)) { 32309070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard progress = 0; 32319070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard if (exec->errString != NULL) 32329070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard xmlFree(exec->errString); 32339070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard exec->errString = xmlStrdup(value); 32349070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard exec->errState = exec->state; 32359070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard memcpy(exec->errCounts, exec->counts, 32369070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard exec->comp->nbCounters * sizeof(int)); 32379070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard } 32389070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard 32394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* 32404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Failed to find a way out 32414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 32424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard exec->determinist = 0; 32434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFARegExecRollBack(exec); 32444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->status == 0) { 32454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard value = exec->inputStack[exec->index].value; 32464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard data = exec->inputStack[exec->index].data; 32474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef DEBUG_PUSH 32484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard printf("value loaded: %s\n", value); 32494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif 32504255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard } 32514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32529070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard continue; 32534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardprogress: 32549070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard progress = 1; 32554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard continue; 32564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (exec->status == 0) { 32584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(exec->state->type == XML_REGEXP_FINAL_STATE); 32594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 32607bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#ifdef DEBUG_ERR 32619070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard if (exec->status < 0) { 32627bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard testerr(exec); 32637bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 32649070015b406518e7215f04c0c17eb3cac3e9849bDaniel Veillard#endif 32654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(exec->status); 32664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 32674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 326852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard/** 32696e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * xmlRegExecPushString: 32706e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * @exec: a regexp execution context or NULL to indicate the end 32716e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * @value: a string token input 32726e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * @data: data associated to the token to reuse in callbacks 32736e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * 32746e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * Push one input token in the execution context 32756e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * 
32766e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * Returns: 1 if the regexp reached a final state, 0 if non-final, and 32776e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * a negative value in case of error. 32786e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard */ 32796e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillardint 32806e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel VeillardxmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, 32816e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard void *data) { 32826e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard return(xmlRegExecPushStringInternal(exec, value, data, 0)); 32836e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard} 32846e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard 32856e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard/** 328652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * xmlRegExecPushString2: 328752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @exec: a regexp execution context or NULL to indicate the end 328852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @value: the first string token input 328952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @value2: the second string token input 329052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @data: data associated to the token to reuse in callbacks 329152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * 329252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * Push one input token in the execution context 329352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * 329452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * Returns: 1 if the regexp reached a final state, 0 if non-final, and 329552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * a negative value in case of error. 
329652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard */ 329752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillardint 329852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel VeillardxmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value, 329952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard const xmlChar *value2, void *data) { 330052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlChar buf[150]; 330152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard int lenn, lenp, ret; 330252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlChar *str; 330352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 330452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (exec == NULL) 330552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(-1); 330652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (exec->comp == NULL) 330752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(-1); 330852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (exec->status != 0) 330952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(exec->status); 331052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 331152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (value2 == NULL) 331252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(xmlRegExecPushString(exec, value, data)); 331352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 331452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard lenn = strlen((char *) value2); 331552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard lenp = strlen((char *) value); 331652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 331752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (150 < lenn + lenp + 2) { 33183c908dca479ed50dca24b8593bca90e40dbde6b8Daniel Veillard str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 331952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (str == NULL) { 332052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel 
Veillard exec->status = -1; 332152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(-1); 332252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } 332352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } else { 332452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard str = buf; 332552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } 332652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard memcpy(&str[0], value, lenp); 3327c0826a7709eddbf10ade02f0ce80e5d077ac05f6Daniel Veillard str[lenp] = XML_REG_STRING_SEPARATOR; 332852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard memcpy(&str[lenp + 1], value2, lenn); 332952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard str[lenn + lenp + 1] = 0; 333052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 333152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (exec->comp->compact != NULL) 333252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard ret = xmlRegCompactPushString(exec, exec->comp, str, data); 333352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard else 33346e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard ret = xmlRegExecPushStringInternal(exec, str, data, 1); 333552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 333652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (str != buf) 333752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlFree(buf); 333852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(ret); 333952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard} 334052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 33417bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard/** 334277005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard * xmlRegExecGetValues: 3343fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @exec: a regexp execution context 3344fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @err: error extraction or normal one 33457bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * 
@nbval: pointer to the number of accepted values IN/OUT 3346cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * @nbneg: return number of negative transitions 33477bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * @values: pointer to the array of acceptable values 3348fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @terminal: return value if this was a terminal state 33497bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * 3350fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * Extract informations from the regexp execution, internal routine to 3351fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * implement xmlRegExecNextValues() and xmlRegExecErrInfo() 33527bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * 33537bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard * Returns: 0 in case of success or -1 in case of error. 33547bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard */ 3355fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillardstatic int 3356fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel VeillardxmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, 3357cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard int *nbval, int *nbneg, 3358cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard xmlChar **values, int *terminal) { 33597bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int maxval; 3360cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard int nb = 0; 33617bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 3362cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 3363cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (values == NULL) || (*nbval <= 0)) 33647bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard return(-1); 3365fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 33667bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard maxval = *nbval; 33677bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard *nbval 
= 0; 3368cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard *nbneg = 0; 33697bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { 33707bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlRegexpPtr comp; 33717bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int target, i, state; 33727bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 33737bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard comp = exec->comp; 3374fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 3375fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (err) { 3376fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec->errStateNo == -1) return(-1); 3377fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard state = exec->errStateNo; 3378fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } else { 3379fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard state = exec->index; 3380fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } 3381fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (terminal != NULL) { 3382fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (comp->compact[state * (comp->nbstrings + 1)] == 3383fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard XML_REGEXP_FINAL_STATE) 3384fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *terminal = 1; 3385fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard else 3386fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *terminal = 0; 3387fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } 3388cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 33897bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 3390cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((target > 0) && (target <= comp->nbstates) && 3391cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard 
(comp->compact[(target - 1) * (comp->nbstrings + 1)] != 3392cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard XML_REGEXP_SINK_STATE)) { 3393cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard values[nb++] = comp->stringMap[i]; 33947bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard (*nbval)++; 33957bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 33967bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 3397cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { 3398cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; 3399cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((target > 0) && (target <= comp->nbstates) && 3400cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (comp->compact[(target - 1) * (comp->nbstrings + 1)] == 3401cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard XML_REGEXP_SINK_STATE)) { 3402cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard values[nb++] = comp->stringMap[i]; 3403cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (*nbneg)++; 3404cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 3405cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 34067bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } else { 34077bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int transno; 34087bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlRegTransPtr trans; 34097bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlRegAtomPtr atom; 3410fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard xmlRegStatePtr state; 3411fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 3412fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (terminal != NULL) { 3413fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec->state->type == XML_REGEXP_FINAL_STATE) 3414fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *terminal = 1; 
3415fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard else 3416fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *terminal = 0; 3417fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } 34187bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 3419fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (err) { 3420fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec->errState == NULL) return(-1); 3421fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard state = exec->errState; 3422fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } else { 3423fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec->state == NULL) return(-1); 3424fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard state = exec->state; 3425fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } 34267bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard for (transno = 0; 3427cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (transno < state->nbTrans) && (nb < maxval); 34287bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard transno++) { 3429fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard trans = &state->trans[transno]; 34307bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if (trans->to < 0) 34317bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard continue; 34327bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard atom = trans->atom; 34337bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if ((atom == NULL) || (atom->valuep == NULL)) 34347bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard continue; 34357bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if (trans->count == REGEXP_ALL_LAX_COUNTER) { 3436cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard /* this should not be reached but ... 
*/ 34377bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard TODO; 34387bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } else if (trans->count == REGEXP_ALL_COUNTER) { 3439cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard /* this should not be reached but ... */ 34407bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard TODO; 34417bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } else if (trans->counter >= 0) { 34427bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard xmlRegCounterPtr counter; 34437bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int count; 34447bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 3445fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (err) 3446fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard count = exec->errCounts[trans->counter]; 3447fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard else 3448fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard count = exec->counts[trans->counter]; 34497bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard counter = &exec->comp->counters[trans->counter]; 34507bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard if (count < counter->max) { 345177005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard if (atom->neg) 345277005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep2; 345377005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard else 345477005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep; 34557bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard (*nbval)++; 34567bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 34577bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } else { 3458cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((exec->comp->states[trans->to] != NULL) && 3459cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (exec->comp->states[trans->to]->type != 3460cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard 
XML_REGEXP_SINK_STATE)) { 346177005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard if (atom->neg) 346277005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep2; 346377005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard else 346477005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep; 3465cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (*nbval)++; 3466cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 3467cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 3468cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 3469cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard for (transno = 0; 3470cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (transno < state->nbTrans) && (nb < maxval); 3471cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard transno++) { 3472cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard trans = &state->trans[transno]; 3473cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if (trans->to < 0) 3474cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard continue; 3475cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard atom = trans->atom; 3476cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((atom == NULL) || (atom->valuep == NULL)) 3477cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard continue; 3478cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if (trans->count == REGEXP_ALL_LAX_COUNTER) { 3479cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard continue; 3480cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } else if (trans->count == REGEXP_ALL_COUNTER) { 3481cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard continue; 3482cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } else if (trans->counter >= 0) { 3483cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard continue; 3484cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } else { 
3485cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard if ((exec->comp->states[trans->to] != NULL) && 3486cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (exec->comp->states[trans->to]->type == 3487cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard XML_REGEXP_SINK_STATE)) { 348877005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard if (atom->neg) 348977005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep2; 349077005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard else 349177005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard values[nb++] = (xmlChar *) atom->valuep; 3492cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard (*nbneg)++; 3493cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard } 34947bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 34957bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 34967bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard } 34977bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard return(0); 34987bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard} 34997bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard 3500fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard/** 3501fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * xmlRegExecNextValues: 3502fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @exec: a regexp execution context 3503fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @nbval: pointer to the number of accepted values IN/OUT 3504cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * @nbneg: return number of negative transitions 3505fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @values: pointer to the array of acceptable values 3506fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @terminal: return value if this was a terminal state 3507fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * 3508fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * Extract informations from the 
regexp execution, 3509fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * the parameter @values must point to an array of @nbval string pointers 3510fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * on return nbval will contain the number of possible strings in that 3511fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * state and the @values array will be updated with them. The string values 3512fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * returned will be freed with the @exec context and don't need to be 3513fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * deallocated. 3514fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * 3515fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * Returns: 0 in case of success or -1 in case of error. 3516fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard */ 3517fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillardint 3518cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel VeillardxmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg, 3519cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard xmlChar **values, int *terminal) { 3520cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal)); 3521fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard} 3522fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 3523fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard/** 3524fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * xmlRegExecErrInfo: 3525fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @exec: a regexp execution context generating an error 3526fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @string: return value for the error string 3527fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @nbval: pointer to the number of accepted values IN/OUT 3528cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard * @nbneg: return number of negative transitions 
3529fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @values: pointer to the array of acceptable values 3530fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @terminal: return value if this was a terminal state 3531fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * 3532fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * Extract error informations from the regexp execution, the parameter 3533fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * @string will be updated with the value pushed and not accepted, 3534fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * the parameter @values must point to an array of @nbval string pointers 3535fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * on return nbval will contain the number of possible strings in that 3536fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * state and the @values array will be updated with them. The string values 3537fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * returned will be freed with the @exec context and don't need to be 3538fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * deallocated. 3539fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * 3540fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard * Returns: 0 in case of success or -1 in case of error. 
3541fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard */ 3542fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillardint 3543fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel VeillardxmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string, 3544cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard int *nbval, int *nbneg, xmlChar **values, int *terminal) { 3545fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec == NULL) 3546fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard return(-1); 3547fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (string != NULL) { 3548fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard if (exec->status != 0) 3549fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *string = exec->errString; 3550fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard else 3551fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard *string = NULL; 3552fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard } 3553cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal)); 3554fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard} 3555fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard 35567bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard#ifdef DEBUG_ERR 35577bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillardstatic void testerr(xmlRegExecCtxtPtr exec) { 35587bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard const xmlChar *string; 3559cee2b3a5f124e19db46109132c22e1b8faec1c87Daniel Veillard xmlChar *values[5]; 35607bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard int nb = 5; 3561cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard int nbneg; 3562fc0b6f6adac16ea1bf2ca3bfe935e67d9e9fb974Daniel Veillard int terminal; 3563cc026dc6b069f38e8295bd4115e0620f4ede32adDaniel Veillard xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal); 35647bd8b4b817db9f3bda399acdb9e5d9919d257e89Daniel Veillard} 
#endif

#if 0
/*
 * xmlRegExecPushChar:
 * @exec: a regexp execution context
 * @UCS: a character value (not referenced anywhere in the body below)
 *
 * Compiled-out (#if 0) draft of a character-level execution loop. It walks
 * the transitions of exec->state against exec->inputString starting at
 * exec->index, saving alternatives with xmlFARegExecSave() and backing up
 * with xmlFARegExecRollBack() when no transition matches.
 *
 * Returns -1 if @exec is NULL, or exec->status if it is already non-zero.
 *
 * NOTE(review): the function is declared int but has no return statement
 * after the main loop; this must be fixed before the code is ever enabled.
 * NOTE(review): `len` is only assigned by CUR_SCHAR in the character
 * branch; the `exec->index += len` after a successful counted transition
 * may therefore read a stale or unset value -- confirm before enabling.
 */
static int
xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
    xmlRegTransPtr trans;
    xmlRegAtomPtr atom;
    int ret;
    int codepoint, len;

    if (exec == NULL)
        return(-1);
    if (exec->status != 0)
        return(exec->status);

    /* Loop until an error status is set, or input is exhausted in a final state. */
    while ((exec->status == 0) &&
           ((exec->inputString[exec->index] != 0) ||
            (exec->state->type != XML_REGEXP_FINAL_STATE))) {

        /*
         * End of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.
         */
        if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
            goto rollback;

        exec->transcount = 0;
        /* Resume scanning at exec->transno (not necessarily 0). */
        for (;exec->transno < exec->state->nbTrans;exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            /* Transitions with a negative target are skipped. */
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                /*
                 * A counted transition.
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_REGEXP_EXEC
                printf("testing count %d: val %d, min %d, max %d\n",
                       trans->count, count, counter->min, counter->max);
#endif
                /* Taken only when the counter value lies within [min, max]. */
                ret = ((count >= counter->min) && (count <= counter->max));
            } else if (atom == NULL) {
                fprintf(stderr, "epsilon transition left at runtime\n");
                exec->status = -2;
                break;
            } else if (exec->inputString[exec->index] != 0) {
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
                ret = xmlRegCheckCharacter(atom, codepoint);
                if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = exec->comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        xmlFARegExecSave(exec);
                    }
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        exec->index += len;
                        /*
                         * End of input: stop here
                         */
                        if (exec->inputString[exec->index] == 0) {
                            exec->index -= len;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            /*
                             * Temporarily pretend to be in the target state
                             * so the saved snapshot refers to it, then
                             * restore the real position.
                             */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            xmlFARegExecSave(exec);
                            exec->transno = transno;
                            exec->state = state;
                        }
                        codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
                                              len);
                        ret = xmlRegCheckCharacter(atom, codepoint);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                }
            }
            if (ret == 1) {
                /* Keep an alternative only if other transitions remain. */
                if (exec->state->nbTrans > exec->transno + 1) {
                    xmlFARegExecSave(exec);
                }
                if (trans->counter >= 0) {
#ifdef DEBUG_REGEXP_EXEC
                    printf("Increasing count %d\n", trans->counter);
#endif
                    exec->counts[trans->counter]++;
                }
#ifdef DEBUG_REGEXP_EXEC
                printf("entering state %d\n", trans->to);
#endif
                exec->state = exec->comp->states[trans->to];
                exec->transno = 0;
                /* NOTE(review): len may be unset here, see function header. */
                if (trans->atom != NULL) {
                    exec->index += len;
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = -4;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
        }
progress:
        continue;
    }
}
#endif
/************************************************************************
 *                                                                      *
Brack * Parser for the Schemas Datatype Regular Expressions * 37184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * 37194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 37204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 37214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 37224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 37234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAIsChar: 3724441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 37254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 37264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [10] Char ::= [^.\?*+()|#x5B#x5D] 37274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 37284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 37294255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAIsChar(xmlRegParserCtxtPtr ctxt) { 37304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int cur; 37314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int len; 37324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 37334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR_SCHAR(ctxt->cur, len); 37344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((cur == '.') || (cur == '\\') || (cur == '?') || 37354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == '*') || (cur == '+') || (cur == '(') || 37364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == ')') || (cur == '|') || (cur == 0x5B) || 37374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 0x5D) || (cur == 0)) 37384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 37394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(cur); 37404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 
37414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 37424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 37434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharProp: 3744441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 37454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 37464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [27] charProp ::= IsCategory | IsBlock 37474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | 37484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Separators | Symbols | Others 37494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [29] Letters ::= 'L' [ultmo]? 37504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [30] Marks ::= 'M' [nce]? 37514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [31] Numbers ::= 'N' [dlo]? 37524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [32] Punctuation ::= 'P' [cdseifo]? 37534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [33] Separators ::= 'Z' [slp]? 37544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [34] Symbols ::= 'S' [mcko]? 37554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [35] Others ::= 'C' [cfon]? 37564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ 37574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 37584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 37594255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { 37604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int cur; 3761779af00750fa86045e94422287d67a2cf5723f65William M. 
Brack xmlRegAtomType type = (xmlRegAtomType) 0; 37624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlChar *blockName = NULL; 37634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 37644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 37654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'L') { 37664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 37684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'u') { 37694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER_UPPERCASE; 37714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'l') { 37724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER_LOWERCASE; 37744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 't') { 37754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER_TITLECASE; 37774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'm') { 37784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER_MODIFIER; 37804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'o') { 37814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER_OTHERS; 37834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 37844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_LETTER; 37854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 37864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 
'M') { 37874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 37894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'n') { 37904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* nonspacing */ 37924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_MARK_NONSPACING; 37934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'c') { 37944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* spacing combining */ 37964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_MARK_SPACECOMBINING; 37974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'e') { 37984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 37994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* enclosing */ 38004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_MARK_ENCLOSING; 38014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 38024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all marks */ 38034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_MARK; 38044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 38054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'N') { 38064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 38084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'd') { 38094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* digital */ 38114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NUMBER_DECIMAL; 38124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
} else if (cur == 'l') { 38134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* letter */ 38154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NUMBER_LETTER; 38164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'o') { 38174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* other */ 38194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NUMBER_OTHERS; 38204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 38214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all numbers */ 38224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NUMBER; 38234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 38244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'P') { 38254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 38274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'c') { 38284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* connector */ 38304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_CONNECTOR; 38314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'd') { 38324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* dash */ 38344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_DASH; 38354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 's') { 38364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* open */ 38384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
type = XML_REGEXP_PUNCT_OPEN; 38394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'e') { 38404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* close */ 38424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_CLOSE; 38434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'i') { 38444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* initial quote */ 38464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_INITQUOTE; 38474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'f') { 38484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* final quote */ 38504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_FINQUOTE; 38514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'o') { 38524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* other */ 38544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT_OTHERS; 38554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 38564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all punctuation */ 38574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_PUNCT; 38584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 38594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'Z') { 38604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 38624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 's') { 38634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 
38644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* space */ 38654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SEPAR_SPACE; 38664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'l') { 38674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* line */ 38694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SEPAR_LINE; 38704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'p') { 38714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* paragraph */ 38734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SEPAR_PARA; 38744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 38754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all separators */ 38764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SEPAR; 38774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 38784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'S') { 38794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 38814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'm') { 38824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SYMBOL_MATH; 38844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* math */ 38854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'c') { 38864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SYMBOL_CURRENCY; 38884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* currency */ 38894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
else if (cur == 'k') { 38904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SYMBOL_MODIFIER; 38924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* modifiers */ 38934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'o') { 38944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 38954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SYMBOL_OTHERS; 38964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* other */ 38974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 38984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all symbols */ 38994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_SYMBOL; 39004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'C') { 39024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 39044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'c') { 39054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* control */ 39074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_OTHER_CONTROL; 39084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'f') { 39094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* format */ 39114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_OTHER_FORMAT; 39124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'o') { 39134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* private use */ 39154255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard type = XML_REGEXP_OTHER_PRIVATE; 39164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'n') { 39174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* not assigned */ 39194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_OTHER_NA; 39204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 39214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* all others */ 39224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_OTHER; 39234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'I') { 39254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard const xmlChar *start; 39264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 39284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur != 's') { 39294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("IsXXXX expected"); 39304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 39314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard start = ctxt->cur; 39344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 39354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (((cur >= 'a') && (cur <= 'z')) || 39364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= 'A') && (cur <= 'Z')) || 39374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= '0') && (cur <= '9')) || 39384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 0x2D)) { 39394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 
39414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while (((cur >= 'a') && (cur <= 'z')) || 39424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= 'A') && (cur <= 'Z')) || 39434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= '0') && (cur <= '9')) || 39444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 0x2D)) { 39454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 39474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_BLOCK_NAME; 39504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard blockName = xmlStrndup(start, ctxt->cur - start); 39514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 39524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Unknown char property"); 39534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 39544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) { 39564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, type); 39574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom != NULL) 39584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->valuep = blockName; 39594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 39604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 39614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type, 0, 0, blockName); 39624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 39644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
39654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 39664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharClassEsc: 3967441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 39684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 39694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 39704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] 39714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [25] catEsc ::= '\p{' charProp '}' 39724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [26] complEsc ::= '\P{' charProp '}' 39734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) 39744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 39754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 39764255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { 39774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int cur; 39784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 39794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == '.') { 39804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) { 39814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); 39824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 39834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 39844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard XML_REGEXP_ANYCHAR, 0, 0, NULL); 39854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 
39874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 39884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '\\') { 39904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Escaped sequence: expecting \\"); 39914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 39924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 39934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 39954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'p') { 39964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 39974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '{') { 39984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting '{'"); 39994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 40004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharProp(ctxt); 40034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '}') { 40044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting '}'"); 40054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 40064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (cur == 'P') { 40094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '{') { 40114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting '{'"); 40124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 40134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
40144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharProp(ctxt); 40164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->neg = 1; 40174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '}') { 40184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting '}'"); 40194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 40204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || 40234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || 40244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || 40254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || 40264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 0x5E)) { 40274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) { 40284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 402999c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard if (ctxt->atom != NULL) { 403099c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard switch (cur) { 403199c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard case 'n': 403299c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard ctxt->atom->codepoint = '\n'; 403399c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard break; 403499c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard case 'r': 403599c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard ctxt->atom->codepoint = '\r'; 403699c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard break; 
403799c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard case 't': 403899c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard ctxt->atom->codepoint = '\t'; 403999c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard break; 404099c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard default: 404199c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard ctxt->atom->codepoint = cur; 404299c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard } 404399c394d9c50efdaca1d4c437cf22c9c160ed9c65Daniel Veillard } 40444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 40454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 40464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard XML_REGEXP_CHARVAL, cur, cur, NULL); 40474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || 40504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || 40514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (cur == 'w') || (cur == 'W')) { 4052b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlRegAtomType type = XML_REGEXP_ANYSPACE; 40534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 40544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (cur) { 40554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 's': 40564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_ANYSPACE; 40574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'S': 40594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NOTSPACE; 40604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 
40614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'i': 40624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_INITNAME; 40634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'I': 40654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NOTINITNAME; 40664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'c': 40684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NAMECHAR; 40694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'C': 40714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NOTNAMECHAR; 40724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'd': 40744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_DECIMAL; 40754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'D': 40774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NOTDECIMAL; 40784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'w': 40804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_REALCHAR; 40814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'W': 40834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type = XML_REGEXP_NOTREALCHAR; 40844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 40854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 40874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
if (ctxt->atom == NULL) { 40884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, type); 40894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (ctxt->atom->type == XML_REGEXP_RANGES) { 40904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 40914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard type, 0, 0, NULL); 40924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 40944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 40954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 40964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 40974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharRef: 4098441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 40994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 41004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' ) 41014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 41024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 41034255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) { 41044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret = 0, cur; 41054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 41064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((CUR != '&') || (NXT(1) != '#')) 41074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 41084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 41094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 41104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == 'x') { 41124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 
41134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (((cur >= '0') && (cur <= '9')) || 41154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= 'a') && (cur <= 'f')) || 41164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= 'A') && (cur <= 'F'))) { 41174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while (((cur >= '0') && (cur <= '9')) || 41184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ((cur >= 'A') && (cur <= 'F'))) { 41194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((cur >= '0') && (cur <= '9')) 41204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ret * 16 + cur - '0'; 41214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if ((cur >= 'a') && (cur <= 'f')) 41224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ret * 16 + 10 + (cur - 'a'); 41234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else 41244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ret * 16 + 10 + (cur - 'A'); 41254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 41264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 41294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Char ref: expecting [0-9A-F]"); 41304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 41314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 41334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((cur >= '0') && (cur <= '9')) { 41344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((cur >= '0') && (cur <= '9')) { 41354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ret * 10 + cur - '0'; 41364255d504151db75c17f85192ce74f45dd2d65533Daniel 
Veillard NEXT; 41374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 41404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Char ref: expecting [0-9]"); 41414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 41424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur != ';') { 41454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Char ref: expecting ';'"); 41464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 41474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 41484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 41494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 41514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 41524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 41534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 41544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharRange: 4155441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 41564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 41574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash 41584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [18] seRange ::= charOrEsc '-' charOrEsc 41594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [20] charOrEsc ::= XmlChar | SingleCharEsc 41604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [21] XmlChar ::= [^\#x2D#x5B#x5D] 41614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [22] XmlCharIncDash ::= [^\#x5B#x5D] 
41624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 41634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 41644255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { 4165dc99df936c74b6ced82904086544fec365d1f219William M. Brack int cur, len; 41664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int start = -1; 41674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int end = -1; 41684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 41694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((CUR == '&') && (NXT(1) == '#')) { 41704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard end = start = xmlFAParseCharRef(ctxt); 41714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 41724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard XML_REGEXP_CHARVAL, start, end, NULL); 41734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 41744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == '\\') { 41774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 41784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 41794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (cur) { 41804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'n': start = 0xA; break; 41814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'r': start = 0xD; break; 41824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 't': start = 0x9; break; 41834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case '\\': case '|': case '.': case '-': case '^': case '?': 41844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case '*': case '+': case '{': case '}': case '(': case ')': 41854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 
'[': case ']': 41864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard start = cur; break; 41874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard default: 41884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Invalid escape value"); 41894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 41904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 41914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard end = start; 4192dc99df936c74b6ced82904086544fec365d1f219William M. Brack len = 1; 41934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if ((cur != 0x5B) && (cur != 0x5D)) { 4194dc99df936c74b6ced82904086544fec365d1f219William M. Brack end = start = CUR_SCHAR(ctxt->cur, len); 41954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 41964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting a char range"); 41974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 41984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 4199dc99df936c74b6ced82904086544fec365d1f219William M. Brack NEXTL(len); 42004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (start == '-') { 42014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 42024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 42034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 420410f1ef4ce875d6affb467f33ab653cd8072e5888William M. 
Brack if ((cur != '-') || (NXT(1) == ']')) { 42054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 42064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard XML_REGEXP_CHARVAL, start, end, NULL); 42074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 42084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 42094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 42104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 42114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == '\\') { 42124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 42134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 42144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard switch (cur) { 42154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'n': end = 0xA; break; 42164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 'r': end = 0xD; break; 42174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case 't': end = 0x9; break; 42184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case '\\': case '|': case '.': case '-': case '^': case '?': 42194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case '*': case '+': case '{': case '}': case '(': case ')': 42204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard case '[': case ']': 42214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard end = cur; break; 42224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard default: 42234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Invalid escape value"); 42244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 42254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 4226dc99df936c74b6ced82904086544fec365d1f219William M. Brack len = 1; 42274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if ((cur != 0x5B) && (cur != 0x5D)) { 4228dc99df936c74b6ced82904086544fec365d1f219William M. 
Brack end = CUR_SCHAR(ctxt->cur, len); 42294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 42304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Expecting the end of a char range"); 42314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 42324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 4233dc99df936c74b6ced82904086544fec365d1f219William M. Brack NEXTL(len); 42344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* TODO check that the values are acceptable character ranges for XML */ 42354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (end < start) { 42364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("End of range is before start of range"); 42374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 42384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, 42394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard XML_REGEXP_CHARVAL, start, end, NULL); 42404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 42414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 42424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 42434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 42444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 42454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParsePosCharGroup: 4246441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 42474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 42484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [14] posCharGroup ::= ( charRange | charClassEsc )+ 42494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 42504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 42514255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { 42524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard do { 
42534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((CUR == '\\') || (CUR == '.')) { 42544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharClassEsc(ctxt); 42554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 42564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharRange(ctxt); 42574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 42584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && 42594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard (ctxt->error == 0)); 42604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 42614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 42624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 42634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharGroup: 4264441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 42654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 42664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub 42674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [15] negCharGroup ::= '^' posCharGroup 42684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr 42694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [12] charClassExpr ::= '[' charGroup ']' 42704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 42714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 42724255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { 42734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int n = ctxt->neg; 42744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((CUR != ']') && (ctxt->error == 0)) { 42754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == '^') { 
42764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int neg = ctxt->neg; 42774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 42784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 42794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->neg = !ctxt->neg; 42804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParsePosCharGroup(ctxt); 42814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->neg = neg; 428210f1ef4ce875d6affb467f33ab653cd8072e5888William M. Brack } else if ((CUR == '-') && (NXT(1) == '[')) { 4283f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard int neg = ctxt->neg; 4284f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard ctxt->neg = 2; 428510f1ef4ce875d6affb467f33ab653cd8072e5888William M. Brack NEXT; /* eat the '-' */ 428610f1ef4ce875d6affb467f33ab653cd8072e5888William M. Brack NEXT; /* eat the '[' */ 42874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharGroup(ctxt); 42884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == ']') { 42894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 42904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 42914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("charClassExpr: ']' expected"); 42924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 42934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 4294f8b9de32543a23c932a392362d4e4a6704c2c953Daniel Veillard ctxt->neg = neg; 42954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard break; 42964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (CUR != ']') { 42974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParsePosCharGroup(ctxt); 42984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 42994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->neg = n; 43014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 
43024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 43044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseCharClass: 4305441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 43064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 43074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [11] charClass ::= charClassEsc | charClassExpr 43084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [12] charClassExpr ::= '[' charGroup ']' 43094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 43104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 43114255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { 43124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == '[') { 43134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 43144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); 43154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) 43164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 43174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharGroup(ctxt); 43184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == ']') { 43194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 43204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 43214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("xmlFAParseCharClass: ']' expected"); 43224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 43244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharClassEsc(ctxt); 43254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 
43274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 43294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseQuantExact: 4330441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 43314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 43324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [8] QuantExact ::= [0-9]+ 4333a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard * 4334a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard * Returns 0 if success or -1 in case of error 43354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 43364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 43374255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { 43384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret = 0; 43394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ok = 0; 43404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((CUR >= '0') && (CUR <= '9')) { 43424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = ret * 10 + (CUR - '0'); 43434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ok = 1; 43444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 43454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ok != 1) { 43474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 43484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 43504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 43514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 43534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseQuantifier: 
4354441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 43554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 43564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) 43574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [5] quantity ::= quantRange | quantMin | QuantExact 43584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [6] quantRange ::= QuantExact ',' QuantExact 43594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [7] quantMin ::= QuantExact ',' 43604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [8] QuantExact ::= [0-9]+ 43614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 43624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 43634255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { 43644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int cur; 43654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = CUR; 43674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((cur == '?') || (cur == '*') || (cur == '+')) { 43684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom != NULL) { 43694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == '?') 43704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->quant = XML_REGEXP_QUANT_OPT; 43714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if (cur == '*') 43724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->quant = XML_REGEXP_QUANT_MULT; 43734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else if (cur == '+') 43744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; 43754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 
43774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 43784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur == '{') { 43804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int min = 0, max = 0; 43814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 43824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 43834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard cur = xmlFAParseQuantExact(ctxt); 43844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (cur >= 0) 43854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard min = cur; 43864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == ',') { 43874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 4388ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard if (CUR == '}') 4389ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard max = INT_MAX; 4390ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard else { 4391ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard cur = xmlFAParseQuantExact(ctxt); 4392ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard if (cur >= 0) 4393ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard max = cur; 4394ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard else { 4395ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard ERROR("Improper quantifier"); 4396ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard } 4397ebe48c60cc941881718222e69fabf604fe23e43dDaniel Veillard } 43984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 43994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == '}') { 44004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 44014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 44024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("Unterminated quantifier"); 44034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
44044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (max == 0) 44054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard max = min; 44064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom != NULL) { 44074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; 44084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->min = min; 44094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->max = max; 44104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 44114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 44124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 44134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 44154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 44174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseAtom: 4418441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 44194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 44204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [9] atom ::= Char | charClass | ( '(' regExp ')' ) 44214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 44224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 44234255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { 44244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int codepoint, len; 44254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard codepoint = xmlFAIsChar(ctxt); 44274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (codepoint > 0) { 44284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); 
44294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) 44304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 44314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard codepoint = CUR_SCHAR(ctxt->cur, len); 44324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->codepoint = codepoint; 44334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXTL(len); 44344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 44354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (CUR == '|') { 44364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (CUR == 0) { 44384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (CUR == ')') { 44404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if (CUR == '(') { 44424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr start, oldend; 44434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 44454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); 44464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard start = ctxt->state; 44474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard oldend = ctxt->end; 44484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = NULL; 44494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = NULL; 44504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseRegExp(ctxt, 0); 44514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR == ')') { 44524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 44534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else { 
44544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("xmlFAParseAtom: expecting ')'"); 44554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 44564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); 44574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) 44584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 44594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->start = start; 44604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom->stop = ctxt->state; 44614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = oldend; 44624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 44634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { 44644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseCharClass(ctxt); 44654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 44664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 44674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 44694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 44714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParsePiece: 4472441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 44734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 44744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [3] piece ::= atom quantifier? 
44754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 44764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic int 44774255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { 44784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret; 44794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = NULL; 44814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlFAParseAtom(ctxt); 44824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret == 0) 44834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 44844255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->atom == NULL) { 44854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("internal: no atom generated"); 44864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 44874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseQuantifier(ctxt); 44884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(1); 44894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 44904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 44914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 44924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseBranch: 4493441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 44944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 44954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [2] branch ::= piece* 4496a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard 8 44974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 4498a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillardstatic int 44992cbf596c7f6d4ef21798bb06efd7542781124300Daniel VeillardxmlFAParseBranch(xmlRegParserCtxtPtr ctxt) { 45004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePtr previous; 
45014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int ret; 45024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 45034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard previous = ctxt->state; 45044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlFAParsePiece(ctxt); 45054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret != 0) { 45062cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0) 45072cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard return(-1); 45082cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard previous = ctxt->state; 45094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = NULL; 45104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 45114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((ret != 0) && (ctxt->error == 0)) { 45124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlFAParsePiece(ctxt); 45134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ret != 0) { 45142cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard if (xmlFAGenerateTransitions(ctxt, previous, NULL, 45152cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->atom) < 0) 4516a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(-1); 45174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard previous = ctxt->state; 45184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->atom = NULL; 45194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 45204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 4521a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(0); 45224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 45234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 45244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 45254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFAParseRegExp: 
4526441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard * @ctxt: a regexp parser context 4527ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * @top: is this the top-level expression ? 45284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 45294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * [1] regExp ::= branch ( '|' branch )* 45304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 45314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardstatic void 45324255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { 4533c7e3cc49bade82dba0cda4ae7c07ffcd1e32fe25Daniel Veillard xmlRegStatePtr start, end; 45344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 45352cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard /* if not top start should have been generated by an epsilon trans */ 45364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard start = ctxt->state; 45372cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->end = NULL; 45382cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard xmlFAParseBranch(ctxt); 45392cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard if (top) { 45402cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 45412cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("State %d is final\n", ctxt->state->no); 45422cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard#endif 45432cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->state->type = XML_REGEXP_FINAL_STATE; 45442cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard } 45454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != '|') { 45464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = ctxt->state; 45474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 45484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 45494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard end = ctxt->state; 
45504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard while ((CUR == '|') && (ctxt->error == 0)) { 45514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard NEXT; 45524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->state = start; 45532cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->end = NULL; 45542cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard xmlFAParseBranch(ctxt); 45552cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard if (top) { 45562cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->state->type = XML_REGEXP_FINAL_STATE; 45572cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard#ifdef DEBUG_REGEXP_GRAPH 45582cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard printf("State %d is final\n", ctxt->state->no); 45592cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard#endif 45602cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard } else { 45612cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end); 45622cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard } 45632cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard } 45642cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard if (!top) { 45652cbf596c7f6d4ef21798bb06efd7542781124300Daniel Veillard ctxt->state = end; 45664255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = end; 45674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 45684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 45694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 45704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 45714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 45724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The basic API * 45734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 45744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
************************************************************************/ 45754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 45764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 45774255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegexpPrint: 45784255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @output: the file for the output debug 45794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @regexp: the compiled regexp 45804255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 45814255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Print the content of the compiled regular expression 45824255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 45834255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardvoid 45844255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { 45854255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 45864255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 4587a82b182655ccee95e3b7210066206ddb3918823fDaniel Veillard if (output == NULL) 4588a82b182655ccee95e3b7210066206ddb3918823fDaniel Veillard return; 45894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " regexp: "); 45904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp == NULL) { 45914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "NULL\n"); 45924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 45934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 45944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "'%s' ", regexp->string); 45954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "\n"); 45964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d atoms:\n", regexp->nbAtoms); 45974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < regexp->nbAtoms; i++) { 45984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
fprintf(output, " %02d ", i); 45994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintAtom(output, regexp->atoms[i]); 46004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 46014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d states:", regexp->nbStates); 46024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "\n"); 46034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < regexp->nbStates; i++) { 46044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegPrintState(output, regexp->states[i]); 46054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 46064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, "%d counters:\n", regexp->nbCounters); 46074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < regexp->nbCounters; i++) { 46084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, 46094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard regexp->counters[i].max); 46104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 46114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 46124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 46144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegexpCompile: 46154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @regexp: a regular expression string 46164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 46174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Parses a regular expression conforming to XML Schemas Part 2 Datatype 4618ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Appendix F and builds an automata suitable for testing strings against 46194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * that regular expression 46204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 46214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the compiled expression or NULL in case of error 46224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 46234255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegexpPtr 46244255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegexpCompile(const xmlChar *regexp) { 46254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegexpPtr ret; 46264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegParserCtxtPtr ctxt; 46274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt = xmlRegNewParserCtxt(regexp); 46294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt == NULL) 46304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 46314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* initialize the parser */ 46334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = NULL; 46344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->start = ctxt->state = xmlRegNewState(ctxt); 46354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(ctxt, ctxt->start); 46364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* parse the expression building an automata */ 46384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAParseRegExp(ctxt, 1); 46394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (CUR != 0) { 46404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ERROR("xmlFAParseRegExp: extra characters"); 46414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
46424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = ctxt->state; 46434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->start->type = XML_REGEXP_START_STATE; 46444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end->type = XML_REGEXP_FINAL_STATE; 46454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* remove the Epsilon except for counted transitions */ 46474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAEliminateEpsilonTransitions(ctxt); 46484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt->error != 0) { 46514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeParserCtxt(ctxt); 46524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 46534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 46544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegEpxFromParse(ctxt); 46554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeParserCtxt(ctxt); 46564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 46574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 46584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 46604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegexpExec: 46614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @comp: the compiled regular expression 46624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @content: the value to check against the regular expression 46634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 4664ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Check if the regular expression generates the value 46654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 4666ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * Returns 1 if it matches, 0 if not and a negative value in case of error 46674255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 46684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardint 46694255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { 46704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((comp == NULL) || (content == NULL)) 46714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 46724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(xmlFARegExec(comp, content)); 46734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 46744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 46754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 467623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * xmlRegexpIsDeterminist: 467723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * @comp: the compiled regular expression 467823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 467923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * Check if the regular expression is determinist 468023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard * 4681ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * Returns 1 if it yes, 0 if not and a negative value in case of error 468223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard */ 468323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillardint 468423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel VeillardxmlRegexpIsDeterminist(xmlRegexpPtr comp) { 468523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlAutomataPtr am; 468623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard int ret; 468723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 468823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (comp == NULL) 468923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(-1); 469023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (comp->determinist != -1) 469123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(comp->determinist); 469223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 469323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am = xmlNewAutomata(); 4694bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard if (am->states != NULL) { 4695bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard int i; 4696bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard 4697bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard for (i = 0;i < am->nbStates;i++) 4698bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard xmlRegFreeState(am->states[i]); 4699bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard xmlFree(am->states); 4700bd9afb529069415baf1f32d907f035de19dae788Daniel Veillard } 470123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->nbAtoms = comp->nbAtoms; 470223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->atoms = comp->atoms; 470323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->nbStates = comp->nbStates; 470423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->states = comp->states; 470523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->determinist = -1; 
470623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard ret = xmlFAComputesDeterminism(am); 470723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->atoms = NULL; 470823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard am->states = NULL; 470923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFreeAutomata(am); 471023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard return(ret); 471123e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard} 471223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 471323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard/** 47144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlRegFreeRegexp: 47154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @regexp: the regexp 47164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 47174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free a regexp 47184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 47194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardvoid 47204255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegFreeRegexp(xmlRegexpPtr regexp) { 47214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int i; 47224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp == NULL) 47234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 47244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp->string != NULL) 47264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(regexp->string); 47274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp->states != NULL) { 47284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < regexp->nbStates;i++) 47294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeState(regexp->states[i]); 47304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(regexp->states); 47314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 
47324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp->atoms != NULL) { 47334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard for (i = 0;i < regexp->nbAtoms;i++) 47344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeAtom(regexp->atoms[i]); 47354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(regexp->atoms); 47364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard } 47374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (regexp->counters != NULL) 47384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(regexp->counters); 473923e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (regexp->compact != NULL) 474023e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(regexp->compact); 4741118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard if (regexp->transdata != NULL) 4742118aed78f360f51d182770e62b251ef324707aa2Daniel Veillard xmlFree(regexp->transdata); 474323e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard if (regexp->stringMap != NULL) { 474423e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard for (i = 0; i < regexp->nbstrings;i++) 474523e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(regexp->stringMap[i]); 474623e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard xmlFree(regexp->stringMap); 474723e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard } 474823e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard 47494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFree(regexp); 47504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 47514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#ifdef LIBXML_AUTOMATA_ENABLED 47534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/************************************************************************ 47544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 47554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The 
Automata interface * 47564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * * 47574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ************************************************************************/ 47584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47594255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 47604255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlNewAutomata: 47614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 47624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Create a new automata 47634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 47644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new object or NULL in case of failure 47654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 47664255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataPtr 47674255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlNewAutomata(void) { 47684255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlAutomataPtr ctxt; 47694255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt = xmlRegNewParserCtxt(NULL); 47714255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (ctxt == NULL) 47724255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 47734255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47744255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard /* initialize the parser */ 47754255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->end = NULL; 47764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ctxt->start = ctxt->state = xmlRegNewState(ctxt); 4777db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard ctxt->start->type = XML_REGEXP_START_STATE; 4778a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (ctxt->start == NULL) { 4779a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFreeAutomata(ctxt); 
4780a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 4781a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 4782a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (xmlRegStatePush(ctxt, ctxt->start) < 0) { 4783a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlRegFreeState(ctxt->start); 4784a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlFreeAutomata(ctxt); 4785a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 4786a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 47874255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ctxt); 47894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 47904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 47914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 47924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlFreeAutomata: 47934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 47944255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 47954255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Free an automata 47964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 47974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardvoid 47984255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlFreeAutomata(xmlAutomataPtr am) { 47994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (am == NULL) 48004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return; 48014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegFreeParserCtxt(am); 48024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 48034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 48044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 48054255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataGetInitState: 48064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 
48074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 4808a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * Initial state lookup 4809a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * 48104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the initial state of the automata 48114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 48124255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataStatePtr 48134255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataGetInitState(xmlAutomataPtr am) { 48144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (am == NULL) 48154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 48164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(am->start); 48174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 48184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 48194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 48204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataSetFinalState: 48214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 48224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @state: a state in this automata 48234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 48244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Makes that state a final state 48254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 48264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns 0 or -1 in case of error 48274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 48284255d504151db75c17f85192ce74f45dd2d65533Daniel Veillardint 48294255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { 48304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((am == NULL) || (state == NULL)) 48314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(-1); 
48324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard state->type = XML_REGEXP_FINAL_STATE; 48334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(0); 48344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 48354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 48364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 48374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataNewTransition: 48384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 48394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @from: the starting point of the transition 48404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @to: the target point of the transition or NULL 48414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @token: the input string associated to that transition 48424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @data: data passed to the callback function if the transition is activated 48434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 4844ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * If @to is NULL, this creates first a new target state in the automata 48454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * and then adds a transition from the @from state to the target state 48464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * activated by the value of @token 48474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 48484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the target state or NULL in case of error 48494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 48504255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataStatePtr 48514255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, 48524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlAutomataStatePtr to, const xmlChar *token, 48534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard void *data) { 48544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 48554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 48564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((am == NULL) || (from == NULL) || (token == NULL)) 48574255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 48584255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 4859a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (atom == NULL) 4860a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 48614255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->data = data; 48624255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) 48634255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 48644255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->valuep = xmlStrdup(token); 48654255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 4866a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (xmlFAGenerateTransitions(am, from, 
to, atom) < 0) { 4867a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlRegFreeAtom(atom); 4868a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 4869a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 48704255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) 487152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(am->state); 487252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(to); 487352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard} 487452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 487552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard/** 487652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * xmlAutomataNewTransition2: 487752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @am: an automata 487852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @from: the starting point of the transition 487952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @to: the target point of the transition or NULL 488052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @token: the first input string associated to that transition 488152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @token2: the second input string associated to that transition 488252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * @data: data passed to the callback function if the transition is activated 488352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * 4884ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * If @to is NULL, this creates first a new target state in the automata 488552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * and then adds a transition from the @from state to the target state 488652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * activated by the value of @token 488752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * 488852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard * Returns the target state or NULL in case of error 488952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard */ 489052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel VeillardxmlAutomataStatePtr 489152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel VeillardxmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from, 489252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlAutomataStatePtr to, const xmlChar *token, 489352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard const xmlChar *token2, void *data) { 489452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlRegAtomPtr atom; 489552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 489652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if ((am == NULL) || (from == NULL) || (token == NULL)) 489752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(NULL); 489852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 489952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard atom->data = data; 490052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (atom == NULL) 490152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(NULL); 490252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if ((token2 == NULL) || (*token2 == 0)) { 490352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard atom->valuep = xmlStrdup(token); 490452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } else { 490552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard int lenn, lenp; 
490652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlChar *str; 490752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 490852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard lenn = strlen((char *) token2); 490952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard lenp = strlen((char *) token); 491052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 49113c908dca479ed50dca24b8593bca90e40dbde6b8Daniel Veillard str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 491252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (str == NULL) { 491352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard xmlRegFreeAtom(atom); 491452b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard return(NULL); 491552b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } 491652b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard memcpy(&str[0], token, lenp); 491752b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard str[lenp] = '|'; 491852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard memcpy(&str[lenp + 1], token2, lenn); 491952b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard str[lenn + lenp + 1] = 0; 492052b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 492152b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard atom->valuep = str; 492252b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard } 492352b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard 4924a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 4925a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard xmlRegFreeAtom(atom); 4926a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard return(NULL); 4927a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 492852b48c7a7bfb338f434d39f9fc3e54768e301575Daniel Veillard if (to == NULL) 49294255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(am->state); 49309efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(to); 
49319efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard} 49329efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49339efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard/** 49349efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * xmlAutomataNewNegTrans: 49359efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @am: an automata 49369efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @from: the starting point of the transition 49379efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @to: the target point of the transition or NULL 49389efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @token: the first input string associated to that transition 49399efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @token2: the second input string associated to that transition 49409efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * @data: data passed to the callback function if the transition is activated 49419efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * 49429efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * If @to is NULL, this creates first a new target state in the automata 49439efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * and then adds a transition from the @from state to the target state 49449efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * activated by any value except (@token,@token2) 49456e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard * Note that if @token2 is not NULL, then (X, NULL) won't match to follow 49466e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard # the semantic of XSD ##other 49479efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * 49489efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard * Returns the target state or NULL in case of error 49499efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard */ 49509efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel VeillardxmlAutomataStatePtr 49519efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel 
VeillardxmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 49529efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard xmlAutomataStatePtr to, const xmlChar *token, 49539efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard const xmlChar *token2, void *data) { 49549efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard xmlRegAtomPtr atom; 495577005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard xmlChar err_msg[200]; 49569efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49579efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if ((am == NULL) || (from == NULL) || (token == NULL)) 49589efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(NULL); 49599efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 49609efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if (atom == NULL) 49619efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(NULL); 49629efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard atom->data = data; 49639efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard atom->neg = 1; 49649efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if ((token2 == NULL) || (*token2 == 0)) { 49659efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard atom->valuep = xmlStrdup(token); 49669efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard } else { 49679efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard int lenn, lenp; 49689efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard xmlChar *str; 49699efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49709efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard lenn = strlen((char *) token2); 49719efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard lenp = strlen((char *) token); 49729efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49739efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 49749efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if (str 
== NULL) { 49759efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard xmlRegFreeAtom(atom); 49769efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(NULL); 49779efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard } 49789efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard memcpy(&str[0], token, lenp); 49799efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard str[lenp] = '|'; 49809efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard memcpy(&str[lenp + 1], token2, lenn); 49819efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard str[lenn + lenp + 1] = 0; 49829efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49839efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard atom->valuep = str; 49849efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard } 4985db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep); 498677005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard err_msg[199] = 0; 498777005e6ff0426ae3cfefdcfb5796f29a8e83a83bDaniel Veillard atom->valuep2 = xmlStrdup(err_msg); 49889efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard 49899efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if (xmlFAGenerateTransitions(am, from, to, atom) < 0) { 49909efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard xmlRegFreeAtom(atom); 49919efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(NULL); 49929efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard } 49936e65e15777ebb281aec362fa2aba51e2cb5aa87fDaniel Veillard am->negs++; 49949efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard if (to == NULL) 49959efc476bb6fe9d7d84ee0fe1190888801d9374ccDaniel Veillard return(am->state); 49964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(to); 49974255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 49984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 49994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 
500087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * xmlAutomataNewCountTrans2: 500187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @am: an automata 500287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @from: the starting point of the transition 500387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @to: the target point of the transition or NULL 500487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @token: the input string associated to that transition 500587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @token2: the second input string associated to that transition 500687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @min: the minimum successive occurences of token 500787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @max: the maximum successive occurences of token 500887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @data: data associated to the transition 500987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * 501087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * If @to is NULL, this creates first a new target state in the automata 501187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * and then adds a transition from the @from state to the target state 501287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * activated by a succession of input of value @token and @token2 and 501387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * whose number is between @min and @max 501487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * 501587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * Returns the target state or NULL in case of error 501687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik */ 501787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. BuchcikxmlAutomataStatePtr 501887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
BuchcikxmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 501987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlAutomataStatePtr to, const xmlChar *token, 502087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik const xmlChar *token2, 502187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int min, int max, void *data) { 502287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegAtomPtr atom; 502387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int counter; 502487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 502587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((am == NULL) || (from == NULL) || (token == NULL)) 502687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 502787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (min < 0) 502887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 502987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((max < min) || (max < 1)) 503087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 503187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 503287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (atom == NULL) 503387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 503487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((token2 == NULL) || (*token2 == 0)) { 503587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->valuep = xmlStrdup(token); 503687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } else { 503787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int lenn, lenp; 503887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlChar *str; 503987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 504087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik lenn = strlen((char *) token2); 504187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik lenp = strlen((char *) token); 504287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 504387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 504487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (str == NULL) { 504587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegFreeAtom(atom); 504687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 504787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 504887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik memcpy(&str[0], token, lenp); 504987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str[lenp] = '|'; 505087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik memcpy(&str[lenp + 1], token2, lenn); 505187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str[lenn + lenp + 1] = 0; 505287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 505387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->valuep = str; 505487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 505587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->data = data; 505687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (min == 0) 505787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->min = 1; 505887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik else 505987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->min = min; 506087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->max = max; 506187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 506287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik /* 506387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * associate a counter to the transition. 506487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik */ 506587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik counter = xmlRegGetCounter(am); 506687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->counters[counter].min = min; 506787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->counters[counter].max = max; 506887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 506987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik /* xmlFAGenerateTransitions(am, from, to, atom); */ 507087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (to == NULL) { 507187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik to = xmlRegNewState(am); 507287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegStatePush(am, to); 507387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 5074db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 507587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegAtomPush(am, atom); 507687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->state = to; 507787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 507887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (to == NULL) 507987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik to = am->state; 508087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (to == NULL) 508187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 508287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (min == 0) 508387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlFAGenerateEpsilonTransition(am, from, to); 508487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(to); 508587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik} 508687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 508787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik/** 50884255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataNewCountTrans: 50894255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 50904255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @from: the starting point of the transition 50914255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @to: the target point of the transition or NULL 50924255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @token: the input string associated to that transition 50934255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @min: the minimum successive occurences of token 5094a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * @max: the maximum successive occurences of token 5095a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * @data: data associated to the transition 50964255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 5097ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * If @to is NULL, this creates first a new target state in the automata 50984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * and then adds a transition from the @from state to the target state 50994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * activated by a succession of input of value @token and whose number 51004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * is between @min and @max 51014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 51024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the target state or NULL in case of error 51034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 51044255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataStatePtr 51054255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 51064255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlAutomataStatePtr to, const xmlChar *token, 51074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard int min, 
int max, void *data) { 51084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegAtomPtr atom; 51090ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard int counter; 51104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 51114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((am == NULL) || (from == NULL) || (token == NULL)) 51124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 51134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (min < 0) 51144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 51154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((max < min) || (max < 1)) 51164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 51174255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 51184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (atom == NULL) 51194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 51204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->valuep = xmlStrdup(token); 51214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->data = data; 51224255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (min == 0) 51234255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->min = 1; 51244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard else 51254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->min = min; 51264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard atom->max = max; 51274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 51280ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard /* 51290ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard * associate a counter to the transition. 
51300ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard */ 51310ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard counter = xmlRegGetCounter(am); 51320ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard am->counters[counter].min = min; 51330ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard am->counters[counter].max = max; 51340ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard 51350ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard /* xmlFAGenerateTransitions(am, from, to, atom); */ 51360ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard if (to == NULL) { 51370ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard to = xmlRegNewState(am); 51380ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard xmlRegStatePush(am, to); 5139a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard } 5140db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 51410ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard xmlRegAtomPush(am, atom); 51420ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard am->state = to; 51430ddb21c46ca6ac5297ff5f6537480de8463223eaDaniel Veillard 51444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) 51454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = am->state; 51464255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) 51474255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 51484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (min == 0) 51494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(am, from, to); 51504255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(to); 51514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 51524255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 51534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 515487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik * xmlAutomataNewOnceTrans2: 515587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @am: an automata 515687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @from: the starting point of the transition 515787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @to: the target point of the transition or NULL 515887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @token: the input string associated to that transition 515987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @token2: the second input string associated to that transition 516087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @min: the minimum successive occurences of token 516187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @max: the maximum successive occurences of token 516287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * @data: data associated to the transition 516387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * 516487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * If @to is NULL, this creates first a new target state in the automata 516587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * and then adds a transition from the @from state to the target state 516687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * activated by a succession of input of value @token and @token2 and whose 516787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * number is between @min and @max, moreover that transition can only be 516887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * crossed once. 516987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * 517087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik * Returns the target state or NULL in case of error 517187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik */ 517287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
BuchcikxmlAutomataStatePtr 517387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. BuchcikxmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from, 517487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlAutomataStatePtr to, const xmlChar *token, 517587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik const xmlChar *token2, 517687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int min, int max, void *data) { 517787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegAtomPtr atom; 517887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int counter; 517987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 518087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((am == NULL) || (from == NULL) || (token == NULL)) 518187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 518287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (min < 1) 518387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 518487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((max < min) || (max < 1)) 518587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 518687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 518787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (atom == NULL) 518887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 518987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if ((token2 == NULL) || (*token2 == 0)) { 519087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->valuep = xmlStrdup(token); 519187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } else { 519287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik int lenn, lenp; 519387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlChar *str; 519487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik 519587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik lenn = strlen((char *) token2); 519687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik lenp = strlen((char *) token); 519787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 519887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2); 519987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (str == NULL) { 520087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegFreeAtom(atom); 520187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(NULL); 520287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 520387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik memcpy(&str[0], token, lenp); 520487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str[lenp] = '|'; 520587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik memcpy(&str[lenp + 1], token2, lenn); 520687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik str[lenn + lenp + 1] = 0; 520787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 520887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->valuep = str; 520987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 521087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->data = data; 521187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->quant = XML_REGEXP_QUANT_ONCEONLY; 521287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (min == 0) 521387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->min = 1; 521487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik else 521587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->min = min; 521687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik atom->max = max; 521787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik /* 521887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik * associate a counter to the transition. 521987876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik */ 522087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik counter = xmlRegGetCounter(am); 522187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->counters[counter].min = 1; 522287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->counters[counter].max = 1; 522387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 522487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik /* xmlFAGenerateTransitions(am, from, to, atom); */ 522587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik if (to == NULL) { 522687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik to = xmlRegNewState(am); 522787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegStatePush(am, to); 522887876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik } 5229db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 523087876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik xmlRegAtomPush(am, atom); 523187876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik am->state = to; 523287876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik return(to); 523387876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik} 523487876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 523587876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 523687876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. Buchcik 523787876407ced312c9b2c8d8b03f988a7dd484a68eKasimier T. 
Buchcik/** 52387646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * xmlAutomataNewOnceTrans: 52397646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @am: an automata 52407646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @from: the starting point of the transition 52417646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @to: the target point of the transition or NULL 52427646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @token: the input string associated to that transition 52437646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @min: the minimum successive occurences of token 5244a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * @max: the maximum successive occurences of token 5245a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * @data: data associated to the transition 52467646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * 5247ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * If @to is NULL, this creates first a new target state in the automata 52487646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * and then adds a transition from the @from state to the target state 52497646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * activated by a succession of input of value @token and whose number 5250ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * is between @min and @max, moreover that transition can only be crossed 52517646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * once. 
52527646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * 52537646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * Returns the target state or NULL in case of error 52547646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard */ 52557646b18d64b6c739d04ca453493070e88c4aab13Daniel VeillardxmlAutomataStatePtr 52567646b18d64b6c739d04ca453493070e88c4aab13Daniel VeillardxmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 52577646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard xmlAutomataStatePtr to, const xmlChar *token, 52587646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard int min, int max, void *data) { 52597646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard xmlRegAtomPtr atom; 52607646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard int counter; 52617646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 52627646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if ((am == NULL) || (from == NULL) || (token == NULL)) 52637646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(NULL); 52647646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (min < 1) 52657646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(NULL); 52667646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if ((max < min) || (max < 1)) 52677646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(NULL); 52687646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom = xmlRegNewAtom(am, XML_REGEXP_STRING); 52697646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (atom == NULL) 52707646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(NULL); 52717646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->valuep = xmlStrdup(token); 52727646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->data = data; 52737646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->quant = XML_REGEXP_QUANT_ONCEONLY; 52747646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (min == 0) 
52757646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->min = 1; 52767646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard else 52777646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->min = min; 52787646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard atom->max = max; 52797646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard /* 52807646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * associate a counter to the transition. 52817646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard */ 52827646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard counter = xmlRegGetCounter(am); 52837646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard am->counters[counter].min = 1; 52847646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard am->counters[counter].max = 1; 52857646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 52867646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard /* xmlFAGenerateTransitions(am, from, to, atom); */ 52877646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (to == NULL) { 52887646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard to = xmlRegNewState(am); 52897646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard xmlRegStatePush(am, to); 52907646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard } 5291db68b74dc7ec531361a736de7769a3e8ce881f79Daniel Veillard xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0); 52927646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard xmlRegAtomPush(am, atom); 52937646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard am->state = to; 52947646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(to); 52957646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard} 52967646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 52977646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard/** 52984255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataNewState: 52994255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 
53004255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 53014255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Create a new disconnected state in the automata 53024255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 53034255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the new state or NULL in case of error 53044255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 53054255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataStatePtr 53064255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataNewState(xmlAutomataPtr am) { 53074255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlAutomataStatePtr to; 53084255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 53094255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (am == NULL) 53104255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 53114255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard to = xmlRegNewState(am); 53124255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegStatePush(am, to); 53134255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(to); 53144255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 53154255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 53164255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 5317a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * xmlAutomataNewEpsilon: 53184255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 53194255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @from: the starting point of the transition 53204255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @to: the target point of the transition or NULL 53214255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 5322ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * If @to is NULL, this creates first a new target state in the automata 5323ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * and then adds an epsilon transition from the @from state to the 53244255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * target state 53254255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 53264255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the target state or NULL in case of error 53274255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 53284255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataStatePtr 53294255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, 53304255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlAutomataStatePtr to) { 53314255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if ((am == NULL) || (from == NULL)) 53324255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(NULL); 53334255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAGenerateEpsilonTransition(am, from, to); 53344255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard if (to == NULL) 53354255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(am->state); 53364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(to); 53374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 53384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 5339b509f1543df71549969eeac076349e05d2f78044Daniel Veillard/** 53407646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * xmlAutomataNewAllTrans: 53417646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @am: an automata 53427646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @from: the starting point of the transition 53437646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * @to: the target point of the transition or NULL 5344a9b66d00b5fbae2381f59c0be4d8c7b8e3c16cf7Daniel Veillard * @lax: allow to transition if not all all transitions have been activated 53457646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * 5346ddf71d61c939b561a2014f27a88e6a9899355b79William M. 
Brack * If @to is NULL, this creates first a new target state in the automata 53477646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * and then adds a an ALL transition from the @from state to the 53487646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * target state. That transition is an epsilon transition allowed only when 53497646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * all transitions from the @from node have been activated. 53507646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * 53517646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard * Returns the target state or NULL in case of error 53527646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard */ 53537646b18d64b6c739d04ca453493070e88c4aab13Daniel VeillardxmlAutomataStatePtr 53547646b18d64b6c739d04ca453493070e88c4aab13Daniel VeillardxmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5355441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlAutomataStatePtr to, int lax) { 53567646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if ((am == NULL) || (from == NULL)) 53577646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(NULL); 5358441bc32e3167ed55df415500e3a22eda4eec1ac6Daniel Veillard xmlFAGenerateAllTransition(am, from, to, lax); 53597646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard if (to == NULL) 53607646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(am->state); 53617646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard return(to); 53627646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard} 53637646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard 53647646b18d64b6c739d04ca453493070e88c4aab13Daniel Veillard/** 5365b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * xmlAutomataNewCounter: 5366b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @am: an automata 5367b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @min: the minimal value on the counter 5368b509f1543df71549969eeac076349e05d2f78044Daniel 
Veillard * @max: the maximal value on the counter 5369b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5370b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * Create a new counter 5371b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5372b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * Returns the counter number or -1 in case of error 5373b509f1543df71549969eeac076349e05d2f78044Daniel Veillard */ 5374b509f1543df71549969eeac076349e05d2f78044Daniel Veillardint 5375b509f1543df71549969eeac076349e05d2f78044Daniel VeillardxmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { 5376b509f1543df71549969eeac076349e05d2f78044Daniel Veillard int ret; 5377b509f1543df71549969eeac076349e05d2f78044Daniel Veillard 5378b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (am == NULL) 5379b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(-1); 5380b509f1543df71549969eeac076349e05d2f78044Daniel Veillard 5381b509f1543df71549969eeac076349e05d2f78044Daniel Veillard ret = xmlRegGetCounter(am); 5382b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (ret < 0) 5383b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(-1); 5384b509f1543df71549969eeac076349e05d2f78044Daniel Veillard am->counters[ret].min = min; 5385b509f1543df71549969eeac076349e05d2f78044Daniel Veillard am->counters[ret].max = max; 5386b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(ret); 5387b509f1543df71549969eeac076349e05d2f78044Daniel Veillard} 5388b509f1543df71549969eeac076349e05d2f78044Daniel Veillard 5389b509f1543df71549969eeac076349e05d2f78044Daniel Veillard/** 5390b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * xmlAutomataNewCountedTrans: 5391b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @am: an automata 5392b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @from: the starting point of the transition 5393b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @to: the target point of the transition or 
NULL 5394b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @counter: the counter associated to that transition 5395b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5396ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * If @to is NULL, this creates first a new target state in the automata 5397b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * and then adds an epsilon transition from the @from state to the target state 5398b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * which will increment the counter provided 5399b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5400b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * Returns the target state or NULL in case of error 5401b509f1543df71549969eeac076349e05d2f78044Daniel Veillard */ 5402b509f1543df71549969eeac076349e05d2f78044Daniel VeillardxmlAutomataStatePtr 5403b509f1543df71549969eeac076349e05d2f78044Daniel VeillardxmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5404b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlAutomataStatePtr to, int counter) { 5405b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if ((am == NULL) || (from == NULL) || (counter < 0)) 5406b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(NULL); 5407b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlFAGenerateCountedEpsilonTransition(am, from, to, counter); 5408b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to == NULL) 5409b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(am->state); 5410b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(to); 5411b509f1543df71549969eeac076349e05d2f78044Daniel Veillard} 5412b509f1543df71549969eeac076349e05d2f78044Daniel Veillard 5413b509f1543df71549969eeac076349e05d2f78044Daniel Veillard/** 5414b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * xmlAutomataNewCounterTrans: 5415b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @am: an automata 
5416b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @from: the starting point of the transition 5417b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @to: the target point of the transition or NULL 5418b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * @counter: the counter associated to that transition 5419b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5420ddf71d61c939b561a2014f27a88e6a9899355b79William M. Brack * If @to is NULL, this creates first a new target state in the automata 5421b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * and then adds an epsilon transition from the @from state to the target state 5422b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * which will be allowed only if the counter is within the right range. 5423b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * 5424b509f1543df71549969eeac076349e05d2f78044Daniel Veillard * Returns the target state or NULL in case of error 5425b509f1543df71549969eeac076349e05d2f78044Daniel Veillard */ 5426b509f1543df71549969eeac076349e05d2f78044Daniel VeillardxmlAutomataStatePtr 5427b509f1543df71549969eeac076349e05d2f78044Daniel VeillardxmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, 5428b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlAutomataStatePtr to, int counter) { 5429b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if ((am == NULL) || (from == NULL) || (counter < 0)) 5430b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(NULL); 5431b509f1543df71549969eeac076349e05d2f78044Daniel Veillard xmlFAGenerateCountedTransition(am, from, to, counter); 5432b509f1543df71549969eeac076349e05d2f78044Daniel Veillard if (to == NULL) 5433b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(am->state); 5434b509f1543df71549969eeac076349e05d2f78044Daniel Veillard return(to); 5435b509f1543df71549969eeac076349e05d2f78044Daniel Veillard} 54364255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 
54374255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard/** 54384255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * xmlAutomataCompile: 54394255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * @am: an automata 54404255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 54414255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Compile the automata into a Reg Exp ready for being executed. 54424255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * The automata should be free after this point. 54434255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * 54444255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard * Returns the compiled regexp or NULL in case of error 54454255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard */ 54464255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlRegexpPtr 54474255d504151db75c17f85192ce74f45dd2d65533Daniel VeillardxmlAutomataCompile(xmlAutomataPtr am) { 54484255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlRegexpPtr ret; 54494255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 5450a76fe5ca11ebf9e9322dfcf7728dc55077086d43Daniel Veillard if ((am == NULL) || (am->error != 0)) return(NULL); 54514255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard xmlFAEliminateEpsilonTransitions(am); 545223e73571f8f6918e4ea7be3506ee5bd24ee86c52Daniel Veillard /* xmlFAComputesDeterminism(am); */ 54534255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard ret = xmlRegEpxFromParse(am); 54544255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard 54554255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard return(ret); 54564255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard} 5457e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 5458e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard/** 5459e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * xmlAutomataIsDeterminist: 5460e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * @am: an automata 5461e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel 
Veillard * 5462e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Checks if an automata is determinist. 5463e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * 5464e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard * Returns 1 if true, 0 if not, and -1 in case of error 5465e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard */ 5466e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillardint 5467e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel VeillardxmlAutomataIsDeterminist(xmlAutomataPtr am) { 5468e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard int ret; 5469e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 5470e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard if (am == NULL) 5471e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(-1); 5472e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard 5473e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard ret = xmlFAComputesDeterminism(am); 5474e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard return(ret); 5475e19fc23b6427f4df516af7b3f6df7baa942e4207Daniel Veillard} 54764255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif /* LIBXML_AUTOMATA_ENABLED */ 54775d4644ef6e38479a648615eca758c5e962a141d5Daniel Veillard#define bottom_xmlregexp 54785d4644ef6e38479a648615eca758c5e962a141d5Daniel Veillard#include "elfgcchack.h" 54794255d504151db75c17f85192ce74f45dd2d65533Daniel Veillard#endif /* LIBXML_REGEXP_ENABLED */ 5480