1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 2005, International Business Machines Corporation and * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * others. All Rights Reserved. * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru WBNF, Weighted BNF, is an extend BNF. The most difference between WBNF 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru and standard BNF is the WBNF accepts weight for its alternation items. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru The weight specifies the opportunity it will be selected. 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru The purpose of WBNF is to help generate a random string from a given grammar 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru which can be described with standard BNF. The introduction of 'weight' 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru is to guide the generator to give the specific parts different chances to be 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru generated. 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Usually, the user gives LanguageGenerator the grammar description in WBNF, 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru then LanguageGenerator will generate a random string on every next() call. 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru The return code of parseBNF() can help user to determine the error, 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru either in the grammar description or in the WBNF parser itself. 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru The grammar of WBNF itself can be described in standard BNF, 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru escaping = _single character with a leading back slash, either inside or outside quoting_ 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quoting = _quoted with a pair of single quotation marks_ 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru string = string alphabet | string digit | string quoting | string escaping | 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru alphabet | quoting | escaping 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru alphabet = 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru digit = 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru integer = integer digit | digit 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru weight = integer % 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru weight-list = weight-list weight | weight 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru var = var alphabet | var digit | $ alphabet 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru var-defs = var-defs var-def | var-def 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru var-def = var '=' definition; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru alternation = alternation '|' alt-item | alt-item 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru alt-item = sequence | sequence weight 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sequence = sequence modified | modified 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru modified = core | morph | quote | repeat 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru morph = modified ~ 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quote = modified @ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru repeat = modified quantifier | modified quantifier weight-list 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru quantifier = ? | * | + | { integer , integer} | {integer, } | {integer} 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru core = var | string | '(' definition ')' 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definition = core | modified | sequence | alternation 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru definition = alternation 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Remarks: 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o Following characters are literals in preceding definition 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru but are syntax symbols in WBNF 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru % $ ~ @ ? * + { } , 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o Following character are syntax symbols in preceding definition 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sapce) contact operation, or separators to increase readability 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru = definition 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru | selection operation 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ( ) precedence select 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ' ' override special-character to plain character 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o the definition of 'escaping' and 'quoting' are preceding definition text 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o infinite is actually a predefine value PSEUDO_INFINIT defined in this file 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o if weight is not presented in "alt-item' and 'repeat', 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru a default weight DEFAULT_WEIGHT defined in this file is used 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o * == {0, } 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru + == {1, } 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ? == {0, 1} 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o the weight-list for repeat assigns the weights for repeat itmes one by one 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100% 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru To find more explain of the weight-list, please see the LIMITATION of the grammar 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru o but the weight-list for question mark has different meaning 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo ? 30% != demo{0,1} 30% 100% 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo ? 30% == demo{0,1} 70% 30% 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru the 70% is calculated from (DEFAULT_WEIGHT - weight) 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Known LIMITATION of the grammar 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru For 'repeat', the parser will eat up as much as possible weights at one time, 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru discard superfluous weights if it is too much, 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fill insufficient weights with default weight if it is too less. 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru This behavior means following definitions are equal 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo{1,3} 30% 40% 100% 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo{1,3} 30% 40% 100% 50% 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo{1,3} 30% 40% 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru This behavior will cause a little confusion when defining an alternation 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru demo{1,3} 30% 40% 100% 50% | show 20% 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru is interpreted as 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (demo{1,3} 30% 40% 100%) 100% | show 20% 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru not 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (demo{1,3} 30% 40% 100%) 50% | show 20% 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru to get an expected definition, please use parentheses. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Known LIMITATION of current implement 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Due to the well known point alias problem, current Parser will be effectively 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru crashed if the definition looks like 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $a = demo; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $b = $a; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $c = $a; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru or 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $a = demo; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $b = $a $a; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru or 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $a = demo; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $b = $b $a; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru The crash will occur at delete operation in destructor or other memory release code. 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Several plans are on hard to fix the problem. Use a smart point with reference count, 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru or use a central memory management solution. But now, it works well with collation 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru monkey test, which is the only user for WBNF. 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _WBNF 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _WBNF 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst int DEFAULT_WEIGHT = 100; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst int PSEUDO_INFINIT = 200; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageGenerator_impl; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageGenerator{ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LanguageGenerator_impl * lang_gen; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LanguageGenerator(); 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~LanguageGenerator(); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */ 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid TestWbnf(void); 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* _WBNF */ 158