/*
 ******************************************************************************
 * Copyright (C) 2005, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
/*
    WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF
    and standard BNF is that WBNF accepts a weight for each of its alternation
    items. The weight specifies how likely the item is to be selected.

    The purpose of WBNF is to help generate a random string from a given grammar
    which can be described with standard BNF. The introduction of 'weight'
    is to guide the generator to give the specific parts different chances to be
    generated.

    Usually, the user gives LanguageGenerator the grammar description in WBNF,
    then LanguageGenerator will generate a random string on every next() call.
    The return code of parseBNF() can help the user determine the error,
    either in the grammar description or in the WBNF parser itself.


    The grammar of WBNF itself can be described in standard BNF,

        escaping        = _single character with a leading back slash, either inside or outside quoting_
        quoting         = _quoted with a pair of single quotation marks_
        string          = string alphabet | string digit | string quoting | string escaping |
                          alphabet | quoting | escaping
        alphabet        =
        digit           =
        integer         = integer digit | digit
        weight          = integer %
        weight-list     = weight-list weight | weight
        var             = var alphabet | var digit | $ alphabet

        var-defs        = var-defs var-def | var-def
        var-def         = var '=' definition;

        alternation     = alternation '|' alt-item | alt-item
        alt-item        = sequence | sequence weight

        sequence        = sequence modified | modified

        modified        = core | morph | quote | repeat
        morph           = modified ~
        quote           = modified @
        repeat          = modified quantifier | modified quantifier weight-list
        quantifier      = ? | * | + | { integer , integer} | {integer, } | {integer}

        core            = var | string | '(' definition ')'

        definition      = core | modified | sequence | alternation
        definition      = alternation

        Remarks:
        o The following characters are literals in the preceding definition
          but are syntax symbols in WBNF

          % $ ~ @ ? * + { } ,

        o The following characters are syntax symbols in the preceding definition
          (space)   concatenation operation, or separators to increase readability
          =         definition
          |         selection operation
          ( )       precedence select
          ' '       override special-character to plain character

        o the definitions of 'escaping' and 'quoting' are given as descriptive text
          in the preceding definition
        o infinite is actually a predefined value PSEUDO_INFINIT defined in this file
        o if a weight is not present in 'alt-item' and 'repeat',
          a default weight DEFAULT_WEIGHT defined in this file is used

        o * == {0,  }
          + == {1,  }
          ? == {0, 1}

        o the weight-list for repeat assigns the weights for repeat items one by one

          demo{1,3} 30% 40% 100%  ==  (demo)30% | (demodemo)40% | (demodemodemo)100%

          For a fuller explanation of the weight-list, please see the LIMITATION of the grammar

        o but the weight-list for question mark has a different meaning

          demo ? 30%   != demo{0,1} 30% 100%
          demo ? 30%   == demo{0,1} 70% 30%

          the 70% is calculated from (DEFAULT_WEIGHT - weight)


    Known LIMITATION of the grammar
        For 'repeat', the parser will eat up as many weights as possible at one time,
        discard superfluous weights if there are too many, and
        fill in missing weights with the default weight if there are too few.
        This behavior means the following definitions are equal

            demo{1,3} 30% 40% 100%
            demo{1,3} 30% 40% 100% 50%
            demo{1,3} 30% 40%

        This behavior will cause a little confusion when defining an alternation

            demo{1,3} 30% 40% 100% 50% | show 20%

        is interpreted as

            (demo{1,3} 30% 40% 100%) 100% | show 20%

        not

            (demo{1,3} 30% 40% 100%) 50% | show 20%

        to get the expected definition, please use parentheses.

    Known LIMITATION of the current implementation
        Due to the well known pointer aliasing problem, the current parser will
        effectively crash if the definition looks like

            $a = demo;
            $b = $a;
            $c = $a;
        or
            $a = demo;
            $b = $a $a;
        or
            $a = demo;
            $b = $b $a;

        The crash will occur at the delete operation in a destructor or other memory release code.
        Several plans are on hand to fix the problem: use a smart pointer with reference counting,
        or use a central memory management solution. But for now, it works well with the collation
        monkey test, which is the only user of WBNF.
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef _WBNF 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define _WBNF 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const int DEFAULT_WEIGHT = 100; 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const int PSEUDO_INFINIT = 200; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class LanguageGenerator_impl; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class LanguageGenerator{ 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LanguageGenerator_impl * lang_gen; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LanguageGenerator(); 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ~LanguageGenerator(); 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE); 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. 
*/ 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void TestWbnf(void); 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* _WBNF */ 158