1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 2005, International Business Machines Corporation and   *
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * others. All Rights Reserved.                                               *
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  WBNF, Weighted BNF, is an extend BNF. The most difference between WBNF
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  and standard BNF is the WBNF accepts weight for its alternation items.
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  The weight specifies the opportunity it will be selected.
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  The purpose of WBNF is to help generate a random string from a given grammar
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  which can be described with standard BNF. The introduction of 'weight'
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  is to guide the generator to give the specific parts different chances to be
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  generated.
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  Usually, the user gives LanguageGenerator the grammar description in WBNF,
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  then LanguageGenerator will generate a random string on every next() call.
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  The return code of parseBNF() can help user to determine the error,
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  either in the grammar description or in the WBNF parser itself.
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  The grammar of WBNF itself can be described in standard BNF,
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    escaping        = _single character with a leading back slash, either inside or outside quoting_
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    quoting         = _quoted with a pair of single quotation marks_
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    string          = string alphabet | string digit | string quoting | string escaping |
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      alphabet | quoting | escaping
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    alphabet        =
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    digit           =
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    integer         = integer digit | digit
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    weight          = integer %
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    weight-list     = weight-list weight | weight
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    var             = var alphabet | var digit | $ alphabet
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    var-defs        = var-defs var-def | var-def
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    var-def         = var '=' definition;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    alternation     = alternation '|' alt-item | alt-item
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    alt-item        = sequence | sequence weight
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sequence        = sequence modified | modified
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    modified        = core | morph | quote | repeat
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    morph           = modified ~
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    quote           = modified @
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    repeat          = modified quantifier | modified quantifier weight-list
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    quantifier      = ? | * | + | { integer , integer} | {integer, } | {integer}
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    core            = var | string | '(' definition ')'
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    definition      = core | modified | sequence | alternation
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    definition      = alternation
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Remarks:
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o Following characters are literals in preceding definition
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      but are syntax symbols in WBNF
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      % $ ~ @ ? * + { } ,
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o Following character are syntax symbols in preceding definition
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              (sapce) contact operation, or separators to increase readability
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      =       definition
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      |       selection operation
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      ( )     precedence select
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      ' '     override special-character to plain character
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o the definition of 'escaping' and 'quoting' are preceding definition text
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o infinite is actually a predefine value PSEUDO_INFINIT defined in this file
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o if weight is not presented in "alt-item' and 'repeat',
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      a default weight DEFAULT_WEIGHT defined in this file is used
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o * == {0,  }
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      + == {1,  }
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      ? == {0, 1}
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o the weight-list for repeat assigns the weights for repeat itmes one by one
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      demo{1,3} 30% 40% 100%  ==  (demo)30% | (demodemo)40% | (demodemodemo)100%
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      To find more explain of the weight-list, please see the LIMITATION of the grammar
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    o but the weight-list for question mark has different meaning
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      demo ? 30%   != demo{0,1} 30% 100%
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      demo ? 30%   == demo{0,1} 70% 30%
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      the 70% is calculated from (DEFAULT_WEIGHT - weight)
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  Known LIMITATION of the grammar
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    For 'repeat', the parser will eat up as much as possible weights at one time,
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    discard superfluous weights if it is too much,
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fill insufficient weights with default weight if it is too less.
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    This behavior means following definitions are equal
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        demo{1,3} 30% 40% 100%
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        demo{1,3} 30% 40% 100% 50%
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        demo{1,3} 30% 40%
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    This behavior will cause a little confusion when defining an alternation
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        demo{1,3} 30% 40% 100% 50% | show 20%
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    is interpreted as
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (demo{1,3} 30% 40% 100%) 100% | show 20%
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    not
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (demo{1,3} 30% 40% 100%) 50% | show 20%
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    to get an expected definition, please use parentheses.
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  Known LIMITATION of current implement
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Due to the well known point alias problem, current Parser will be effectively
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    crashed if the definition looks like
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $a = demo;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $b = $a;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $c = $a;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    or
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $a = demo;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $b = $a $a;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    or
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $a = demo;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $b = $b $a;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    The crash will occur at delete operation in destructor or other memory release code.
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Several plans are on hard to fix the problem. Use a smart point with reference count,
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    or use a central memory management solution. But now, it works well with collation
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    monkey test, which is the only user for WBNF.
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _WBNF
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _WBNF
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst int DEFAULT_WEIGHT = 100;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst int PSEUDO_INFINIT = 200;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageGenerator_impl;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageGenerator{
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    LanguageGenerator_impl * lang_gen;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE};
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    LanguageGenerator();
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~LanguageGenerator();
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE);
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid TestWbnf(void);
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* _WBNF */
158