1/*
2 *******************************************************************************
3 * Copyright (C) 2002-2012, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.util;
8
9import java.util.ArrayList;
10import java.util.HashMap;
11import java.util.HashSet;
12import java.util.Iterator;
13import java.util.Map;
14import java.util.Random;
15import java.util.Set;
16
17import com.ibm.icu.text.UnicodeSet;
18
19public class BNF {
20    private Map map = new HashMap();
21    private Set variables = new HashSet();
22    private Pick pick = null;
23    private Pick.Target target = null;
24    private Tokenizer t;
25    private Quoter quoter;
26    private Random random;
27
28    public String next() {
29        return target.next();
30    }
31
32    public String getInternal() {
33        return pick.getInternal(0, new HashSet());
34    }
35
36    /*
37    + "weight = integer '%';"
38    + "range = '{' integer (',' integer?)? '}' weight*;"
39    + "quote = '@';"
40    + "star = '*' weight*;"
41    + "plus = '+' weight*;"
42    + "maybe = '?' weight?;"
43    + "quantifier = range | star | maybe | plus;"
44    + "core = string | unicodeSet | '(' alternation ')';"
45    + "sequence = (core quantifier*)+;"
46    + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
47    + "rule = string '=' alternation;";
48
49
50    *      Match 0 or more times
51    +      Match 1 or more times
52    ?      Match 1 or 0 times
53    {n}    Match exactly n times
54    {n,}   Match at least n times
55    {n,m}  Match at least n but not more than m times
56
57
58
59    */
60
61    public BNF(Random random, Quoter quoter) {
62        this.random = random;
63        this.quoter = quoter;
64        t = new Tokenizer();
65    }
66
67    public BNF addRules(String rules) {
68        t.setSource(rules);
69        while (addRule()) {
70        }
71        return this; // for chaining
72    }
73
74    public BNF complete() {
75        // check that the rules match the variables, except for $root in rules
76        Set ruleSet = map.keySet();
77        // add also
78        variables.add("$root");
79        variables.addAll(t.getLookedUpItems());
80        if (!ruleSet.equals(variables)) {
81            String msg = showDiff(variables, ruleSet);
82            if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
83            String temp = showDiff(ruleSet, variables);
84            if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
85            if (msg.length() == 0) msg = temp;
86            else if (temp.length() != 0) {
87                msg = msg + "; " + temp;
88            }
89            error(msg);
90        }
91
92        if (!ruleSet.equals(variables)) {
93            String msg = showDiff(variables, ruleSet);
94            if (msg.length() != 0) msg = "Missing definitions for: " + msg;
95            String temp = showDiff(ruleSet, variables);
96            if (temp.length() != 0) temp = "Defined but not used: " + temp;
97            if (msg.length() == 0) msg = temp;
98            else if (temp.length() != 0) {
99                msg = msg + "; " + temp;
100            }
101            error(msg);
102        }
103
104        // replace variables by definitions
105        Iterator it = ruleSet.iterator();
106        while (it.hasNext()) {
107            String key = (String) it.next();
108            Pick expression = (Pick) map.get(key);
109            Iterator it2 = ruleSet.iterator();
110            if (false && key.equals("$crlf")) {
111                System.out.println("debug") ;
112            }
113            while (it2.hasNext()) {
114                Object key2 = it2.next();
115                if (key.equals(key2)) continue;
116                Pick expression2 = (Pick) map.get(key2);
117                expression2.replace(key, expression);
118            }
119        }
120        pick = (Pick) map.get("$root");
121        target = Pick.Target.make(pick, random, quoter);
122        // TODO remove temp collections
123        return this;
124    }
125
126    String showDiff(Set a, Set b) {
127        Set temp = new HashSet();
128        temp.addAll(a);
129        temp.removeAll(b);
130        if (temp.size() == 0) return "";
131        StringBuffer buffer = new StringBuffer();
132        Iterator it = temp.iterator();
133        while (it.hasNext()) {
134            if (buffer.length() != 0) buffer.append(", ");
135            buffer.append(it.next().toString());
136        }
137        return buffer.toString();
138    }
139
140    void error(String msg) {
141        throw new IllegalArgumentException(msg
142        + "\r\n" + t.toString());
143    }
144
145    private boolean addRule() {
146        int type = t.next();
147        if (type == Tokenizer.DONE) return false;
148        if (type != Tokenizer.STRING) error("missing weight");
149        String s = t.getString();
150        if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
151        if (t.next() != '=') error("missing =");
152        int startBody = t.index;
153        Pick rule = getAlternation();
154        if (rule == null) error("missing expression");
155        t.addSymbol(s, t.getSource(), startBody, t.index);
156        if (t.next() != ';') error("missing ;");
157        return addPick(s, rule);
158    }
159
160    protected boolean addPick(String s, Pick rule) {
161        Object temp = map.get(s);
162        if (temp != null) error("duplicate variable");
163        if (rule.name == null) rule.name(s);
164        map.put(s, rule);
165        return true;
166    }
167
168    public BNF addSet(String variable, UnicodeSet set) {
169        if (set != null) {
170            String body = set.toString();
171            t.addSymbol(variable, body, 0, body.length());
172            addPick(variable, Pick.codePoint(set));
173        }
174        return this;
175    }
176
177    int maxRepeat = 99;
178
179    Pick qualify(Pick item) {
180        int[] weights;
181        int type = t.next();
182        switch(type) {
183            case '@':
184                return new Pick.Quote(item);
185            case '~':
186                return new Pick.Morph(item);
187            case '?':
188                int weight = getWeight();
189                if (weight == NO_WEIGHT) weight = 50;
190                weights = new int[] {100-weight, weight};
191                return Pick.repeat(0, 1, weights, item);
192            case '*':
193                weights = getWeights();
194                return Pick.repeat(1, maxRepeat, weights, item);
195            case '+':
196                weights = getWeights();
197                return Pick.repeat(1, maxRepeat, weights, item);
198            case '{':
199                if (t.next() != Tokenizer.NUMBER) error("missing number");
200                int start = (int) t.getNumber();
201                int end = start;
202                type = t.next();
203                if (type == ',') {
204                    end = maxRepeat;
205                    type = t.next();
206                    if (type == Tokenizer.NUMBER) {
207                        end = (int)t.getNumber();
208                        type = t.next();
209                    }
210                }
211                if (type != '}') error("missing }");
212                weights = getWeights();
213                return Pick.repeat(start, end, weights, item);
214        }
215        t.backup();
216        return item;
217    }
218
219    Pick getCore() {
220        int token = t.next();
221        if (token == Tokenizer.STRING) {
222            String s = t.getString();
223            if (s.charAt(0) == '$') variables.add(s);
224            return Pick.string(s);
225        }
226        if (token == Tokenizer.UNICODESET) {
227            return Pick.codePoint(t.getUnicodeSet());
228        }
229        if (token != '(') {
230            t.backup();
231            return null;
232        }
233        Pick temp = getAlternation();
234        token = t.next();
235        if (token != ')') error("missing )");
236        return temp;
237    }
238
239    Pick getSequence() {
240        Pick.Sequence result = null;
241        Pick last = null;
242        while (true) {
243            Pick item = getCore();
244            if (item == null) {
245                if (result != null) return result;
246                if (last != null) return last;
247                error("missing item");
248            }
249            // qualify it as many times as possible
250            Pick oldItem;
251            do {
252                oldItem = item;
253                item = qualify(item);
254            } while (item != oldItem);
255            // add it in
256            if (last == null) {
257                last = item;
258            } else {
259                if (result == null) result = Pick.makeSequence().and2(last);
260                result = result.and2(item);
261            }
262        }
263    }
264
265    // for simplicity, we just use recursive descent
266    Pick getAlternation() {
267        Pick.Alternation result = null;
268        Pick last = null;
269        int lastWeight = NO_WEIGHT;
270        while (true) {
271            Pick temp = getSequence();
272            if (temp == null) error("empty alternation");
273            int weight = getWeight();
274            if (weight == NO_WEIGHT) weight = 1;
275            if (last == null) {
276                last = temp;
277                lastWeight = weight;
278            } else {
279                if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
280                result = result.or2(weight, temp);
281            }
282            int token = t.next();
283            if (token != '|') {
284                t.backup();
285                if (result != null) return result;
286                if (last != null) return last;
287            }
288        }
289    }
290
291    private static final int NO_WEIGHT = Integer.MIN_VALUE;
292
293    int getWeight() {
294        int weight;
295        int token = t.next();
296        if (token != Tokenizer.NUMBER) {
297            t.backup();
298            return NO_WEIGHT;
299        }
300        weight = (int)t.getNumber();
301        token = t.next();
302        if (token != '%') error("missing %");
303        return weight;
304    }
305
306    int[] getWeights() {
307        ArrayList list = new ArrayList();
308        while (true) {
309            int weight = getWeight();
310            if (weight == NO_WEIGHT) break;
311            list.add(new Integer(weight));
312        }
313        if (list.size() == 0) return null;
314        int[] result = new int[list.size()];
315        for (int i = 0; i < list.size(); ++i) {
316            result[i] = ((Integer)list.get(i)).intValue();
317        }
318        return result;
319    }
320
321    public int getMaxRepeat() {
322      return maxRepeat;
323    }
324
325    public BNF setMaxRepeat(int maxRepeat) {
326      this.maxRepeat = maxRepeat;
327      return this;
328    }
329}
330