1/* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7package com.ibm.icu.dev.util; 8 9import java.util.ArrayList; 10import java.util.HashMap; 11import java.util.HashSet; 12import java.util.Iterator; 13import java.util.Map; 14import java.util.Random; 15import java.util.Set; 16 17import com.ibm.icu.text.UnicodeSet; 18 19public class BNF { 20 private Map map = new HashMap(); 21 private Set variables = new HashSet(); 22 private Pick pick = null; 23 private Pick.Target target = null; 24 private Tokenizer t; 25 private Quoter quoter; 26 private Random random; 27 28 public String next() { 29 return target.next(); 30 } 31 32 public String getInternal() { 33 return pick.getInternal(0, new HashSet()); 34 } 35 36 /* 37 + "weight = integer '%';" 38 + "range = '{' integer (',' integer?)? '}' weight*;" 39 + "quote = '@';" 40 + "star = '*' weight*;" 41 + "plus = '+' weight*;" 42 + "maybe = '?' weight?;" 43 + "quantifier = range | star | maybe | plus;" 44 + "core = string | unicodeSet | '(' alternation ')';" 45 + "sequence = (core quantifier*)+;" 46 + "alternation = sequence (weight? ('|' sequence weight?)+)?;" 47 + "rule = string '=' alternation;"; 48 49 50 * Match 0 or more times 51 + Match 1 or more times 52 ? Match 1 or 0 times 53 {n} Match exactly n times 54 {n,} Match at least n times 55 {n,m} Match at least n but not more than m times 56 57 58 59 */ 60 61 public BNF(Random random, Quoter quoter) { 62 this.random = random; 63 this.quoter = quoter; 64 t = new Tokenizer(); 65 } 66 67 public BNF addRules(String rules) { 68 t.setSource(rules); 69 while (addRule()) { 70 } 71 return this; // for chaining 72 } 73 74 public BNF complete() { 75 // check that the rules match the variables, except for $root in rules 76 Set ruleSet = map.keySet(); 77 // add also 78 variables.add("$root"); 79 variables.addAll(t.getLookedUpItems()); 80 if (!ruleSet.equals(variables)) { 81 String msg = showDiff(variables, ruleSet); 82 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; 83 String temp = showDiff(ruleSet, variables); 84 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; 85 if (msg.length() == 0) msg = temp; 86 else if (temp.length() != 0) { 87 msg = msg + "; " + temp; 88 } 89 error(msg); 90 } 91 92 if (!ruleSet.equals(variables)) { 93 String msg = showDiff(variables, ruleSet); 94 if (msg.length() != 0) msg = "Missing definitions for: " + msg; 95 String temp = showDiff(ruleSet, variables); 96 if (temp.length() != 0) temp = "Defined but not used: " + temp; 97 if (msg.length() == 0) msg = temp; 98 else if (temp.length() != 0) { 99 msg = msg + "; " + temp; 100 } 101 error(msg); 102 } 103 104 // replace variables by definitions 105 Iterator it = ruleSet.iterator(); 106 while (it.hasNext()) { 107 String key = (String) it.next(); 108 Pick expression = (Pick) map.get(key); 109 Iterator it2 = ruleSet.iterator(); 110 if (false && key.equals("$crlf")) { 111 System.out.println("debug") ; 112 } 113 while (it2.hasNext()) { 114 Object key2 = it2.next(); 115 if (key.equals(key2)) continue; 116 Pick expression2 = (Pick) map.get(key2); 117 expression2.replace(key, expression); 118 } 119 } 120 pick = (Pick) map.get("$root"); 121 target = Pick.Target.make(pick, random, quoter); 122 // TODO remove temp collections 123 return this; 124 } 125 126 String showDiff(Set a, Set b) { 127 Set temp = new HashSet(); 128 temp.addAll(a); 129 temp.removeAll(b); 130 if (temp.size() == 0) return ""; 131 StringBuffer buffer = new StringBuffer(); 132 Iterator it = temp.iterator(); 133 while (it.hasNext()) { 134 if (buffer.length() != 0) buffer.append(", "); 135 buffer.append(it.next().toString()); 136 } 137 return buffer.toString(); 138 } 139 140 void error(String msg) { 141 throw new IllegalArgumentException(msg 142 + "\r\n" + t.toString()); 143 } 144 145 private boolean addRule() { 146 int type = t.next(); 147 if (type == Tokenizer.DONE) return false; 148 if (type != Tokenizer.STRING) error("missing weight"); 149 String s = t.getString(); 150 if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable"); 151 if (t.next() != '=') error("missing ="); 152 int startBody = t.index; 153 Pick rule = getAlternation(); 154 if (rule == null) error("missing expression"); 155 t.addSymbol(s, t.getSource(), startBody, t.index); 156 if (t.next() != ';') error("missing ;"); 157 return addPick(s, rule); 158 } 159 160 protected boolean addPick(String s, Pick rule) { 161 Object temp = map.get(s); 162 if (temp != null) error("duplicate variable"); 163 if (rule.name == null) rule.name(s); 164 map.put(s, rule); 165 return true; 166 } 167 168 public BNF addSet(String variable, UnicodeSet set) { 169 if (set != null) { 170 String body = set.toString(); 171 t.addSymbol(variable, body, 0, body.length()); 172 addPick(variable, Pick.codePoint(set)); 173 } 174 return this; 175 } 176 177 int maxRepeat = 99; 178 179 Pick qualify(Pick item) { 180 int[] weights; 181 int type = t.next(); 182 switch(type) { 183 case '@': 184 return new Pick.Quote(item); 185 case '~': 186 return new Pick.Morph(item); 187 case '?': 188 int weight = getWeight(); 189 if (weight == NO_WEIGHT) weight = 50; 190 weights = new int[] {100-weight, weight}; 191 return Pick.repeat(0, 1, weights, item); 192 case '*': 193 weights = getWeights(); 194 return Pick.repeat(1, maxRepeat, weights, item); 195 case '+': 196 weights = getWeights(); 197 return Pick.repeat(1, maxRepeat, weights, item); 198 case '{': 199 if (t.next() != Tokenizer.NUMBER) error("missing number"); 200 int start = (int) t.getNumber(); 201 int end = start; 202 type = t.next(); 203 if (type == ',') { 204 end = maxRepeat; 205 type = t.next(); 206 if (type == Tokenizer.NUMBER) { 207 end = (int)t.getNumber(); 208 type = t.next(); 209 } 210 } 211 if (type != '}') error("missing }"); 212 weights = getWeights(); 213 return Pick.repeat(start, end, weights, item); 214 } 215 t.backup(); 216 return item; 217 } 218 219 Pick getCore() { 220 int token = t.next(); 221 if (token == Tokenizer.STRING) { 222 String s = t.getString(); 223 if (s.charAt(0) == '$') variables.add(s); 224 return Pick.string(s); 225 } 226 if (token == Tokenizer.UNICODESET) { 227 return Pick.codePoint(t.getUnicodeSet()); 228 } 229 if (token != '(') { 230 t.backup(); 231 return null; 232 } 233 Pick temp = getAlternation(); 234 token = t.next(); 235 if (token != ')') error("missing )"); 236 return temp; 237 } 238 239 Pick getSequence() { 240 Pick.Sequence result = null; 241 Pick last = null; 242 while (true) { 243 Pick item = getCore(); 244 if (item == null) { 245 if (result != null) return result; 246 if (last != null) return last; 247 error("missing item"); 248 } 249 // qualify it as many times as possible 250 Pick oldItem; 251 do { 252 oldItem = item; 253 item = qualify(item); 254 } while (item != oldItem); 255 // add it in 256 if (last == null) { 257 last = item; 258 } else { 259 if (result == null) result = Pick.makeSequence().and2(last); 260 result = result.and2(item); 261 } 262 } 263 } 264 265 // for simplicity, we just use recursive descent 266 Pick getAlternation() { 267 Pick.Alternation result = null; 268 Pick last = null; 269 int lastWeight = NO_WEIGHT; 270 while (true) { 271 Pick temp = getSequence(); 272 if (temp == null) error("empty alternation"); 273 int weight = getWeight(); 274 if (weight == NO_WEIGHT) weight = 1; 275 if (last == null) { 276 last = temp; 277 lastWeight = weight; 278 } else { 279 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); 280 result = result.or2(weight, temp); 281 } 282 int token = t.next(); 283 if (token != '|') { 284 t.backup(); 285 if (result != null) return result; 286 if (last != null) return last; 287 } 288 } 289 } 290 291 private static final int NO_WEIGHT = Integer.MIN_VALUE; 292 293 int getWeight() { 294 int weight; 295 int token = t.next(); 296 if (token != Tokenizer.NUMBER) { 297 t.backup(); 298 return NO_WEIGHT; 299 } 300 weight = (int)t.getNumber(); 301 token = t.next(); 302 if (token != '%') error("missing %"); 303 return weight; 304 } 305 306 int[] getWeights() { 307 ArrayList list = new ArrayList(); 308 while (true) { 309 int weight = getWeight(); 310 if (weight == NO_WEIGHT) break; 311 list.add(new Integer(weight)); 312 } 313 if (list.size() == 0) return null; 314 int[] result = new int[list.size()]; 315 for (int i = 0; i < list.size(); ++i) { 316 result[i] = ((Integer)list.get(i)).intValue(); 317 } 318 return result; 319 } 320 321 public int getMaxRepeat() { 322 return maxRepeat; 323 } 324 325 public BNF setMaxRepeat(int maxRepeat) { 326 this.maxRepeat = maxRepeat; 327 return this; 328 } 329} 330