1/* 2********************************************************************** 3* Copyright (c) 2002-2010, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Author: Mark Davis 7********************************************************************** 8*/ 9package com.ibm.icu.dev.test.cldr; 10 11import java.io.File; 12import java.io.IOException; 13import java.io.PrintWriter; 14import java.io.StringWriter; 15import java.text.ParseException; 16import java.util.ArrayList; 17import java.util.Collection; 18import java.util.Date; 19import java.util.HashMap; 20import java.util.Iterator; 21import java.util.List; 22import java.util.Map; 23import java.util.Set; 24import java.util.TreeMap; 25import java.util.TreeSet; 26import java.util.regex.Matcher; 27import java.util.regex.Pattern; 28 29import javax.xml.parsers.SAXParser; 30import javax.xml.parsers.SAXParserFactory; 31 32import org.xml.sax.Attributes; 33import org.xml.sax.SAXException; 34import org.xml.sax.helpers.DefaultHandler; 35 36import com.ibm.icu.dev.test.TestFmwk; 37import com.ibm.icu.text.DateFormat; 38import com.ibm.icu.text.NumberFormat; 39import com.ibm.icu.text.SimpleDateFormat; 40import com.ibm.icu.text.UTF16; 41import com.ibm.icu.text.UnicodeSet; 42import com.ibm.icu.util.Currency; 43import com.ibm.icu.util.TimeZone; 44import com.ibm.icu.util.ULocale; 45 46/** 47 * This is a test file that takes in the CLDR XML test files and test against 48 * ICU4J. This test file is used to verify that ICU4J is implemented correctly. 49 * As it stands, the test generates all the errors to the console by logging it. 50 * The logging is only possible if "-v" or verbose is set as an argument. 51 * This will allow users to know what problems occurred within CLDR and ICU. 52 * Collator was disabled in this test file and therefore will be skipped. 53 * 54 * Instructions: 55 * 1) In order for this to work correctly, you must download the latest CLDR data 56 * in the form of XML. You must also set the CLDR directory using: 57 * -DCLDR_DIRECTORY=<top level of cldr> 58 * 2) You may also consider increasing the memory using -Xmx512m. 59 * 3) For speed purposes, you may consider creating a temporary directory for the 60 * CLDR cache using: 61 * -DCLDR_DTD_CACHE=<cldr cache directory> 62 * 4) You may use other environment variables to narrow down your tests using: 63 * -DXML_MATCH=".*" 64 * -DXML_MATCH="de.*" (or whatever regex you want) to just test certain locales. 65 * -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc. 66 * -DZONE_MATCH="(?!America/Argentina).*" 67 * -DZONE_MATCH=".*Moscow.*" (to only test certain zones) 68 69 * @author medavis 70 * @author John Huan Vu (johnvu@us.ibm.com) 71 */ 72public class TestCLDRVsICU extends TestFmwk { 73 static final boolean DEBUG = false; 74 75 // ULocale uLocale = ULocale.ENGLISH; 76 // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere 77 // static PrintWriter log; 78 SAXParser SAX; 79 static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH; 80 static String CLDR_DIRECTORY; 81 static { 82 System.out.println(); 83 LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*"); 84 TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*"); 85 ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); 86 87 // CLDR_DIRECTORY is where all the CLDR XML test files are located 88 // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT 89 CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\"); 90 System.out.println(); 91 } 92 93 private static Matcher getEnvironmentRegex(String key, String defaultValue) { 94 return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher(""); 95 } 96 97 private static String getEnvironmentString(String key, String defaultValue) { 98 String temp = System.getProperty(key); 99 if (temp == null) 100 temp = defaultValue; 101 else 102 System.out.print("-D" + key + "=\"" + temp + "\" "); 103 return temp; 104 } 105 106 public static void main(String[] args) throws Exception { 107 new TestCLDRVsICU().run(args); 108 } 109 110 Set allLocales = new TreeSet(); 111 112 public void TestFiles() throws SAXException, IOException { 113 // only get ICU's locales 114 Set s = new TreeSet(); 115 addLocales(NumberFormat.getAvailableULocales(), s); 116 addLocales(DateFormat.getAvailableULocales(), s); 117 118 // johnvu: Collator was originally disabled 119 // addLocales(Collator.getAvailableULocales(), s); 120 121 // filter, to make tracking down bugs easier 122 for (Iterator it = s.iterator(); it.hasNext();) { 123 String locale = (String) it.next(); 124 if (!LOCALE_MATCH.reset(locale).matches()) 125 continue; 126 _test(locale); 127 } 128 } 129 130 public void addLocales(ULocale[] list, Collection s) { 131 for (int i = 0; i < list.length; ++i) { 132 allLocales.add(list[i].toString()); 133 s.add(list[i].getLanguage()); 134 } 135 } 136 137 public String getLanguage(ULocale uLocale) { 138 String result = uLocale.getLanguage(); 139 String script = uLocale.getScript(); 140 if (script.length() != 0) 141 result += "_" + script; 142 return result; 143 } 144 145 public void _test(String localeName) throws SAXException, IOException { 146 // uLocale = new ULocale(localeName); 147 // oLocale = uLocale.toLocale(); 148 149 File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml"); 150 logln("Testing " + f.getCanonicalPath()); 151 SAX.parse(f, DEFAULT_HANDLER); 152 } 153 154 private static class ToHex { 155 public String transliterate(String in) { 156 StringBuilder sb = new StringBuilder(); 157 for (int i = 0; i < in.length(); ++i) { 158 char c = in.charAt(i); 159 sb.append("\\u"); 160 if (c < 1000) { 161 sb.append('0'); 162 if (c < 100) { 163 sb.append('0'); 164 if (c < 10) { 165 sb.append('0'); 166 } 167 } 168 } 169 sb.append(Integer.toHexString((int) c)); 170 } 171 return sb.toString(); 172 } 173 } 174 175 // static Transliterator toUnicode = Transliterator.getInstance("any-hex"); 176 private static final ToHex toUnicode = new ToHex(); 177 178 static public String showString(String in) { 179 return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")"; 180 } 181 182 // ============ SAX Handler Infrastructure ============ 183 184 abstract public class Handler { 185 Map settings = new TreeMap(); 186 String name; 187 List currentLocales = new ArrayList(); 188 int failures = 0; 189 190 void setName(String name) { 191 this.name = name; 192 } 193 194 void set(String attributeName, String attributeValue) { 195 // if (DEBUG) logln(attributeName + " => " + attributeValue); 196 settings.put(attributeName, attributeValue); 197 } 198 199 void checkResult(String value) { 200 if (settings.get("draft").equals("unconfirmed") || settings.get("draft").equals("provisional")) { 201 return; // skip draft 202 } 203 ULocale ul = new ULocale("xx"); 204 try { 205 for (int i = 0; i < currentLocales.size(); ++i) { 206 ul = (ULocale) currentLocales.get(i); 207 // loglnSAX(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name); 208 handleResult(ul, value); 209 if (failures != 0) { 210 errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) 211 + ")"); 212 failures = 0; 213 } 214 } 215 } catch (Exception e) { 216 StringWriter sw = new StringWriter(); 217 PrintWriter pw = new PrintWriter(sw); 218 e.printStackTrace(pw); 219 pw.flush(); 220 errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString()); 221 } 222 } 223 224 public void loglnSAX(String message) { 225 String temp = message + "\t[" + name; 226 for (Iterator it = settings.keySet().iterator(); it.hasNext();) { 227 String attributeName = (String) it.next(); 228 String attributeValue = (String) settings.get(attributeName); 229 temp += " " + attributeName + "=<" + attributeValue + ">"; 230 } 231 logln(temp + "]"); 232 } 233 234 int lookupValue(Object x, Object[] list) { 235 for (int i = 0; i < list.length; ++i) { 236 if (x.equals(list[i])) 237 return i; 238 } 239 loglnSAX("Unknown String: " + x); 240 return -1; 241 } 242 243 abstract void handleResult(ULocale currentLocale, String value) throws Exception; 244 245 /** 246 * @param attributes 247 */ 248 public void setAttributes(Attributes attributes) { 249 String localeList = attributes.getValue("locales"); 250 String[] currentLocaleString = new String[50]; 251 com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString); 252 currentLocales.clear(); 253 for (int i = 0; i < currentLocaleString.length; ++i) { 254 if (currentLocaleString[i].length() == 0) 255 continue; 256 if (allLocales.contains("")) { 257 logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]); 258 continue; 259 } 260 currentLocales.add(new ULocale(currentLocaleString[i])); 261 } 262 if (DEBUG) 263 logln("Setting locales: " + currentLocales); 264 } 265 } 266 267 public Handler getHandler(String name, Attributes attributes) { 268 if (DEBUG) 269 logln("Creating Handler: " + name); 270 Handler result = (Handler) RegisteredHandlers.get(name); 271 if (result == null) 272 logln("Unexpected test type: " + name); 273 else { 274 result.setAttributes(attributes); 275 } 276 return result; 277 } 278 279 public void addHandler(String name, Handler handler) { 280 if (!TEST_MATCH.reset(name).matches()) 281 handler = new NullHandler(); 282 handler.setName(name); 283 RegisteredHandlers.put(name, handler); 284 } 285 286 Map RegisteredHandlers = new HashMap(); 287 288 class NullHandler extends Handler { 289 void handleResult(ULocale currentLocale, String value) throws Exception { 290 } 291 } 292 293 // ============ Statics for Date/Number Support ============ 294 295 static TimeZone utc = TimeZone.getTimeZone("GMT"); 296 static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); 297 { 298 iso.setTimeZone(utc); 299 } 300 301 static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL }; 302 303 // The following are different data format types that are part of the parameters in CLDR 304 static String[] DateFormatNames = { "none", "short", "medium", "long", "full" }; 305 306 // The following are different number types that are part of the parameters in CLDR 307 static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" }; 308 309 310 // ============ Handler for Collation ============ 311 static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]"); 312 313 static String remove(String in, UnicodeSet toRemove) { 314 int cp; 315 StringBuffer result = new StringBuffer(); 316 for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) { 317 cp = UTF16.charAt(in, i); 318 if (!toRemove.contains(cp)) 319 UTF16.append(result, cp); 320 } 321 return result.toString(); 322 } 323 324 { 325 // johnvu: Collator was originally disabled 326 // TODO (dougfelt) move this test 327 /* 328 addHandler("collation", new Handler() { 329 public void handleResult(ULocale currentLocale, String value) { 330 Collator col = Collator.getInstance(currentLocale); 331 String lastLine = ""; 332 int count = 0; 333 for (int pos = 0; pos < value.length();) { 334 int nextPos = value.indexOf('\n', pos); 335 if (nextPos < 0) 336 nextPos = value.length(); 337 String line = value.substring(pos, nextPos); 338 line = remove(line, controlsAndSpace); HACK for SAX 339 if (line.trim().length() != 0) { HACK for SAX 340 int comp = col.compare(lastLine, line); 341 if (comp > 0) { 342 failures++; 343 errln("\tLine " + (count + 1) + "\tFailure: " 344 + showString(lastLine) + " should be leq " 345 + showString(line)); 346 } else if (DEBUG) { 347 logln("OK: " + line); 348 } 349 lastLine = line; 350 } 351 pos = nextPos + 1; 352 count++; 353 } 354 } 355 }); 356 */ 357 358 // ============ Handler for Numbers ============ 359 addHandler("number", new Handler() { 360 public void handleResult(ULocale locale, String result) { 361 NumberFormat nf = null; 362 double v = Double.NaN; 363 for (Iterator it = settings.keySet().iterator(); it.hasNext();) { 364 String attributeName = (String) it.next(); 365 String attributeValue = (String) settings.get(attributeName); 366 367 // Checks if the attribute name is a draft and whether 368 // or not it has been approved / contributed by CLDR yet 369 // otherwise, skips it because it is most likely rejected by ICU 370 if (attributeName.equals("draft")) { 371 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) { 372 break; 373 } 374 continue; 375 } 376 377 // Update the value to be checked 378 if (attributeName.equals("input")) { 379 v = Double.parseDouble(attributeValue); 380 continue; 381 } 382 383 // At this point, it must be a numberType 384 int index = lookupValue(attributeValue, NumberNames); 385 386 if (DEBUG) 387 logln("Getting number format for " + locale); 388 switch (index) { 389 case 0: 390 nf = NumberFormat.getInstance(locale); 391 break; 392 case 1: 393 nf = NumberFormat.getIntegerInstance(locale); 394 break; 395 case 2: 396 nf = NumberFormat.getNumberInstance(locale); 397 break; 398 case 3: 399 nf = NumberFormat.getPercentInstance(locale); 400 break; 401 case 4: 402 nf = NumberFormat.getScientificInstance(locale); 403 break; 404 default: 405 nf = NumberFormat.getCurrencyInstance(locale); 406 nf.setCurrency(Currency.getInstance(attributeValue)); 407 break; 408 } 409 String temp = nf.format(v).trim(); 410 result = result.trim(); // HACK because of SAX 411 if (!temp.equals(result)) { 412 logln("Number: Locale: " + locale + 413 "\n\tType: " + attributeValue + 414 "\n\tDraft: " + settings.get("draft") + 415 "\n\tCLDR: <" + result + ">" + 416 "\n\tICU: <" + temp + ">"); 417 } 418 419 } 420 } 421 }); 422 423 // ============ Handler for Dates ============ 424 addHandler("date", new Handler() { 425 public void handleResult(ULocale locale, String result) throws ParseException { 426 int dateFormat = 0; 427 int timeFormat = 0; 428 Date date = new Date(); 429 boolean approved = true; 430 431 for (Iterator it = settings.keySet().iterator(); it.hasNext();) { 432 String attributeName = (String) it.next(); 433 String attributeValue = (String) settings.get(attributeName); 434 435 // Checks if the attribute name is a draft and whether 436 // or not it has been approved / contributed by CLDR yet 437 // otherwise, skips it because it is most likely rejected by ICU 438 if (attributeName.equals("draft")) { 439 if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) { 440 approved = false; 441 break; 442 } 443 continue; 444 } 445 446 // Update the value to be checked 447 if (attributeName.equals("input")) { 448 date = iso.parse(attributeValue); 449 continue; 450 } 451 // At this point, it must be either dateType or timeType 452 int index = lookupValue(attributeValue, DateFormatNames); 453 if (attributeName.equals("dateType")) 454 dateFormat = index; 455 else if (attributeName.equals("timeType")) 456 timeFormat = index; 457 458 } 459 460 // The attribute value must be approved in order to be checked, 461 // if it hasn't been approved, it shouldn't be checked if it 462 // matches with ICU 463 if (approved) { 464 SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat); 465 dt.setTimeZone(utc); 466 String temp = dt.format(date).trim(); 467 result = result.trim(); // HACK because of SAX 468 if (!temp.equals(result)) { 469 logln("DateTime: Locale: " + locale + 470 "\n\tDate: " + DateFormatNames[dateFormat] + 471 "\n\tTime: " + DateFormatNames[timeFormat] + 472 "\n\tDraft: " + settings.get("draft") + 473 "\n\tCLDR: <" + result + "> " + 474 "\n\tICU: <" + temp + ">"); 475 } 476 } 477 } 478 479 private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) { 480 if (DEBUG) 481 logln("Getting date/time format for " + locale); 482 if (DEBUG && "ar_EG".equals(locale.toString())) { 483 logln("debug here"); 484 } 485 DateFormat dt; 486 if (dateFormat == 0) { 487 dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale); 488 if (DEBUG) 489 System.out.print("getTimeInstance"); 490 } else if (timeFormat == 0) { 491 dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale); 492 if (DEBUG) 493 System.out.print("getDateInstance"); 494 } else { 495 dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], 496 locale); 497 if (DEBUG) 498 System.out.print("getDateTimeInstance"); 499 } 500 if (DEBUG) 501 logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern()); 502 return (SimpleDateFormat) dt; 503 } 504 }); 505 506 // ============ Handler for Zones ============ 507 addHandler("zoneFields", new Handler() { 508 String date = ""; 509 String zone = ""; 510 String parse = ""; 511 String pattern = ""; 512 513 public void handleResult(ULocale locale, String result) throws ParseException { 514 for (Iterator it = settings.keySet().iterator(); it.hasNext();) { 515 String attributeName = (String) it.next(); 516 String attributeValue = (String) settings.get(attributeName); 517 if (attributeName.equals("date")) { 518 date = attributeValue; 519 } else if (attributeName.equals("field")) { 520 pattern = attributeValue; 521 } else if (attributeName.equals("zone")) { 522 zone = attributeValue; 523 } else if (attributeName.equals("parse")) { 524 parse = attributeValue; 525 } 526 } 527 528 if (!ZONE_MATCH.reset(zone).matches()) return; 529 Date dateValue = iso.parse(date); 530 SimpleDateFormat field = new SimpleDateFormat(pattern, locale); 531 field.setTimeZone(TimeZone.getTimeZone(zone)); 532 String temp = field.format(dateValue).trim(); 533 // SKIP PARSE FOR NOW 534 result = result.trim(); // HACK because of SAX 535 if (!temp.equals(result)) { 536 temp = field.format(dateValue).trim(); // call again for debugging 537 logln("Zone Format: Locale: " + locale 538 + "\n\tZone: " + zone 539 + "\n\tDate: " + date 540 + "\n\tField: " + pattern 541 + "\n\tParse: " + parse 542 + "\n\tDraft: " + settings.get("draft") 543 + "\n\tCLDR: <" + result 544 + ">\n\tICU: <" + temp + ">"); 545 } 546 } 547 }); 548 } 549 550 // ============ Gorp for SAX ============ 551 552 { 553 try { 554 SAXParserFactory factory = SAXParserFactory.newInstance(); 555 factory.setValidating(true); 556 SAX = factory.newSAXParser(); 557 } catch (Exception e) { 558 throw new IllegalArgumentException("SAXParserFacotry was unable to start."); 559 } 560 } 561 562 DefaultHandler DEFAULT_HANDLER = new DefaultHandler() { 563 static final boolean DEBUG = false; 564 StringBuffer lastChars = new StringBuffer(); 565 // boolean justPopped = false; 566 Handler handler; 567 568 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 569 // data.put(new ContextStack(contextStack), lastChars); 570 // lastChars = ""; 571 try { 572 if (qName.equals("cldrTest")) { 573 // skip 574 } else if (qName.equals("result") && handler != null) { 575 for (int i = 0; i < attributes.getLength(); ++i) { 576 handler.set(attributes.getQName(i), attributes.getValue(i)); 577 } 578 } else { 579 handler = getHandler(qName, attributes); 580 // handler.set("locale", uLocale.toString()); 581 } 582 // if (DEBUG) logln("startElement:\t" + contextStack); 583 // justPopped = false; 584 } catch (RuntimeException e) { 585 e.printStackTrace(); 586 throw e; 587 } 588 } 589 590 public void endElement(String uri, String localName, String qName) throws SAXException { 591 try { 592 // if (DEBUG) logln("endElement:\t" + contextStack); 593 if (qName.equals("result") && handler != null) { 594 handler.checkResult(lastChars.toString()); 595 } else if (qName.length() != 0) { 596 // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">"); 597 } 598 lastChars.setLength(0); 599 // justPopped = true; 600 } catch (RuntimeException e) { 601 e.printStackTrace(); 602 throw e; 603 } 604 } 605 606 // Have to hack around the fact that the character data might be in pieces 607 public void characters(char[] ch, int start, int length) throws SAXException { 608 try { 609 String value = new String(ch, start, length); 610 if (DEBUG) 611 logln("characters:\t" + value); 612 lastChars.append(value); 613 // justPopped = false; 614 } catch (RuntimeException e) { 615 e.printStackTrace(); 616 throw e; 617 } 618 } 619 620 // just for debugging 621 622 public void notationDecl(String name, String publicId, String systemId) throws SAXException { 623 logln("notationDecl: " + name + ", " + publicId + ", " + systemId); 624 } 625 626 public void processingInstruction(String target, String data) throws SAXException { 627 logln("processingInstruction: " + target + ", " + data); 628 } 629 630 public void skippedEntity(String name) throws SAXException { 631 logln("skippedEntity: " + name); 632 } 633 634 public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) 635 throws SAXException { 636 logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName); 637 } 638 }; 639} 640