1/*
2**********************************************************************
3* Copyright (c) 2002-2010, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Mark Davis
7**********************************************************************
8*/
9package com.ibm.icu.dev.test.cldr;
10
11import java.io.File;
12import java.io.IOException;
13import java.io.PrintWriter;
14import java.io.StringWriter;
15import java.text.ParseException;
16import java.util.ArrayList;
17import java.util.Collection;
18import java.util.Date;
19import java.util.HashMap;
20import java.util.Iterator;
21import java.util.List;
22import java.util.Map;
23import java.util.Set;
24import java.util.TreeMap;
25import java.util.TreeSet;
26import java.util.regex.Matcher;
27import java.util.regex.Pattern;
28
29import javax.xml.parsers.SAXParser;
30import javax.xml.parsers.SAXParserFactory;
31
32import org.xml.sax.Attributes;
33import org.xml.sax.SAXException;
34import org.xml.sax.helpers.DefaultHandler;
35
36import com.ibm.icu.dev.test.TestFmwk;
37import com.ibm.icu.text.DateFormat;
38import com.ibm.icu.text.NumberFormat;
39import com.ibm.icu.text.SimpleDateFormat;
40import com.ibm.icu.text.UTF16;
41import com.ibm.icu.text.UnicodeSet;
42import com.ibm.icu.util.Currency;
43import com.ibm.icu.util.TimeZone;
44import com.ibm.icu.util.ULocale;
45
/**
 * This test reads the CLDR XML test files and checks their expected results
 * against ICU4J. It is used to verify that ICU4J is implemented correctly.
 * As it stands, the test reports every discrepancy to the console by logging it.
 * The log output is only shown if "-v" (verbose) is passed as an argument.
 * This lets users see which problems occurred within CLDR and ICU.
 * The Collator test is disabled in this file and is therefore skipped.
 *
 * Instructions:
 * 1)   In order for this to work correctly, you must download the latest CLDR data
 *      in the form of XML. You must also set the CLDR directory using:
 *          -DCLDR_DIRECTORY=<top level of cldr>
 * 2)   You may also consider increasing the memory using -Xmx512m.
 * 3)   For speed purposes, you may consider creating a temporary directory for the
 *      CLDR cache using:
 *          -DCLDR_DTD_CACHE=<cldr cache directory>
 * 4)   You may use other system properties to narrow down your tests:
 *          -DXML_MATCH=".*"
 *              -DXML_MATCH="de.*"  (or whatever regex you want) to test only certain locales.
 *          -DTEST_MATCH="zone.*"   (or whatever regex you want) to test only collation, numbers, etc.
 *          -DZONE_MATCH="(?!America/Argentina).*"
 *              -DZONE_MATCH=".*Moscow.*" (to test only certain zones)
 *
 * @author medavis
 * @author John Huan Vu (johnvu@us.ibm.com)
 */
72public class TestCLDRVsICU extends TestFmwk {
73    static final boolean DEBUG = false;
74
75    // ULocale uLocale = ULocale.ENGLISH;
76    // Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
77    // static PrintWriter log;
78    SAXParser SAX;
79    static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH;
80    static String CLDR_DIRECTORY;
81    static {
82        System.out.println();
83        LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*");
84        TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*");
85        ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*");
86
87        // CLDR_DIRECTORY is where all the CLDR XML test files are located
88        // WARNING: THIS IS TEMPORARY DIRECTORY UNTIL THE FILES ARE STRAIGHTENED OUT
89        CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\");
90        System.out.println();
91    }
92
93    private static Matcher getEnvironmentRegex(String key, String defaultValue) {
94        return Pattern.compile(getEnvironmentString(key, defaultValue)).matcher("");
95    }
96
97    private static String getEnvironmentString(String key, String defaultValue) {
98        String temp = System.getProperty(key);
99        if (temp == null)
100            temp = defaultValue;
101        else
102            System.out.print("-D" + key + "=\"" + temp + "\" ");
103        return temp;
104    }
105
106    public static void main(String[] args) throws Exception {
107        new TestCLDRVsICU().run(args);
108    }
109
110    Set allLocales = new TreeSet();
111
112    public void TestFiles() throws SAXException, IOException {
113        // only get ICU's locales
114        Set s = new TreeSet();
115        addLocales(NumberFormat.getAvailableULocales(), s);
116        addLocales(DateFormat.getAvailableULocales(), s);
117
118        // johnvu: Collator was originally disabled
119        // addLocales(Collator.getAvailableULocales(), s);
120
121        // filter, to make tracking down bugs easier
122        for (Iterator it = s.iterator(); it.hasNext();) {
123            String locale = (String) it.next();
124            if (!LOCALE_MATCH.reset(locale).matches())
125                continue;
126            _test(locale);
127        }
128    }
129
130    public void addLocales(ULocale[] list, Collection s) {
131        for (int i = 0; i < list.length; ++i) {
132            allLocales.add(list[i].toString());
133            s.add(list[i].getLanguage());
134        }
135    }
136
137    public String getLanguage(ULocale uLocale) {
138        String result = uLocale.getLanguage();
139        String script = uLocale.getScript();
140        if (script.length() != 0)
141            result += "_" + script;
142        return result;
143    }
144
145    public void _test(String localeName) throws SAXException, IOException {
146        // uLocale = new ULocale(localeName);
147        // oLocale = uLocale.toLocale();
148
149        File f = new File(CLDR_DIRECTORY, "test/" + localeName + ".xml");
150        logln("Testing " + f.getCanonicalPath());
151        SAX.parse(f, DEFAULT_HANDLER);
152    }
153
154    private static class ToHex {
155        public String transliterate(String in) {
156            StringBuilder sb = new StringBuilder();
157            for (int i = 0; i < in.length(); ++i) {
158                char c = in.charAt(i);
159                sb.append("\\u");
160                if (c < 1000) {
161                    sb.append('0');
162                    if (c < 100) {
163                        sb.append('0');
164                        if (c < 10) {
165                            sb.append('0');
166                        }
167                    }
168                }
169                sb.append(Integer.toHexString((int) c));
170            }
171            return sb.toString();
172        }
173    }
174
175    // static Transliterator toUnicode = Transliterator.getInstance("any-hex");
176    private static final ToHex toUnicode = new ToHex();
177
178    static public String showString(String in) {
179        return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")";
180    }
181
182    // ============ SAX Handler Infrastructure ============
183
184    abstract public class Handler {
185        Map settings = new TreeMap();
186        String name;
187        List currentLocales = new ArrayList();
188        int failures = 0;
189
190        void setName(String name) {
191            this.name = name;
192        }
193
194        void set(String attributeName, String attributeValue) {
195            // if (DEBUG) logln(attributeName + " => " + attributeValue);
196            settings.put(attributeName, attributeValue);
197        }
198
199        void checkResult(String value) {
200            if (settings.get("draft").equals("unconfirmed") || settings.get("draft").equals("provisional")) {
201                return; // skip draft
202            }
203            ULocale ul = new ULocale("xx");
204            try {
205                for (int i = 0; i < currentLocales.size(); ++i) {
206                    ul = (ULocale) currentLocales.get(i);
207                    // loglnSAX("  Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
208                    handleResult(ul, value);
209                    if (failures != 0) {
210                        errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH)
211                                + ")");
212                        failures = 0;
213                    }
214                }
215            } catch (Exception e) {
216                StringWriter sw = new StringWriter();
217                PrintWriter pw = new PrintWriter(sw);
218                e.printStackTrace(pw);
219                pw.flush();
220                errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString());
221            }
222        }
223
224        public void loglnSAX(String message) {
225            String temp = message + "\t[" + name;
226            for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
227                String attributeName = (String) it.next();
228                String attributeValue = (String) settings.get(attributeName);
229                temp += " " + attributeName + "=<" + attributeValue + ">";
230            }
231            logln(temp + "]");
232        }
233
234        int lookupValue(Object x, Object[] list) {
235            for (int i = 0; i < list.length; ++i) {
236                if (x.equals(list[i]))
237                    return i;
238            }
239            loglnSAX("Unknown String: " + x);
240            return -1;
241        }
242
243        abstract void handleResult(ULocale currentLocale, String value) throws Exception;
244
245        /**
246         * @param attributes
247         */
248        public void setAttributes(Attributes attributes) {
249            String localeList = attributes.getValue("locales");
250            String[] currentLocaleString = new String[50];
251            com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString);
252            currentLocales.clear();
253            for (int i = 0; i < currentLocaleString.length; ++i) {
254                if (currentLocaleString[i].length() == 0)
255                    continue;
256                if (allLocales.contains("")) {
257                    logln("Skipping locale, not in ICU4J: " + currentLocaleString[i]);
258                    continue;
259                }
260                currentLocales.add(new ULocale(currentLocaleString[i]));
261            }
262            if (DEBUG)
263                logln("Setting locales: " + currentLocales);
264        }
265    }
266
267    public Handler getHandler(String name, Attributes attributes) {
268        if (DEBUG)
269            logln("Creating Handler: " + name);
270        Handler result = (Handler) RegisteredHandlers.get(name);
271        if (result == null)
272            logln("Unexpected test type: " + name);
273        else {
274            result.setAttributes(attributes);
275        }
276        return result;
277    }
278
279    public void addHandler(String name, Handler handler) {
280        if (!TEST_MATCH.reset(name).matches())
281            handler = new NullHandler();
282        handler.setName(name);
283        RegisteredHandlers.put(name, handler);
284    }
285
286    Map RegisteredHandlers = new HashMap();
287
288    class NullHandler extends Handler {
289        void handleResult(ULocale currentLocale, String value) throws Exception {
290        }
291    }
292
293    // ============ Statics for Date/Number Support ============
294
295    static TimeZone utc = TimeZone.getTimeZone("GMT");
296    static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
297    {
298        iso.setTimeZone(utc);
299    }
300
301    static int[] DateFormatValues = { -1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL };
302
303    // The following are different data format types that are part of the parameters in CLDR
304    static String[] DateFormatNames = { "none", "short", "medium", "long", "full" };
305
306    // The following are different number types that are part of the parameters in CLDR
307    static String[] NumberNames = { "standard", "integer", "decimal", "percent", "scientific", "GBP" };
308
309
310    // ============ Handler for Collation ============
311    static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
312
313    static String remove(String in, UnicodeSet toRemove) {
314        int cp;
315        StringBuffer result = new StringBuffer();
316        for (int i = 0; i < in.length(); i += UTF16.getCharCount(cp)) {
317            cp = UTF16.charAt(in, i);
318            if (!toRemove.contains(cp))
319                UTF16.append(result, cp);
320        }
321        return result.toString();
322    }
323
324    {
325        // johnvu: Collator was originally disabled
326        // TODO (dougfelt) move this test
327        /*
328          addHandler("collation", new Handler() {
329             public void handleResult(ULocale currentLocale, String value) {
330                 Collator col = Collator.getInstance(currentLocale);
331                 String lastLine = "";
332                 int count = 0;
333                 for (int pos = 0; pos < value.length();) {
334                     int nextPos = value.indexOf('\n', pos);
335                     if (nextPos < 0)
336                         nextPos = value.length();
337                     String line = value.substring(pos, nextPos);
338                     line = remove(line, controlsAndSpace);  HACK for SAX
339                     if (line.trim().length() != 0) {  HACK for SAX
340                         int comp = col.compare(lastLine, line);
341                         if (comp > 0) {
342                             failures++;
343                             errln("\tLine " + (count + 1) + "\tFailure: "
344                                     + showString(lastLine) + " should be leq "
345                                     + showString(line));
346                         } else if (DEBUG) {
347                             logln("OK: " + line);
348                         }
349                         lastLine = line;
350                     }
351                     pos = nextPos + 1;
352                     count++;
353                 }
354             }
355         });
356        */
357
358        // ============ Handler for Numbers ============
359        addHandler("number", new Handler() {
360            public void handleResult(ULocale locale, String result) {
361                NumberFormat nf = null;
362                double v = Double.NaN;
363                for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
364                    String attributeName = (String) it.next();
365                    String attributeValue = (String) settings.get(attributeName);
366
367                    // Checks if the attribute name is a draft and whether
368                    // or not it has been approved / contributed by CLDR yet
369                    // otherwise, skips it because it is most likely rejected by ICU
370                    if (attributeName.equals("draft")) {
371                        if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
372                            break;
373                        }
374                        continue;
375                    }
376
377                    // Update the value to be checked
378                    if (attributeName.equals("input")) {
379                        v = Double.parseDouble(attributeValue);
380                        continue;
381                    }
382
383                    // At this point, it must be a numberType
384                    int index = lookupValue(attributeValue, NumberNames);
385
386                    if (DEBUG)
387                        logln("Getting number format for " + locale);
388                    switch (index) {
389                    case 0:
390                        nf = NumberFormat.getInstance(locale);
391                        break;
392                    case 1:
393                        nf = NumberFormat.getIntegerInstance(locale);
394                        break;
395                    case 2:
396                        nf = NumberFormat.getNumberInstance(locale);
397                        break;
398                    case 3:
399                        nf = NumberFormat.getPercentInstance(locale);
400                        break;
401                    case 4:
402                        nf = NumberFormat.getScientificInstance(locale);
403                        break;
404                    default:
405                        nf = NumberFormat.getCurrencyInstance(locale);
406                        nf.setCurrency(Currency.getInstance(attributeValue));
407                        break;
408                    }
409                    String temp = nf.format(v).trim();
410                    result = result.trim(); // HACK because of SAX
411                    if (!temp.equals(result)) {
412                        logln("Number: Locale: " + locale +
413                                "\n\tType: " + attributeValue +
414                                "\n\tDraft: " + settings.get("draft") +
415                                "\n\tCLDR: <" + result + ">" +
416                                "\n\tICU: <" + temp + ">");
417                    }
418
419                }
420            }
421        });
422
423        // ============ Handler for Dates ============
424        addHandler("date", new Handler() {
425            public void handleResult(ULocale locale, String result) throws ParseException {
426                int dateFormat = 0;
427                int timeFormat = 0;
428                Date date = new Date();
429                boolean approved = true;
430
431                for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
432                    String attributeName = (String) it.next();
433                    String attributeValue = (String) settings.get(attributeName);
434
435                    // Checks if the attribute name is a draft and whether
436                    // or not it has been approved / contributed by CLDR yet
437                    // otherwise, skips it because it is most likely rejected by ICU
438                    if (attributeName.equals("draft")) {
439                        if (attributeValue.indexOf("approved") == -1 && attributeValue.indexOf("contributed") == -1) {
440                            approved = false;
441                            break;
442                        }
443                        continue;
444                    }
445
446                    // Update the value to be checked
447                    if (attributeName.equals("input")) {
448                        date = iso.parse(attributeValue);
449                        continue;
450                    }
451                    // At this point, it must be either dateType or timeType
452                    int index = lookupValue(attributeValue, DateFormatNames);
453                    if (attributeName.equals("dateType"))
454                        dateFormat = index;
455                    else if (attributeName.equals("timeType"))
456                        timeFormat = index;
457
458                }
459
460                // The attribute value must be approved in order to be checked,
461                // if it hasn't been approved, it shouldn't be checked if it
462                // matches with ICU
463                if (approved) {
464                    SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat);
465                    dt.setTimeZone(utc);
466                    String temp = dt.format(date).trim();
467                    result = result.trim(); // HACK because of SAX
468                    if (!temp.equals(result)) {
469                        logln("DateTime: Locale: " + locale +
470                                "\n\tDate: " + DateFormatNames[dateFormat] +
471                                "\n\tTime: " + DateFormatNames[timeFormat] +
472                                "\n\tDraft: " + settings.get("draft") +
473                                "\n\tCLDR: <" + result + "> " +
474                                "\n\tICU: <" + temp + ">");
475                    }
476                }
477            }
478
479            private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) {
480                if (DEBUG)
481                    logln("Getting date/time format for " + locale);
482                if (DEBUG && "ar_EG".equals(locale.toString())) {
483                    logln("debug here");
484                }
485                DateFormat dt;
486                if (dateFormat == 0) {
487                    dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale);
488                    if (DEBUG)
489                        System.out.print("getTimeInstance");
490                } else if (timeFormat == 0) {
491                    dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale);
492                    if (DEBUG)
493                        System.out.print("getDateInstance");
494                } else {
495                    dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat],
496                            locale);
497                    if (DEBUG)
498                        System.out.print("getDateTimeInstance");
499                }
500                if (DEBUG)
501                    logln("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat) dt).toPattern());
502                return (SimpleDateFormat) dt;
503            }
504        });
505
506        // ============ Handler for Zones ============
507        addHandler("zoneFields", new Handler() {
508            String date = "";
509            String zone = "";
510            String parse = "";
511            String pattern = "";
512
513            public void handleResult(ULocale locale, String result) throws ParseException {
514                for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
515                    String attributeName = (String) it.next();
516                    String attributeValue = (String) settings.get(attributeName);
517                    if (attributeName.equals("date")) {
518                        date = attributeValue;
519                    } else if (attributeName.equals("field")) {
520                        pattern = attributeValue;
521                    } else if (attributeName.equals("zone")) {
522                        zone = attributeValue;
523                    } else if (attributeName.equals("parse")) {
524                        parse = attributeValue;
525                    }
526                }
527
528                if (!ZONE_MATCH.reset(zone).matches()) return;
529                Date dateValue = iso.parse(date);
530                SimpleDateFormat field = new SimpleDateFormat(pattern, locale);
531                field.setTimeZone(TimeZone.getTimeZone(zone));
532                String temp = field.format(dateValue).trim();
533                // SKIP PARSE FOR NOW
534                result = result.trim(); // HACK because of SAX
535                if (!temp.equals(result)) {
536                    temp = field.format(dateValue).trim(); // call again for debugging
537                    logln("Zone Format: Locale: " + locale
538                            + "\n\tZone: " + zone
539                            + "\n\tDate: " + date
540                            + "\n\tField: " + pattern
541                            + "\n\tParse: " + parse
542                            + "\n\tDraft: " + settings.get("draft")
543                            + "\n\tCLDR: <" + result
544                            + ">\n\tICU: <" + temp + ">");
545                }
546            }
547        });
548    }
549
550    // ============ Gorp for SAX ============
551
552    {
553        try {
554            SAXParserFactory factory = SAXParserFactory.newInstance();
555            factory.setValidating(true);
556            SAX = factory.newSAXParser();
557        } catch (Exception e) {
558            throw new IllegalArgumentException("SAXParserFacotry was unable to start.");
559        }
560    }
561
562    DefaultHandler DEFAULT_HANDLER = new DefaultHandler() {
563        static final boolean DEBUG = false;
564        StringBuffer lastChars = new StringBuffer();
565        // boolean justPopped = false;
566        Handler handler;
567
568        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
569            // data.put(new ContextStack(contextStack), lastChars);
570            // lastChars = "";
571            try {
572                if (qName.equals("cldrTest")) {
573                    // skip
574                } else if (qName.equals("result") && handler != null) {
575                    for (int i = 0; i < attributes.getLength(); ++i) {
576                        handler.set(attributes.getQName(i), attributes.getValue(i));
577                    }
578                } else {
579                    handler = getHandler(qName, attributes);
580                    // handler.set("locale", uLocale.toString());
581                }
582                // if (DEBUG) logln("startElement:\t" + contextStack);
583                // justPopped = false;
584            } catch (RuntimeException e) {
585                e.printStackTrace();
586                throw e;
587            }
588        }
589
590        public void endElement(String uri, String localName, String qName) throws SAXException {
591            try {
592                // if (DEBUG) logln("endElement:\t" + contextStack);
593                if (qName.equals("result") && handler != null) {
594                    handler.checkResult(lastChars.toString());
595                } else if (qName.length() != 0) {
596                    // logln("Unexpected contents of: " + qName + ", <" + lastChars + ">");
597                }
598                lastChars.setLength(0);
599                // justPopped = true;
600            } catch (RuntimeException e) {
601                e.printStackTrace();
602                throw e;
603            }
604        }
605
606        // Have to hack around the fact that the character data might be in pieces
607        public void characters(char[] ch, int start, int length) throws SAXException {
608            try {
609                String value = new String(ch, start, length);
610                if (DEBUG)
611                    logln("characters:\t" + value);
612                lastChars.append(value);
613                // justPopped = false;
614            } catch (RuntimeException e) {
615                e.printStackTrace();
616                throw e;
617            }
618        }
619
620        // just for debugging
621
622        public void notationDecl(String name, String publicId, String systemId) throws SAXException {
623            logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
624        }
625
626        public void processingInstruction(String target, String data) throws SAXException {
627            logln("processingInstruction: " + target + ", " + data);
628        }
629
630        public void skippedEntity(String name) throws SAXException {
631            logln("skippedEntity: " + name);
632        }
633
634        public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName)
635                throws SAXException {
636            logln("unparsedEntityDecl: " + name + ", " + publicId + ", " + systemId + ", " + notationName);
637        }
638    };
639}
640