1/*
2*******************************************************************************
3* Copyright (C) 2009-2014, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7
8package com.ibm.icu.impl.text;
9
10import java.util.HashMap;
11import java.util.Map;
12
13import com.ibm.icu.impl.ICUDebug;
14import com.ibm.icu.text.CollationElementIterator;
15import com.ibm.icu.text.Collator;
16import com.ibm.icu.text.RbnfLenientScanner;
17import com.ibm.icu.text.RbnfLenientScannerProvider;
18import com.ibm.icu.text.RuleBasedCollator;
19import com.ibm.icu.util.ULocale;
20
21/**
22 * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
23 * implementation behind setLenientParseMode, which is based on Collator.
24 * @internal
25 * @deprecated This API is ICU internal only.
26 */
27@Deprecated
28public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
29    private static final boolean DEBUG = ICUDebug.enabled("rbnf");
30    private Map<String, RbnfLenientScanner> cache;
31
32    /**
33     * @internal
34     * @deprecated This API is ICU internal only.
35     */
36    @Deprecated
37    public RbnfScannerProviderImpl() {
38        cache = new HashMap<String, RbnfLenientScanner>();
39    }
40
41    /**
42     * Returns a collation-based scanner.
43     *
44     * Only primary differences are treated as significant.  This means that case
45     * differences, accent differences, alternate spellings of the same letter
46     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
47     * matching the text.  In many cases, numerals will be accepted in place of words
48     * or phrases as well.
49     *
50     * For example, all of the following will correctly parse as 255 in English in
51     * lenient-parse mode:
52     * <br>"two hundred fifty-five"
53     * <br>"two hundred fifty five"
54     * <br>"TWO HUNDRED FIFTY-FIVE"
55     * <br>"twohundredfiftyfive"
56     * <br>"2 hundred fifty-5"
57     *
58     * The Collator used is determined by the locale that was
59     * passed to this object on construction.  The description passed to this object
60     * on construction may supply additional collation rules that are appended to the
61     * end of the default collator for the locale, enabling additional equivalences
62     * (such as adding more ignorable characters or permitting spelled-out version of
63     * symbols; see the demo program for examples).
64     *
65     * It's important to emphasize that even strict parsing is relatively lenient: it
66     * will accept some text that it won't produce as output.  In English, for example,
67     * it will correctly parse "two hundred zero" and "fifteen hundred".
68     *
69     * @internal
70     * @deprecated This API is ICU internal only.
71     */
72    @Deprecated
73    public RbnfLenientScanner get(ULocale locale, String extras) {
74        RbnfLenientScanner result = null;
75        String key = locale.toString() + "/" + extras;
76        synchronized(cache) {
77            result = cache.get(key);
78            if (result != null) {
79                return result;
80            }
81        }
82        result = createScanner(locale, extras);
83        synchronized(cache) {
84            cache.put(key, result);
85        }
86        return result;
87    }
88
89    /**
90     * @internal
91     * @deprecated This API is ICU internal only.
92     */
93    @Deprecated
94    protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
95        RuleBasedCollator collator = null;
96        try {
97            // create a default collator based on the locale,
98            // then pull out that collator's rules, append any additional
99            // rules specified in the description, and create a _new_
100            // collator based on the combination of those rules
101            collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
102            if (extras != null) {
103                String rules = collator.getRules() + extras;
104                collator = new RuleBasedCollator(rules);
105            }
106            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
107        }
108        catch (Exception e) {
109            // If we get here, it means we have a malformed set of
110            // collation rules, which hopefully won't happen
111            ///CLOVER:OFF
112            if (DEBUG){ // debug hook
113                e.printStackTrace(); System.out.println("++++");
114            }
115            collator = null;
116            ///CLOVER:ON
117        }
118
119        return new RbnfLenientScannerImpl(collator);
120    }
121
122    private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
123        private final RuleBasedCollator collator;
124
125        private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
126            this.collator = rbc;
127        }
128
129        public boolean allIgnorable(String s) {
130            CollationElementIterator iter = collator.getCollationElementIterator(s);
131
132            int o = iter.next();
133            while (o != CollationElementIterator.NULLORDER
134                   && CollationElementIterator.primaryOrder(o) == 0) {
135                o = iter.next();
136            }
137            return o == CollationElementIterator.NULLORDER;
138        }
139
140        public int[] findText(String str, String key, int startingAt) {
141            int p = startingAt;
142            int keyLen = 0;
143
144            // basically just isolate smaller and smaller substrings of
145            // the target string (each running to the end of the string,
146            // and with the first one running from startingAt to the end)
147            // and then use prefixLength() to see if the search key is at
148            // the beginning of each substring.  This is excruciatingly
149            // slow, but it will locate the key and tell use how long the
150            // matching text was.
151            while (p < str.length() && keyLen == 0) {
152                keyLen = prefixLength(str.substring(p), key);
153                if (keyLen != 0) {
154                    return new int[] { p, keyLen };
155                }
156                ++p;
157            }
158            // if we make it to here, we didn't find it.  Return -1 for the
159            // location.  The length should be ignored, but set it to 0,
160            // which should be "safe"
161            return new int[] { -1, 0 };
162        }
163
164        ///CLOVER:OFF
165        // The following method contains the same signature as findText
166        //  and has never been used by anything once.
167        @SuppressWarnings("unused")
168        public int[] findText2(String str, String key, int startingAt) {
169
170            CollationElementIterator strIter = collator.getCollationElementIterator(str);
171            CollationElementIterator keyIter = collator.getCollationElementIterator(key);
172
173            int keyStart = -1;
174
175            strIter.setOffset(startingAt);
176
177            int oStr = strIter.next();
178            int oKey = keyIter.next();
179            while (oKey != CollationElementIterator.NULLORDER) {
180                while (oStr != CollationElementIterator.NULLORDER &&
181                       CollationElementIterator.primaryOrder(oStr) == 0)
182                    oStr = strIter.next();
183
184                while (oKey != CollationElementIterator.NULLORDER &&
185                       CollationElementIterator.primaryOrder(oKey) == 0)
186                    oKey = keyIter.next();
187
188                if (oStr == CollationElementIterator.NULLORDER) {
189                    return new int[] { -1, 0 };
190                }
191
192                if (oKey == CollationElementIterator.NULLORDER) {
193                    break;
194                }
195
196                if (CollationElementIterator.primaryOrder(oStr) ==
197                    CollationElementIterator.primaryOrder(oKey)) {
198                    keyStart = strIter.getOffset();
199                    oStr = strIter.next();
200                    oKey = keyIter.next();
201                } else {
202                    if (keyStart != -1) {
203                        keyStart = -1;
204                        keyIter.reset();
205                    } else {
206                        oStr = strIter.next();
207                    }
208                }
209            }
210
211            if (oKey == CollationElementIterator.NULLORDER) {
212                return new int[] { keyStart, strIter.getOffset() - keyStart };
213            }
214
215            return new int[] { -1, 0 };
216        }
217        ///CLOVER:ON
218
219        public int prefixLength(String str, String prefix) {
220            // Create two collation element iterators, one over the target string
221            // and another over the prefix.
222            //
223            // Previous code was matching "fifty-" against " fifty" and leaving
224            // the number " fifty-7" to parse as 43 (50 - 7).
225            // Also it seems that if we consume the entire prefix, that's ok even
226            // if we've consumed the entire string, so I switched the logic to
227            // reflect this.
228
229            CollationElementIterator strIter = collator.getCollationElementIterator(str);
230            CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
231
232            // match collation elements between the strings
233            int oStr = strIter.next();
234            int oPrefix = prefixIter.next();
235
236            while (oPrefix != CollationElementIterator.NULLORDER) {
237                // skip over ignorable characters in the target string
238                while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
239                       CollationElementIterator.NULLORDER) {
240                    oStr = strIter.next();
241                }
242
243                // skip over ignorable characters in the prefix
244                while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
245                       CollationElementIterator.NULLORDER) {
246                    oPrefix = prefixIter.next();
247                }
248
249                // if skipping over ignorables brought to the end of
250                // the prefix, we DID match: drop out of the loop
251                if (oPrefix == CollationElementIterator.NULLORDER) {
252                    break;
253                }
254
255                // if skipping over ignorables brought us to the end
256                // of the target string, we didn't match and return 0
257                if (oStr == CollationElementIterator.NULLORDER) {
258                    return 0;
259                }
260
261                // match collation elements from the two strings
262                // (considering only primary differences).  If we
263                // get a mismatch, dump out and return 0
264                if (CollationElementIterator.primaryOrder(oStr) !=
265                    CollationElementIterator.primaryOrder(oPrefix)) {
266                    return 0;
267                }
268
269                // otherwise, advance to the next character in each string
270                // and loop (we drop out of the loop when we exhaust
271                // collation elements in the prefix)
272
273                oStr = strIter.next();
274                oPrefix = prefixIter.next();
275            }
276
277            int result = strIter.getOffset();
278            if (oStr != CollationElementIterator.NULLORDER) {
279                --result;
280            }
281            return result;
282        }
283    }
284}
285