12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 52d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * Copyright (C) 1996-2016, International Business Machines Corporation and 62d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * others. All Rights Reserved. 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.MessageFormat; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Collections; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Enumeration; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.List; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.MissingResourceException; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 212d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubertimport com.ibm.icu.impl.ICUData; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport 
com.ibm.icu.impl.ICUResourceBundle; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.UtilityExtensions; 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedTransliterator.Data; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.TransliteratorIDParser.SingleID; 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CaseInsensitiveString; 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale.Category; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.UResourceBundle; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>Transliterator</code> is an abstract class that transliterates text from one format to another. The most common 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * kind of transliterator is a script, or alphabet, transliterator. For example, a Russian to Latin transliterator 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * changes Russian text written in Cyrillic characters to phonetically equivalent Latin characters. It does not 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <em>translate</em> Russian to English! Transliteration, unlike translation, operates on characters, without reference 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * to the meanings of words and sentences. 
382d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Although script conversion is its most common use, a transliterator can actually perform a more general class of 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * tasks. In fact, <code>Transliterator</code> defines a very general API which specifies only that a segment of the 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * input text is replaced by new text. The particulars of this conversion are determined entirely by subclasses of 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>Transliterator</code>. 442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Transliterators are stateless</b> 472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>Transliterator</code> objects are <em>stateless</em>; they retain no information between calls to 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>transliterate()</code>. As a result, threads may share transliterators without synchronizing them. This might 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * seem to limit the complexity of the transliteration operation. In practice, subclasses perform complex 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterations by delaying the replacement of text until it is known that no other replacements are possible. 
In 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * other words, although the <code>Transliterator</code> objects are stateless, the source text itself embodies all the 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * needed information, and delayed operation allows arbitrary complexity. 552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Batch transliteration</b> 582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The simplest way to perform transliteration is all at once, on a string of existing text. This is referred to as 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <em>batch</em> transliteration. For example, given a string <code>input</code> and a transliterator <code>t</code>, 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the call 632d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote><code>String result = t.transliterate(input); 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </code></blockquote> 662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will transliterate it and return the result. Other methods allow the client to specify a substring to be 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated and to use {@link Replaceable} objects instead of strings, in order to preserve out-of-band 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * information (such as text styles). 
702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Keyboard transliteration</b> 732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Somewhat more involved is <em>keyboard</em>, or incremental transliteration. This is the transliteration of text that 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is arriving from some source (typically the user's keyboard) one character at a time, or in some other piecemeal 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * fashion. 782d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In keyboard transliteration, a <code>Replaceable</code> buffer stores the text. As text is inserted, as much as 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * possible is transliterated on the fly. This means a GUI that displays the contents of the buffer may show text being 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * modified as each new character arrives. 
832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Consider the simple <code>RuleBasedTransliterator</code>: 862d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote><code> 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * th>{theta}<br> 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * t>{tau} 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </code></blockquote> 912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * When the user types 't', nothing will happen, since the transliterator is waiting to see if the next character is 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 'h'. To remedy this, we introduce the notion of a cursor, marked by a '|' in the output string: 942d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <blockquote><code> 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * t>|{tau}<br> 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {tau}h>{theta} 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </code></blockquote> 992d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Now when the user types 't', tau appears, and if the next character is 'h', the tau changes to a theta. This is 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * accomplished by maintaining a cursor position (independent of the insertion point, and invisible in the GUI) across 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * calls to <code>transliterate()</code>. 
Typically, the cursor will be coincident with the insertion point, but in a 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * case like the one above, it will precede the insertion point. 1042d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Keyboard transliteration methods maintain a set of three indices that are updated with each call to 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>transliterate()</code>, including the cursor, start, and limit. These indices are changed by the method, and 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * they are passed in and out via a Position object. The <code>start</code> index marks the beginning of the substring 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that the transliterator will look at. It is advanced as text becomes committed (but it is not the committed index; 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that's the <code>cursor</code>). The <code>cursor</code> index, described above, marks the point at which the 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator last stopped, either because it reached the end, or because it required more characters to 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * disambiguate between possible inputs. The <code>cursor</code> can also be explicitly set by rules in a 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>RuleBasedTransliterator</code>. Any characters before the <code>cursor</code> index are frozen; future keyboard 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliteration calls within this input sequence will not change them. 
New text is inserted at the <code>limit</code> 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * index, which marks the end of the substring that the transliterator looks at. 1162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Because keyboard transliteration assumes that more characters are to arrive, it is conservative in its operation. It 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * only transliterates when it can do so unambiguously. Otherwise it waits for more characters to arrive. When the 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * client code knows that no more characters are forthcoming, perhaps because the user has performed some input 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * termination operation, then it should call <code>finishTransliteration()</code> to complete any pending 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterations. 1232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Inverses</b> 1262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Pairs of transliterators may be inverses of one another. For example, if transliterator <b>A</b> transliterates 1299e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * characters by incrementing their Unicode value (so "abc" -> "def"), and transliterator <b>B</b> decrements character 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * values, then <b>A</b> is an inverse of <b>B</b> and vice versa. 
If we compose <b>A</b> with <b>B</b> in a compound 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator, the result is the identity transliterator, that is, a transliterator that does not change its input 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text. 1332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The <code>Transliterator</code> method <code>getInverse()</code> returns a transliterator's inverse, if one exists, 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or <code>null</code> otherwise. However, the result of <code>getInverse()</code> usually will <em>not</em> be a true 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * mathematical inverse. This is because true inverse transliterators are difficult to formulate. For example, consider 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * two transliterators: <b>AB</b>, which transliterates the character 'A' to 'B', and <b>BA</b>, which transliterates 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 'B' to 'A'. It might seem that these are exact inverses, since 1392d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1409e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <blockquote>"A" x <b>AB</b> -> "B"<br> 1419e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * "B" x <b>BA</b> -> "A"</blockquote> 1422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * where 'x' represents transliteration.
However, 1442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1459e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br> 1469e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * "BBCD" x <b>BA</b> -> "AACD"</blockquote> 1472d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * so <b>AB</b> composed with <b>BA</b> is not the identity. Nonetheless, <b>BA</b> may be usefully considered to be 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>AB</b>'s inverse, and it is on this basis that <b>AB</b><code>.getInverse()</code> could legitimately return 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>BA</b>. 1512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Filtering</b> 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Each transliterator has a filter, which restricts changes to those characters selected by the filter. The 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * filter affects just the characters that are changed -- the characters outside of the filter are still part of the 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * context for the filter. For example, in the following even though 'x' is filtered out, and doesn't convert to y, it does affect the conversion of 'a'. 
1572d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * String rules = "x > y; x{a} > b; "; 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator tempTrans = Transliterator.createFromRules("temp", rules, Transliterator.FORWARD); 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * tempTrans.setFilter(new UnicodeSet("[a]")); 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * String tempResult = tempTrans.transform("xa"); 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * // results in "xb" 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *</pre> 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>IDs and display names</b> 1672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator is designated by a short identifier string or <em>ID</em>. IDs follow the format 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <em>source-destination</em>, where <em>source</em> describes the entity being replaced, and <em>destination</em> 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * describes the entity replacing <em>source</em>. The entities may be the names of scripts, particular sequences of 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters, or whatever else it is that the transliterator converts to or from. For example, a transliterator from 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Russian to Latin might be named "Russian-Latin". A transliterator from keyboard escape sequences to Latin-1 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters might be named "KeyboardEscape-Latin1". 
By convention, system entity names are in English, with the 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * initial letters of words capitalized; user entity names may follow any format so long as they do not contain dashes. 1762d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In addition to programmatic IDs, transliterator objects have display names for presentation in user interfaces, 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returned by {@link #getDisplayName}. 1802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Factory methods and registration</b> 1832d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In general, client code should use the factory method <code>getInstance()</code> to obtain an instance of a 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator given its ID. Valid IDs may be enumerated using <code>getAvailableIDs()</code>. Since transliterators 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are stateless, multiple calls to <code>getInstance()</code> with the same ID will return the same object. 1882d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In addition to the system transliterators registered at startup, user transliterators may be registered by calling 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>registerInstance()</code> at run time. 
To register a transliterator subclass without instantiating it (until it 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is needed), users may call <code>registerClass()</code>. 1932d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Composed transliterators</b> 1962d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In addition to built-in system transliterators like "Latin-Greek", there are also built-in <em>composed</em> 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators. These are implemented by composing two or more component transliterators. For example, if we have 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * scripts "A", "B", "C", and "D", and we want to transliterate between all pairs of them, then we need to write 12 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators: "A-B", "A-C", "A-D", "B-A",..., "D-A", "D-B", "D-C". If it is possible to convert all scripts to an 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * intermediate script "M", then instead of writing 12 rule sets, we only need to write 8: "A~M", "B~M", "C~M", "D~M", 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "M~A", "M~B", "M~C", "M~D". (This might not seem like a big win, but it's really 2<em>n</em> vs. <em>n</em> 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <sup>2</sup> - <em>n</em>, so as <em>n</em> gets larger the gain becomes significant. With 9 scripts, it's 18 vs. 72 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * rule sets, a big difference.) 
Note the use of "~" rather than "-" for the script separator here; this indicates that 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the given transliterator is intended to be composed with others, rather than be used as is. 2072d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Composed transliterators can be instantiated as usual. For example, the system transliterator "Devanagari-Gujarati" 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is a composed transliterator built internally as "Devanagari~InterIndic;InterIndic~Gujarati". When this 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator is instantiated, it appears externally to be a standard transliterator (e.g., getID() returns 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "Devanagari-Gujarati"). 2132d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b>Subclassing</b> 2162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Subclasses must implement the abstract method <code>handleTransliterate()</code>. 
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Subclasses should override the <code>transliterate()</code> method taking a <code>Replaceable</code> and the 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>transliterate()</code> method taking a <code>String</code> and <code>StringBuffer</code> if the performance of 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * these methods can be improved over the performance obtained by the default implementations in this class. 2232d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class Transliterator implements StringTransform { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Direction constant indicating the forward direction in a transliterator, 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * e.g., the forward rules of a RuleBasedTransliterator. An "A-B" 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator transliterates A to B when operating in the forward 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * direction, and B to A when operating in the reverse direction. 
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int FORWARD = 0; 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Direction constant indicating the reverse direction in a transliterator, 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * e.g., the reverse rules of a RuleBasedTransliterator. An "A-B" 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator transliterates A to B when operating in the forward 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * direction, and B to A when operating in the reverse direction. 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int REVERSE = 1; 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Position structure for incremental transliteration. This data 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * structure defines two substrings of the text being 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated. The first region, [contextStart, 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contextLimit), defines what characters the transliterator will 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * read as context. The second region, [start, limit), defines 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * what characters will actually be transliterated. 
The second 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * region should be a subset of the first. 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>After a transliteration operation, some of the indices in this 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * structure will be modified. See the field descriptions for 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * details. 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2599e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <p>contextStart &lt;= start &lt;= limit &lt;= contextLimit 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Note: All index values in this structure must be at code point 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * boundaries. That is, none of them may occur between two code units 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of a surrogate pair. If any index does split a surrogate pair, 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * results are unspecified. 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static class Position { 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Beginning index, inclusive, of the context to be considered for 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a transliteration operation. The transliterator will ignore 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * anything before this index.
INPUT/OUTPUT parameter: This parameter 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is updated by a transliteration operation to reflect the maximum 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * amount of antecontext needed by a transliterator. 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int contextStart; 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Ending index, exclusive, of the context to be considered for a 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliteration operation. The transliterator will ignore 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * anything at or after this index. INPUT/OUTPUT parameter: This 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * parameter is updated to reflect changes in the length of the 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text, but points to the same logical position in the text. 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int contextLimit; 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Beginning index, inclusive, of the text to be transliterated.
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * INPUT/OUTPUT parameter: This parameter is advanced past 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters that have already been transliterated by a 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliteration operation. 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int start; 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Ending index, exclusive, of the text to be transliterated. 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * INPUT/OUTPUT parameter: This parameter is updated to reflect 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * changes in the length of the text, but points to the same 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * logical position in the text. 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int limit; 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a Position object with start, limit, 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contextStart, and contextLimit all equal to zero.
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Position() { 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this(0, 0, 0, 0); 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a Position object with the given start, 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contextStart, and contextLimit. The limit is set to the 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contextLimit. 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Position(int contextStart, int contextLimit, int start) { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this(contextStart, contextLimit, start, contextLimit); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a Position object with the given start, limit, 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contextStart, and contextLimit. 
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Position(int contextStart, int contextLimit, 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int start, int limit) { 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.contextStart = contextStart; 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.contextLimit = contextLimit; 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.start = start; 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.limit = limit; 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a Position object that is a copy of another. 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.6 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Position(Position pos) { 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert set(pos); 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copies the indices of this position from another. 
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.6 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void set(Position pos) { 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextStart = pos.contextStart; 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextLimit = pos.contextLimit; 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = pos.start; 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit = pos.limit; 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns true if this Position is equal to the given object. 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.6 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean equals(Object obj) { 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (obj instanceof Position) { 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position pos = (Position) obj; 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return contextStart == pos.contextStart && 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextLimit == pos.contextLimit && 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start == pos.start && 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit == pos.limit; 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3732d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Mock implementation of hashCode(). This implementation always returns a constant 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * value. When Java assertion is enabled, this method triggers an assertion failure. 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int hashCode() { 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert false : "hashCode not designed"; 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 42; 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a string representation of this Position. 
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.6 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3912d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toString() { 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return "[cs=" + contextStart 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + ", s=" + start 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + ", l=" + limit 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + ", cl=" + contextLimit 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + "]"; 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Check all bounds. If they are invalid, throw an exception. 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param length the length of the string this object applies to 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException if any indices are out 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of bounds 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void validate(int length) { 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (contextStart < 0 || 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start < contextStart || 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit < start || 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextLimit < limit || 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length < contextLimit) { 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik 
Roubert throw new IllegalArgumentException("Invalid Position {cs=" + 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextStart + ", s=" + 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start + ", l=" + 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit + ", cl=" + 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contextLimit + "}, len=" + 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length); 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Programmatic name, e.g., "Latin-Arabic". 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String ID; 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This transliterator's filter. Any character for which 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * altered by this transliterator. If <tt>filter</tt> is 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>null</tt> then no filtering is applied. 
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UnicodeSet filter; 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int maximumContextLength = 0; 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System transliterator registry. 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static TransliteratorRegistry registry; 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static Map<CaseInsensitiveString, String> displayNameCache; 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Prefix for resource bundle key for the display name for a 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator. The ID is appended to this to form the key. 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The resource bundle value should be a String. 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String RB_DISPLAY_NAME_PREFIX = "%Translit%%"; 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Prefix for resource bundle key for the display name for a 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator SCRIPT. The ID is appended to this to form the key. 
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The resource bundle value should be a String. 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String RB_SCRIPT_DISPLAY_NAME_PREFIX = "%Translit%"; 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Resource bundle key for display name pattern. 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The resource bundle value should be a String forming a 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * MessageFormat pattern, e.g.: 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}". 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern"; 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Delimiter between elements in a compound ID. 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char ID_DELIM = ';'; 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Delimiter before target in an ID. 
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char ID_SEP = '-'; 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Delimiter before variant in an ID. 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char VARIANT_SEP = '/'; 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * To enable debugging output in the Transliterator component, set 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * DEBUG to true. 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * N.B. Make sure to recompile all of the com.ibm.icu.text package 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * after changing this. Easiest way to do this is 'ant clean 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * core' ('ant' will NOT pick up the dependency automatically). 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <<This generates a lot of output.>> 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final boolean DEBUG = false; 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Default constructor. 
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID the string identifier for this transliterator 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param filter the filter. Any character for which 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * altered by this transliterator. If <tt>filter</tt> is 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>null</tt> then no filtering is applied. 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected Transliterator(String ID, UnicodeFilter filter) { 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (ID == null) { 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new NullPointerException(); 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.ID = ID; 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setFilter(filter); 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterates a segment of a string, with optional filtering. 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the string to be transliterated 5159e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * @param start the beginning index, inclusive; <code>0 <= start 5169e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <= limit</code>. 
5179e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * @param limit the ending index, exclusive; <code>start <= limit 5189e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <= text.length()</code>. 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The new limit index. The text previously occupying <code>[start, 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * limit)</code> has been transliterated, possibly to a string of a different 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <em>new-limit</em> is the return value. If the input offsets are out of bounds, 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the returned value is -1 and the input string remains unchanged. 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int transliterate(Replaceable text, int start, int limit) { 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (start < 0 || 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit < start || 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.length() < limit) { 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return -1; 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position pos = new Position(start, limit, start); 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filteredTransliterate(text, pos, false, true); 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos.limit; 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterates an entire string in place. Convenience method. 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the string to be transliterated 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void transliterate(Replaceable text) { 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterate(text, 0, text.length()); 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterate an entire string and returns the result. Convenience method. 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the string to be transliterated 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The transliterated text 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final String transliterate(String text) { 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ReplaceableString result = new ReplaceableString(text); 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterate(result); 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.toString(); 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 
Transliterates the portion of the text buffer that can be 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated unambiguosly after new text has been inserted, 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * typically as a result of a keyboard event. The new text in 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>insertion</code> will be inserted into <code>text</code> 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * at <code>index.contextLimit</code>, advancing 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.contextLimit</code> by <code>insertion.length()</code>. 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Then the transliterator will try to transliterate characters of 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>text</code> between <code>index.start</code> and 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.contextLimit</code>. Characters before 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.start</code> will not be changed. 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Upon return, values in <code>index</code> will be updated. 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.contextStart</code> will be advanced to the first 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * character that future calls to this method will read. 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.start</code> and <code>index.contextLimit</code> will 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be adjusted to delimit the range of text that future calls to 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this method may change. 
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Typical usage of this method begins with an initial call 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with <code>index.contextStart</code> and <code>index.contextLimit</code> 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * set to indicate the portion of <code>text</code> to be 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated, and <code>index.start == index.contextStart</code>. 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Thereafter, <code>index</code> can be used without 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * modification in future calls, provided that all changes to 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>text</code> are made via this method. 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>This method assumes that future calls may be made that will 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * insert new text into the buffer. As a result, it only performs 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * unambiguous transliterations. After the last call to this 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * method, there may be untransliterated text that is waiting for 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * more input to resolve an ambiguity. In order to perform these 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * pending transliterations, clients should call {@link 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * #finishTransliteration} after the last call to this 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * method has been made. 
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the buffer holding transliterated and untransliterated text 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the start and limit of the text, the position 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of the cursor, and the start and limit of transliteration. 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param insertion text to be inserted and possibly 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated into the translation buffer at 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.contextLimit</code>. If <code>null</code> then no text 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is inserted. 6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #handleTransliterate 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException if <code>index</code> 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is invalid 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void transliterate(Replaceable text, Position index, 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String insertion) { 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.validate(text.length()); 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// int originalStart = index.contextStart; 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (insertion != null) { 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(index.limit, index.limit, insertion); 6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit += 
insertion.length(); 6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.contextLimit += insertion.length(); 6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index.limit > 0 && 6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UTF16.isLeadSurrogate(text.charAt(index.limit - 1))) { 6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Oops, there is a dangling lead surrogate in the buffer. 6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // This will break most transliterators, since they will 6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // assume it is part of a pair. Don't transliterate until 6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // more text comes in. 6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filteredTransliterate(text, index, true, true); 6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// TODO 6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// This doesn't work once we add quantifier support. Need to rewrite 6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// this code to support quantifiers and 'use maximum backup <n>;'. 
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// 6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// index.contextStart = Math.max(index.start - getMaximumContextLength(), 6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// originalStart); 6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterates the portion of the text buffer that can be 6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated unambiguosly after a new character has been 6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * inserted, typically as a result of a keyboard event. This is a 6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * convenience method; see {@link #transliterate(Replaceable, 6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.Position, String)} for details. 6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the buffer holding transliterated and 6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * untransliterated text 6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the start and limit of the text, the position 6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of the cursor, and the start and limit of transliteration. 6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param insertion text to be inserted and possibly 6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated into the translation buffer at 6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>index.contextLimit</code>. 
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #transliterate(Replaceable, Transliterator.Position, String) 6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void transliterate(Replaceable text, Position index, 6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int insertion) { 6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterate(text, index, UTF16.valueOf(insertion)); 6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterates the portion of the text buffer that can be 6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterated unambiguosly. This is a convenience method; see 6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #transliterate(Replaceable, Transliterator.Position, 6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * String)} for details. 6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the buffer holding transliterated and 6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * untransliterated text 6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the start and limit of the text, the position 6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of the cursor, and the start and limit of transliteration. 
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #transliterate(Replaceable, Transliterator.Position, String) 6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void transliterate(Replaceable text, Position index) { 6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterate(text, index, null); 6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finishes any pending transliterations that were waiting for 6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * more characters. Clients should call this method as the last 6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * call after a sequence of one or more calls to 6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>transliterate()</code>. 6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the buffer holding transliterated and 6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * untransliterated text. 
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the array of indices previously passed to {@link 6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * #transliterate} 6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final void finishTransliteration(Replaceable text, 6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position index) { 6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.validate(text.length()); 6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filteredTransliterate(text, index, false, true); 6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Abstract method that concrete subclasses define to implement 6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * their transliteration algorithm. This method handles both 6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * incremental and non-incremental transliteration. Let 6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>originalStart</code> refer to the value of 6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> upon entry. 6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul> 7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>If <code>incremental</code> is false, then this method 7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * should transliterate all characters between 7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> and <code>pos.limit</code>. 
Upon return 7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> must == <code> pos.limit</code>.</li> 7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>If <code>incremental</code> is true, then this method 7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * should transliterate all characters between 7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> and <code>pos.limit</code> that can be 7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * unambiguously transliterated, regardless of future insertions 7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of text at <code>pos.limit</code>. Upon return, 7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> should be in the range 7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * [<code>originalStart</code>, <code>pos.limit</code>). 7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.start</code> should be positioned such that 7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters [<code>originalStart</code>, <code> 7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * pos.start</code>) will not be changed in the future by this 7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator and characters [<code>pos.start</code>, 7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code>) are unchanged.</li> 7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul> 7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Implementations of this method should also obey the 7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * following invariants:</p> 7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul> 7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li> <code>pos.limit</code> and <code>pos.contextLimit</code> 7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * should be updated to reflect changes in length of the text 7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between <code>pos.start</code> and <code>pos.limit</code>. The 7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * difference <code> pos.contextLimit - pos.limit</code> should 7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * not change.</li> 7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li><code>pos.contextStart</code> should not change.</li> 7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>Upon return, neither <code>pos.start</code> nor 7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code> should be less than 7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>originalStart</code>.</li> 7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>Text before <code>originalStart</code> and text after 7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code> should not change.</li> 7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>Text before <code>pos.contextStart</code> and text after 7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code> pos.contextLimit</code> should be ignored.</li> 7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul> 7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Subclasses may safely assume that 
all characters in 7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * [<code>pos.start</code>, <code>pos.limit</code>) are filtered. 7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In other words, the filter has already been applied by the time 7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this method is called. See 7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>filteredTransliterate()</code>. 7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>This method is <b>not</b> for public consumption. Calling 7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this method directly will transliterate 7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * [<code>pos.start</code>, <code>pos.limit</code>) without 7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * applying the filter. End user code should call <code> 7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterate()</code> instead of this method. Subclass code 7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * should call <code>filteredTransliterate()</code> instead of 7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this method.<p> 7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the buffer holding transliterated and 7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * untransliterated text 7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param pos the indices indicating the start, limit, context 7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * start, and context limit of the text. 
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param incremental if true, assume more text may be inserted at 7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code> and act accordingly. Otherwise, 7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterate all text between <code>pos.start</code> and 7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code> and move <code>pos.start</code> up to 7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>pos.limit</code>. 7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #transliterate 7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected abstract void handleTransliterate(Replaceable text, 7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position pos, boolean incremental); 7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Top-level transliteration method, handling filtering, incremental and 7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * non-incremental transliteration, and rollback. All transliteration 7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * public API methods eventually call this method with a rollback argument 7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of TRUE. Other entities may call this method but rollback should be 7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * FALSE. 
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>If this transliterator has a filter, break up the input text into runs 7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of unfiltered characters. Pass each run to 7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <subclass>.handleTransliterate(). 7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>In incremental mode, if rollback is TRUE, perform a special 7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * incremental procedure in which several passes are made over the input 7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text, adding one character at a time, and committing successful 7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterations as they occur. Unsuccessful transliterations are rolled 7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * back and retried with additional characters to give correct results. 
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the text to be transliterated 7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the position indices 7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param incremental if TRUE, then assume more characters may be inserted 7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * at index.limit, and postpone processing to accomodate future incoming 7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters 7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param rollback if TRUE and if incremental is TRUE, then perform special 7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * incremental processing, as described above, and undo partial 7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterations where necessary. If incremental is FALSE then this 7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * parameter is ignored. 8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void filteredTransliterate(Replaceable text, 8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position index, 8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean incremental, 8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean rollback) { 8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Short circuit path for transliterators with no filter in 8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // non-incremental mode. 
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (filter == null && !rollback) { 8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert handleTransliterate(text, index, incremental); 8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //---------------------------------------------------------------------- 8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // This method processes text in two groupings: 8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // RUNS -- A run is a contiguous group of characters which are contained 8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // in the filter for this transliterator (filter.contains(ch) == true). 8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Text outside of runs may appear as context but it is not modified. 8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The start and limit Position values are narrowed to each run. 8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // PASSES (incremental only) -- To make incremental mode work correctly, 8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // each run is broken up into n passes, where n is the length (in code 8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // points) of the run. Each pass contains the first n characters. If a 8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // pass is completely transliterated, it is committed, and further passes 8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // include characters after the committed text. 
If a pass is blocked, 8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and does not transliterate completely, then this method rolls back 8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the changes made during the pass, extends the pass by one code point, 8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and tries again. 8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //---------------------------------------------------------------------- 8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // globalLimit is the limit value for the entire operation. We 8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // set index.limit to the end of each unfiltered run before 8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // calling handleTransliterate(), so we need to maintain the real 8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // value of index.limit here. After each transliteration, we 8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // update globalLimit for insertions or deletions that have 8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // happened. 8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int globalLimit = index.limit; 8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there is a non-null filter, then break the input text up. Say the 8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // input text has the form: 8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // xxxabcxxdefxx 8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // where 'x' represents a filtered character (filter.contains('x') == 8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // false). 
Then we break this up into: 8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // xxxabc xxdef xx 8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Each pass through the loop consumes a run of filtered 8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // characters (which are ignored) and a subsequent run of 8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // unfiltered characters (which are transliterated). 8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer log = null; 8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log = new StringBuffer(); 8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (filter != null) { 8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Narrow the range to be transliterated to the first run 8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // of unfiltered characters at or after index.start. 
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Advance past filtered chars 8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c; 8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (index.start < globalLimit && 8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert !filter.contains(c=text.char32At(index.start))) { 8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.start += UTF16.getCharCount(c); 8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Find the end of this run of unfiltered chars 8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit = index.start; 8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (index.limit < globalLimit && 8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filter.contains(c=text.char32At(index.limit))) { 8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit += UTF16.getCharCount(c); 8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Check to see if the unfiltered run is empty. This only 8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // happens at the end of the string when all the remaining 8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // characters are filtered. 8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index.start == index.limit) { 8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Is this run incremental? 
If there is additional 8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // filtered text (if limit < globalLimit) then we pass in 8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // an incremental value of FALSE to force the subclass to 8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // complete the transliteration for this run. 8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isIncrementalRun = 8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (index.limit < globalLimit ? false : incremental); 8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int delta; 8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Implement rollback. To understand the need for rollback, 8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // consider the following transliterator: 8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // "t" is "a > A;" 8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // "u" is "A > b;" 8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // "v" is a compound of "t; NFD; u" with a filter [:Ll:] 8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Now apply "v" to the input text "a". The result is "b". But if 8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the transliteration is done incrementally, then the NFD holds 8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // things up after "t" has already transformed "a" to "A". 
When 9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // finishTransliterate() is called, "A" is _not_ processed because 9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // it gets excluded by the [:Ll:] filter, and the end result is "A" 9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // -- incorrect. The problem is that the filter is applied to a 9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // partially-transliterated result, when we only want it to apply to 9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // input text. Although this example describes a compound 9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // transliterator containing NFD and a specific filter, it can 9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // happen with any transliterator which does a partial 9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // transformation in incremental mode into characters outside its 9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // filter. 9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // To handle this, when in incremental mode we supply characters to 9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // handleTransliterate() in several passes. Each pass adds one more 9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // input character to the input text. That is, for input "ABCD", we 9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // first try "A", then "AB", then "ABC", and finally "ABCD". If at 9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // any point we block (upon return, start < limit) then we roll 9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // back. If at any point we complete the run (upon return start == 9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // limit) then we commit that run. 
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (rollback && isIncrementalRun) { 9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.setLength(0); 9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("filteredTransliterate{"+getID()+"}i: IN=" + 9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(text, index)); 9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int runStart = index.start; 9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int runLimit = index.limit; 9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int runLength = runLimit - runStart; 9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Make a rollback copy at the end of the string 9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rollbackOrigin = text.length(); 9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.copy(runStart, runLimit, rollbackOrigin); 9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Variables reflecting the commitment of completely 9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // transliterated text. passStart is the runStart, advanced 9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // past committed text. rollbackStart is the rollbackOrigin, 9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // advanced past rollback text that corresponds to committed 9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // text. 
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int passStart = runStart; 9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rollbackStart = rollbackOrigin; 9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The limit for each pass; we advance by one code point with 9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // each iteration. 9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int passLimit = index.start; 9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Total length, in 16-bit code units, of uncommitted text. 9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // This is the length to be rolled back. 9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int uncommittedLength = 0; 9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Total delta (change in length) for all passes 9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int totalDelta = 0; 9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // PASS MAIN LOOP -- Start with a single character, and extend 9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the text by one character at a time. Roll back partial 9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // transliterations and commit complete transliterations. 
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (;;) { 9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Length of additional code point, either one or two 9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int charLength = 9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UTF16.getCharCount(text.char32At(passLimit)); 9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert passLimit += charLength; 9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (passLimit > runLimit) { 9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uncommittedLength += charLength; 9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit = passLimit; 9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.setLength(0); 9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.append("filteredTransliterate{"+getID()+"}i: "); 9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(log, text, index); 9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delegate to subclass for actual transliteration. Upon 9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // return, start will be updated to point after the 9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // transliterated text, and limit and contextLimit will be 9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // adjusted for length changes. 
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert handleTransliterate(text, index, true); 9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.append(" => "); 9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(log, text, index); 9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta = index.limit - passLimit; // change in length 9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We failed to completely transliterate this pass. 9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Roll back the text. Indices remain unchanged; reset 9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // them where necessary. 9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index.start != index.limit) { 9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Find the rollbackStart, adjusted for length changes 9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and the deletion of partially transliterated text. 
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int rs = rollbackStart + delta - (index.limit - passStart); 9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delete the partially transliterated text 9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(passStart, index.limit, ""); 9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Copy the rollback text back 9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.copy(rs, rs + uncommittedLength, passStart); 10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Restore indices to their original values 10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.start = passStart; 10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit = passLimit; 10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.contextLimit -= delta; 10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.append(" (ROLLBACK)"); 10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We did completely transliterate this pass. Update the 10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // commit indices to record how far we got. Adjust indices 10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // for length change. 10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else { 10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Move the pass indices past the committed text. 
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert passStart = passLimit = index.start; 10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Adjust the rollbackStart for length changes and move 10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // it past the committed text. All characters we've 10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // processed to this point are committed now, so zero 10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // out the uncommittedLength. 10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rollbackStart += delta + uncommittedLength; 10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uncommittedLength = 0; 10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Adjust indices for length changes. 10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert runLimit += delta; 10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert totalDelta += delta; 10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(Utility.escape(log.toString())); 10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Adjust overall limit and rollbackOrigin for insertions and 10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // deletions. Don't need to worry about contextLimit because 10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // handleTransliterate() maintains that. 
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert rollbackOrigin += totalDelta; 10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert globalLimit += totalDelta; 10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delete the rollback copy 10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(rollbackOrigin, rollbackOrigin + runLength, ""); 10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Move start past committed text 10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.start = passStart; 10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else { 10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delegate to subclass for actual transliteration. 10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.setLength(0); 10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.append("filteredTransliterate{"+getID()+"}: "); 10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(log, text, index); 10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = index.limit; 10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert handleTransliterate(text, index, isIncrementalRun); 10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta = index.limit - limit; // change in length 10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert log.append(" => "); 
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(log, text, index); 10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // In a properly written transliterator, start == limit after 10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // handleTransliterate() returns when incremental is false. 10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Catch cases where the subclass doesn't do this, and throw 10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // an exception. (Just pinning start to limit is a bad idea, 10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // because what's probably happening is that the subclass 10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // isn't transliterating all the way to the end, and it should 10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // in non-incremental mode.) 10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!isIncrementalRun && index.start != index.limit) { 10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new RuntimeException("ERROR: Incomplete non-incremental transliteration by " + getID()); 10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Adjust overall limit for insertions/deletions. Don't need 10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to worry about contextLimit because handleTransliterate() 10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // maintains that. 
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert globalLimit += delta; 10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(Utility.escape(log.toString())); 10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (filter == null || isIncrementalRun) { 10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If we did completely transliterate this 10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // run, then repeat with the next unfiltered run. 10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Start is valid where it is. Limit needs to be put back where 10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // it was, modulo adjustments for deletions/insertions. 
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.limit = globalLimit; 10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) { 10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("filteredTransliterate{"+getID()+"}: OUT=" + 11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UtilityExtensions.formatInput(text, index)); 11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterate a substring of text, as specified by index, taking filters 11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * into account. This method is for subclasses that need to delegate to 11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * another transliterator, such as CompoundTransliterator. 
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text the text to be transliterated 11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index the position indices 11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param incremental if TRUE, then assume more characters may be inserted 11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * at index.limit, and postpone processing to accomodate future incoming 11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * characters 11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void filteredTransliterate(Replaceable text, 11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position index, 11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean incremental) { 11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filteredTransliterate(text, index, incremental, false); 11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the length of the longest context required by this transliterator. 11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This is <em>preceding</em> context. The default value is zero, but 11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subclasses can change this by calling <code>setMaximumContextLength()</code>. 
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, if a transliterator translates "ddd" (where 11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * d is any digit) to "555" when preceded by "(ddd)", then the preceding 11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * context length is 5, the length of "(ddd)". 11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The maximum number of preceding context characters this 11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator needs to examine 11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int getMaximumContextLength() { 11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return maximumContextLength; 11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Method for subclasses to use to set the maximum context length. 
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getMaximumContextLength 11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected void setMaximumContextLength(int a) { 11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (a < 0) { 11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Invalid context length " + a); 11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert maximumContextLength = a; 11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a programmatic identifier for this transliterator. 11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If this identifier is passed to <code>getInstance()</code>, it 11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will return this object, if it has been registered. 
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #registerClass 11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getAvailableIDs 11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final String getID() { 11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ID; 11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Set the programmatic identifier for this transliterator. Only 11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for use by subclasses. 11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected final void setID(String id) { 11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ID = id; 11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a name for this transliterator that is appropriate for 11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * display to the user in the default <code>DISPLAY</code> locale. See {@link 11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * #getDisplayName(String,Locale)} for details. 
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see com.ibm.icu.util.ULocale.Category#DISPLAY 11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final static String getDisplayName(String ID) { 11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return getDisplayName(ID, ULocale.getDefault(Category.DISPLAY)); 11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a name for this transliterator that is appropriate for 11837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * display to the user in the given locale. This name is taken 11847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from the locale resource data in the standard manner of the 11857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>java.text</code> package. 11867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 11877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>If no localized names exist in the system resource bundles, 11887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a name is synthesized using a localized 11897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>MessageFormat</code> pattern from the resource data. The 11907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * arguments to this pattern are an integer followed by one or two 11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strings. The integer is the number of strings, either 1 or 2. 
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The strings are formed by splitting the ID for this 11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator at the first '-'. If there is no '-', then the 11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * entire ID forms the only string. 11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param inLocale the Locale in which the display name should be 11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * localized. 11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see java.text.MessageFormat 11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String getDisplayName(String id, Locale inLocale) { 12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return getDisplayName(id, ULocale.forLocale(inLocale)); 12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a name for this transliterator that is appropriate for 12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * display to the user in the given locale. This name is taken 12077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from the locale resource data in the standard manner of the 12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>java.text</code> package. 
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>If no localized names exist in the system resource bundles, 12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a name is synthesized using a localized 12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>MessageFormat</code> pattern from the resource data. The 12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * arguments to this pattern are an integer followed by one or two 12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strings. The integer is the number of strings, either 1 or 2. 12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The strings are formed by splitting the ID for this 12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator at the first '-'. If there is no '-', then the 12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * entire ID forms the only string. 12187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param inLocale the ULocale in which the display name should be 12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * localized. 12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see java.text.MessageFormat 12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 3.2 12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 12237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String getDisplayName(String id, ULocale inLocale) { 12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Resource bundle containing display name keys and the 12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // RB_RULE_BASED_IDS array. 
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //If we ever integrate this with the Sun JDK, the resource bundle 12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // root will change to sun.text.resources.LocaleElements 12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle. 12322d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert getBundleInstance(ICUData.ICU_TRANSLIT_BASE_NAME, inLocale); 12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Normalize the ID 12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String stv[] = TransliteratorIDParser.IDtoSTV(id); 12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (stv == null) { 12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No target; malformed id 12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ""; 12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String ID = stv[0] + '-' + stv[1]; 12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (stv[2] != null && stv[2].length() > 0) { 12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ID = ID + '/' + stv[2]; 12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Use the registered display name, if any 12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String n = displayNameCache.get(new CaseInsensitiveString(ID)); 12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (n != null) { 12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return n; 12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
} 12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Use display name for the entire transliterator, if it 12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // exists. 12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 12547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID); 12557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (MissingResourceException e) {} 12567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Construct the formatter first; if getString() fails 12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // we'll exit the try block 12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert MessageFormat format = new MessageFormat( 12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bundle.getString(RB_DISPLAY_NAME_PATTERN)); 12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Construct the argument array 12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Object[] args = new Object[] { Integer.valueOf(2), stv[0], stv[1] }; 12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Use display names for the scripts, if they exist 12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j=1; j<=2; ++j) { 12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert args[j] = bundle.getString(RB_SCRIPT_DISPLAY_NAME_PREFIX + 12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (String) args[j]); 12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (MissingResourceException e) {} 12717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Format it using the pattern in the resource 12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (stv[2].length() > 0) ? 12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (format.format(args) + '/' + stv[2]) : 12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert format.format(args); 12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (MissingResourceException e2) {} 12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We should not reach this point unless there is something 12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // wrong with the build or the RB_DISPLAY_NAME_PATTERN has 12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // been deleted from the root RB_LOCALE_ELEMENTS resource. 12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new RuntimeException(); 12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the filter used by this transliterator, or <tt>null</tt> 12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if this transliterator uses no filter. 
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final UnicodeFilter getFilter() { 12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return filter; 12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Changes the filter used by this transliterator. If the filter 12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is set to <tt>null</tt> then no filtering will occur. 12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Callers must take care if a transliterator is in use by 12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * multiple threads. The filter should not be changed by one 13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * thread while another thread may be transliterating. 
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setFilter(UnicodeFilter filter) { 13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (filter == null) { 13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.filter = null; 13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // fast high-runner case 13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.filter = new UnicodeSet((UnicodeSet)filter).freeze(); 13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.filter = new UnicodeSet(); 13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filter.addMatchSetTo(this.filter); 13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.filter.freeze(); 13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a <code>Transliterator</code> object given its ID. 13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The ID must be either a system transliterator ID or a ID registered 13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * using <code>registerClass()</code>. 
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A <code>Transliterator</code> object with the given ID 13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException if the given ID is invalid. 13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Transliterator getInstance(String ID) { 13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return getInstance(ID, FORWARD); 13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a <code>Transliterator</code> object given its ID. 13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The ID must be either a system transliterator ID or a ID registered 13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * using <code>registerClass()</code>. 13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param dir either FORWARD or REVERSE. If REVERSE then the 13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * inverse of the given ID is instantiated. 
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A <code>Transliterator</code> object with the given ID 13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException if the given ID is invalid. 13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #registerClass 13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getAvailableIDs 13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getID 13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static Transliterator getInstance(String ID, 13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int dir) { 13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer canonID = new StringBuffer(); 13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<SingleID> list = new ArrayList<SingleID>(); 13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet[] globalFilter = new UnicodeSet[1]; 13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) { 13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Invalid ID " + ID); 13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<Transliterator> translits = TransliteratorIDParser.instantiateList(list); 13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // assert(list.size() > 0); 13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator t = null; 13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (list.size() > 1 || canonID.indexOf(";") >= 0) 
{ 13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only 13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // has one child transliterator. This is so that toRules() will return the right thing 13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (without any inactive ID), but our main ID still comes out correct. That is, if we 13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;" 13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // even though the ID is "(Lower);Latin-Greek;". 13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = new CompoundTransliterator(translits); 13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else { 13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = translits.get(0); 13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setID(canonID.toString()); 13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (globalFilter[0] != null) { 13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setFilter(globalFilter[0]); 13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return t; 13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Create a transliterator from a basic ID. 
This is an ID 13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * containing only the forward direction source, target, and 13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * variant. 13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param id a basic ID of the form S-T or S-T/V. 13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param canonID canonical ID to apply to the result, or 13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * null to leave the ID unchanged 13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return a newly created Transliterator or null if the ID is 13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * invalid. 13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static Transliterator getBasicInstance(String id, String canonID) { 13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer s = new StringBuffer(); 13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator t = registry.get(id, s); 13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (s.length() != 0) { 13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // assert(t==0); 13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Instantiate an alias 13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = getInstance(s.toString(), FORWARD); 13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (t != null && canonID != null) { 13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setID(canonID); 13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return t; 14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
/** 14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a <code>Transliterator</code> object constructed from 14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the given rule string. This will be a RuleBasedTransliterator, 14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if the rule string contains only rules, or a 14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * CompoundTransliterator, if it contains ID blocks, or a 14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * NullTransliterator, if it contains ID blocks which parse as 14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * empty for the given direction. 14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Transliterator createFromRules(String ID, String rules, int dir) { 14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator t = null; 14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert TransliteratorParser parser = new TransliteratorParser(); 14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert parser.parse(rules, dir); 14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOTE: The logic here matches that in TransliteratorRegistry. 
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) { 14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = new NullTransliterator(); 14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) { 14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = new RuleBasedTransliterator(ID, parser.dataVector.get(0), parser.compoundFilter); 14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) { 14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // idBlock, no data -- this is an alias. The ID has 14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // been munged from reverse into forward mode, if 14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // necessary, so instantiate the ID in the forward 14297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // direction. 
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (parser.compoundFilter != null) { 14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = getInstance(parser.compoundFilter.toPattern(false) + ";" 14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + parser.idBlockVector.get(0)); 14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 14347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = getInstance(parser.idBlockVector.get(0)); 14357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (t != null) { 14387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setID(ID); 14397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else { 14427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<Transliterator> transliterators = new ArrayList<Transliterator>(); 14437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int passNumber = 1; 14447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size()); 14467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < limit; i++) { 14477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (i < parser.idBlockVector.size()) { 14487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String idBlock = parser.idBlockVector.get(i); 14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (idBlock.length() > 0) { 14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator temp = getInstance(idBlock); 14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!(temp instanceof NullTransliterator)) 
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterators.add(getInstance(idBlock)); 14537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (i < parser.dataVector.size()) { 14567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Data data = parser.dataVector.get(i); 14577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transliterators.add(new RuleBasedTransliterator("%Pass" + passNumber++, data, null)); 14587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t = new CompoundTransliterator(transliterators, passNumber - 1); 14627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setID(ID); 14637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (parser.compoundFilter != null) { 14647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert t.setFilter(parser.compoundFilter); 14657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return t; 14697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 14727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a rule string for this transliterator. 
14737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param escapeUnprintable if true, then unprintable characters 14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will be converted to escape form backslash-'u' or 14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * backslash-'U'. 14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toRules(boolean escapeUnprintable) { 14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return baseToRules(escapeUnprintable); 14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a rule string for this transliterator. This is 14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a non-overrideable base class implementation that subclasses 14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * may call. It simply munges the ID into the correct format, 14869e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * that is, "foo" => "::foo". 14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param escapeUnprintable if true, then unprintable characters 14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * will be converted to escape form backslash-'u' or 14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * backslash-'U'. 
* @stable ICU 2.0
 */
protected final String baseToRules(boolean escapeUnprintable) {
    // The base class implementation of toRules munges the ID into
    // the correct format.  That is: foo => ::foo
    // KEEP in sync with rbt_pars
    final String id = getID();
    if (!escapeUnprintable) {
        return "::" + id + ID_DELIM;
    }

    // Walk the ID one code point at a time, escaping what needs escaping.
    StringBuffer escaped = new StringBuffer();
    int pos = 0;
    while (pos < id.length()) {
        int codePoint = UTF16.charAt(id, pos);
        boolean wasEscaped = Utility.escapeUnprintable(escaped, codePoint);
        if (!wasEscaped) {
            UTF16.append(escaped, codePoint);
        }
        pos += UTF16.getCharCount(codePoint);
    }
    escaped.insert(0, "::");
    escaped.append(ID_DELIM);
    return escaped.toString();
}

15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return the elements that make up this transliterator. For 15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * were created, the return value of this method would be an array 15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of the three transliterator objects that make up that 15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator: [NFD, Jamo-Latin, Latin-Greek]. 15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>If this transliterator is not composed of other 15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators, then this method will return an array of 15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * length one containing a reference to this transliterator. 
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return an array of one or more transliterators that make up 15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this transliterator 15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 3.0 15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Transliterator[] getElements() { 15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator result[]; 15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (this instanceof CompoundTransliterator) { 15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CompoundTransliterator cpd = (CompoundTransliterator) this; 15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = new Transliterator[cpd.getCount()]; 15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i=0; i<result.length; ++i) { 15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result[i] = cpd.getTransliterator(i); 15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = new Transliterator[] { this }; 15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the set of all characters that may be modified in the 15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * input text by this Transliterator. 
This incorporates this 15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * object's current filter; if the filter is changed, the return 15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * value of this function will change. The default implementation 15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returns an empty set. Some subclasses may override {@link 15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * #handleGetSourceSet} to return a more precise result. The 15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * return result is approximate in any case and is intended for 15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * use by tests, tools, or utilities. 15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTargetSet 15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #handleGetSourceSet 15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.2 15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final UnicodeSet getSourceSet() { 15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet result = new UnicodeSet(); 15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert addSourceTargetSet(getFilterAsUnicodeSet(UnicodeSet.ALL_CODE_POINTS), result, new UnicodeSet()); 15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Framework method that returns the set of all characters that 15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * may be modified in the input text by this Transliterator, 15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ignoring the effect of this object's 
filter. The base class 15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * implementation returns the empty set. Subclasses that wish to 15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * implement this should override this method. 15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the set of characters that this transliterator may 15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * modify. The set may be modified, so subclasses should return a 15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * newly-created object. 15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getSourceSet 15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTargetSet 15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.2 15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected UnicodeSet handleGetSourceSet() { 15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new UnicodeSet(); 15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the set of all characters that may be generated as 15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacement text by this transliterator. The default 15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * implementation returns the empty set. Some subclasses may 15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * override this method to return a more precise result. 
The 15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * return result is approximate in any case and is intended for 15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * use by tests, tools, or utilities requiring such 15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * meta-information. 15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Warning. You might expect an empty filter to always produce an empty target. 15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * However, consider the following: 15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 15889e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * [Pp]{}[\u03A3\u03C2\u03C3\u03F7\u03F8\u03FA\u03FB] > \'; 15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * With a filter of [], you still get some elements in the target set, because this rule will still match. It could 15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be recast to the following if it were important. 
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 15939e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * [Pp]{([\u03A3\u03C2\u03C3\u03F7\u03F8\u03FA\u03FB])} > \' | $1; 15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 15957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTargetSet 15967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.2 15977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 15987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UnicodeSet getTargetSet() { 15997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet result = new UnicodeSet(); 16007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert addSourceTargetSet(getFilterAsUnicodeSet(UnicodeSet.ALL_CODE_POINTS), new UnicodeSet(), result); 16017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 16027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 16047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 16057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the set of all characters that may be generated as 16067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacement text by this transliterator, filtered by BOTH the input filter, and the current getFilter(). 16077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>SHOULD BE OVERRIDEN BY SUBCLASSES. 16087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It is probably an error for any transliterator to NOT override this, but we can't force them to 16097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for backwards compatibility. 16107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Other methods vector through this. 
16117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>When gathering the information on source and target, the compound transliterator makes things complicated. 16127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, suppose we have: 16137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 16147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Global FILTER = [ax] 16159e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * a > b; 16167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * :: NULL; 16179e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * b > c; 16189e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * x > d; 16197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 16207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * While the filter just allows a and x, b is an intermediate result, which could produce c. So the source and target sets 16217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * cannot be gathered independently. What we have to do is filter the sources for the first transliterator according to 16227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the global filter, intersect that transliterator's filter. Based on that we get the target. 16237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The next transliterator gets as a global filter (global + last target). And so on. 
16247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>There is another complication: 16257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 16267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Global FILTER = [ax] 16279e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * a >|b; 16289e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * b >c; 16297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 16307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Even though b would be filtered from the input, whenever we have a backup, it could be part of the input. So ideally we will 16317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * change the global filter as we go. 16327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param targetSet TODO 16337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTargetSet 16347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 16357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 
16367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 16377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 16387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 16397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 16407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet temp = new UnicodeSet(handleGetSourceSet()).retainAll(myFilter); 16417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // use old method, if we don't have anything better 16427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceSet.addAll(temp); 16437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // clumsy guess with target 16447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String s : temp) { 16457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String t = transliterate(s); 16467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!s.equals(t)) { 16477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert targetSet.addAll(t); 16487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 16527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 16532d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * Returns the intersectionof this instance's filter intersected with an external filter. 16547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The externalFilter must be frozen (it is frozen if not). 16557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The result may be frozen, so don't attempt to modify. 
16567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 16577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 16587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 16597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 16607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO change to getMergedFilter 16617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UnicodeSet getFilterAsUnicodeSet(UnicodeSet externalFilter) { 16627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (filter == null) { 16637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return externalFilter; 16647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet filterSet = new UnicodeSet(externalFilter); 16667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Most, but not all filters will be UnicodeSets. Optimize for 16677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the high-runner case. 16687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet temp; 16697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 16707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert temp = filter; 16717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (ClassCastException e) { 16727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert filter.addMatchSetTo(temp = new UnicodeSet()); 16737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return filterSet.retainAll(temp).freeze(); 16757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 16777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 16787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns this transliterator's inverse. 
See the class 16797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * documentation for details. This implementation simply inverts 16807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the two entities in the ID and attempts to retrieve the 16817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * resulting transliterator. That is, if <code>getID()</code> 16827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returns "A-B", then this method will return the result of 16837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>getInstance("B-A")</code>, or <code>null</code> if that 16847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * call fails. 16857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 16867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Subclasses with knowledge of their inverse may wish to 16877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * override this method. 16887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 16897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return a transliterator that is an inverse, not necessarily 16907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * exact, of this transliterator, or <code>null</code> if no such 16917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator is registered. 
16927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #registerClass 16937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 16947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 16957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final Transliterator getInverse() { 16967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return getInstance(ID, REVERSE); 16977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 16987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 16997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Registers a subclass of <code>Transliterator</code> with the 17017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * system. This subclass must have a public constructor taking no 17027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * arguments. When that constructor is called, the resulting 17037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * object must return the <code>ID</code> passed to this method if 17047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * its <code>getID()</code> method is called. 17057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID the result of <code>getID()</code> for this 17077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterator 17087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param transClass a subclass of <code>Transliterator</code> 17097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #unregister 17107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 17117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 17127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void registerClass(String ID, Class<? 
extends Transliterator> transClass, String displayName) { 17137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(ID, transClass, true); 17147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (displayName != null) { 17157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert displayNameCache.put(new CaseInsensitiveString(ID), displayName); 17167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 17197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Register a factory object with the given ID. The factory 17217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * method should return a new instance of the given transliterator. 17222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Because ICU may choose to cache Transliterator objects internally, this must 17247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be called at application startup, prior to any calls to 17257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.getInstance to avoid undefined behavior. 
17262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID the ID of this transliterator 17287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param factory the factory object 17297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 17307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 17317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void registerFactory(String ID, Factory factory) { 17327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(ID, factory, true); 17337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 17357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Register a Transliterator object with the given ID. 17372d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Because ICU may choose to cache Transliterator objects internally, this must 17397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be called at application startup, prior to any calls to 17407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.getInstance to avoid undefined behavior. 
17412d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trans the Transliterator object 17437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.2 17447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 17457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void registerInstance(Transliterator trans) { 17467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(trans.getID(), trans, true); 17477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 17497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17509e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * Register a Transliterator object. 17512d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Because ICU may choose to cache Transliterator objects internally, this must 17537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be called at application startup, prior to any calls to 17547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.getInstance to avoid undefined behavior. 17552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trans the Transliterator object 17577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 17587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void registerInstance(Transliterator trans, boolean visible) { 17597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(trans.getID(), trans, visible); 17607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 17627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Register an ID as an alias of another ID. 
Instantiating 17647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * alias ID produces the same result as instantiating the original ID. 17657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This is generally used to create short aliases of compound IDs. 17662d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Because ICU may choose to cache Transliterator objects internally, this must 17687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be called at application startup, prior to any calls to 17697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.getInstance to avoid undefined behavior. 17702d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 17717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param aliasID The new ID being registered. 17727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param realID The existing ID that the new ID should be an alias of. 17737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 3.6 17747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 17757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void registerAlias(String aliasID, String realID) { 17767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(aliasID, realID, true); 17777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 17787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 17797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 17807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Register two targets as being inverses of one another. 
For 17817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * example, calling registerSpecialInverse("NFC", "NFD", true) causes 17827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator to form the following inverse relationships: 17837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17849e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * <pre>NFC => NFD 17859e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * Any-NFC => Any-NFD 17869e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * NFD => NFC 17879e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * Any-NFD => Any-NFC</pre> 17887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (Without the special inverse registration, the inverse of NFC 17907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but 17917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that the presence or absence of "Any-" is preserved. 17927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>The relationship is symmetrical; registering (a, b) is 17947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * equivalent to registering (b, a). 17957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>The relevant IDs must still be registered separately as 17977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * factories or classes. 17987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 17997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Only the targets are specified. Special inverses always 18009e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * have the form Any-Target1 <=> Any-Target2. 
The target should 18017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * have canonical casing (the casing desired to be produced when 18027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * an inverse is formed) and should contain no whitespace or other 18037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * extraneous characters. 18047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 18057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param target the target against which to register the inverse 18067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param inverseTarget the inverse of target, that is 18079e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * Any-target.getInverse() => Any-inverseTarget 18087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param bidirectional if true, register the reverse relation 18099e281ba4837cba4a1cf9523d6f8b0621b150063dScott Russell * as well, that is, Any-inverseTarget.getInverse() => Any-target 18107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void registerSpecialInverse(String target, 18127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String inverseTarget, 18137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean bidirectional) { 18147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert TransliteratorIDParser.registerSpecialInverse(target, inverseTarget, bidirectional); 18157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 18167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 18187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unregisters a transliterator or class. This may be either 18197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a system transliterator or a user transliterator or class. 
18207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 18217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param ID the ID of the transliterator or class 18227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #registerClass 18237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 18247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void unregister(String ID) { 18267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert displayNameCache.remove(new CaseInsensitiveString(ID)); 18277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.remove(ID); 18287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 18297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 18317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns an enumeration over the programmatic names of registered 18327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>Transliterator</code> objects. This includes both system 18337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators and user transliterators registered using 18347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>registerClass()</code>. The enumerated names may be 18357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * passed to <code>getInstance()</code>. 
18367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 18377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return An <code>Enumeration</code> over <code>String</code> objects 18387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getInstance 18397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #registerClass 18407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 18417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Enumeration<String> getAvailableIDs() { 18437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return registry.getAvailableIDs(); 18447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 18457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 18477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns an enumeration over the source names of registered 18487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators. Source names may be passed to 18497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * getAvailableTargets() to obtain available targets for each 18507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * source. 
18517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 18527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Enumeration<String> getAvailableSources() { 18547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return registry.getAvailableSources(); 18557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 18567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 18587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns an enumeration over the target names of registered 18597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators having a given source name. Target names may 18607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be passed to getAvailableVariants() to obtain available 18617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * variants for each source and target pair. 18627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 18637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Enumeration<String> getAvailableTargets(String source) { 18657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return registry.getAvailableTargets(source); 18667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 18677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 18697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns an enumeration over the variant names of registered 18707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transliterators having a given source name and target name. 
18717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 18727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 18737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final Enumeration<String> getAvailableVariants(String source, 18747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String target) { 18757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return registry.getAvailableVariants(source, target); 18767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1877f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert private static final String ROOT = "root", 18787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RB_RULE_BASED_IDS ="RuleBasedTransliteratorIDs"; 18797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static { 18807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry = new TransliteratorRegistry(); 18817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 18827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The display name cache starts out empty 18837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert displayNameCache = Collections.synchronizedMap(new HashMap<CaseInsensitiveString, String>()); 18847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* The following code parses the index table located in 18857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * icu/data/translit/root.txt. 
The index is an n x 4 table 18867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that follows this format: 18877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <id>{ 18887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * file{ 18897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * resource{"<resource>"} 18907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * direction{"<direction>"} 18917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 18927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 18937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <id>{ 18947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * internal{ 18957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * resource{"<resource>"} 18967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * direction{"<direction"} 18977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 18987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 18997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <id>{ 19007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * alias{"<getInstanceArg"} 19017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 19027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <id> is the ID of the system transliterator being defined. These 19037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are public IDs enumerated by Transliterator.getAvailableIDs(), 19047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * unless the second field is "internal". 19052d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 19067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <resource> is a ResourceReader resource name. Currently these refer 19077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * to file names under com/ibm/text/resources. This string is passed 19087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * directly to ResourceReader, together with <encoding>. 
19092d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 19107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <direction> is either "FORWARD" or "REVERSE". 19112d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert * 19127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <getInstanceArg> is a string to be passed directly to 19137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transliterator.getInstance(). The returned Transliterator object 19147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * then has its ID changed to <id> and is returned. 19157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 19167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The extra blank field on "alias" lines is to make the array square. 19177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 19187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UResourceBundle bundle, transIDs, colBund; 19192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert bundle = UResourceBundle.getBundleInstance(ICUData.ICU_TRANSLIT_BASE_NAME, ROOT); 19207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert transIDs = bundle.get(RB_RULE_BASED_IDS); 19217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 19227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int row, maxRows; 19237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert maxRows = transIDs.getSize(); 19247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (row = 0; row < maxRows; row++) { 19257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert colBund = transIDs.get(row); 19267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String ID = colBund.getKey(); 192787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (ID.indexOf("-t-") >= 0) { 192887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert continue; 192987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 19307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UResourceBundle res = 
colBund.get(0); 19317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String type = res.getKey(); 19327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (type.equals("file") || type.equals("internal")) { 19337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Rest of line is <resource>:<encoding>:<direction> 19347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // pos colon c2 19357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String resString = res.getString("resource"); 19367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int dir; 19377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String direction = res.getString("direction"); 19387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert switch (direction.charAt(0)) { 19397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 'F': 19407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dir = FORWARD; 19417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 19427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert case 'R': 19437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dir = REVERSE; 19447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 19457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert default: 19467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new RuntimeException("Can't parse direction: " + direction); 19477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 19487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(ID, 19497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert resString, // resource 19507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dir, 19517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert !type.equals("internal")); 19527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (type.equals("alias")) { 19537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //'alias'; row[2]=createInstance argument 
19547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String resString = res.getString(); 19557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registry.put(ID, resString, true); 19567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 19577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Unknown type 19587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new RuntimeException("Unknow type: " + type); 19597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 19607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 19617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 19627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registerSpecialInverse(NullTransliterator.SHORT_ID, NullTransliterator.SHORT_ID, false); 19637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 19647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Register non-rule-based transliterators 19657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registerClass(NullTransliterator._ID, 19667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NullTransliterator.class, null); 19677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RemoveTransliterator.register(); 19687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert EscapeTransliterator.register(); 19697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnescapeTransliterator.register(); 19707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LowercaseTransliterator.register(); 19717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UppercaseTransliterator.register(); 19727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert TitlecaseTransliterator.register(); 19737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CaseFoldTransliterator.register(); 19747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeNameTransliterator.register(); 19757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NameUnicodeTransliterator.register(); 
19767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NormalizationTransliterator.register(); 19777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BreakTransliterator.register(); 19787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AnyTransliterator.register(); // do this last! 19797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 19802d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 19817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 19827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Register the script-based "Any" transliterators: Any-Latin, Any-Greek 19837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 19847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 19857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 19867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 19877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void registerAny() { 19887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AnyTransliterator.register(); 19897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 19907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 19917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 19927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The factory interface for transliterators. Transliterator 19937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subclasses can register factory objects for IDs using the 19947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * registerFactory() method of Transliterator. When invoked, the 19957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * factory object will be passed the ID being instantiated. 
This 19967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * makes it possible to register one factory method to more than 19977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * one ID, or for a factory method to parameterize its result 19987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * based on the variant. 19997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 20007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 20017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static interface Factory { 20027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 20037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return a transliterator for the given ID. 20047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 20057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 20067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator getInstance(String ID); 20077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 20082d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 20097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 20107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implements StringTransform via this method. 20117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param source text to be transformed (eg lowercased) 20127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return result 20137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 3.8 20147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 20152d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 20167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String transform(String source) { 20177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return transliterate(source); 20187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 20197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 2020