/** ******************************************************************************* * Copyright (C) 1996-2005, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * ******************************************************************************* */ package com.ibm.icu4jni.text; import java.util.Locale; import java.text.CharacterIterator; import java.text.ParseException; import com.ibm.icu4jni.common.ErrorCode; /** * Concrete implementation class for Collation. *
* The collation table is composed of a list of collation rules, where each * rule is of three forms: *
* < modifier > * < relation > < text-argument > * < reset > < text-argument > **
* RuleBasedCollator
has the following restrictions for efficiency
* (other subclasses may be used for more complex languages) :
*
b c
is
* treated as bc
.
* '@' : Indicates that accents are sorted backwards, as in French. *
'&' : Indicates that the next rule follows the position to where * the reset text-argument would be sorted. *
* This sounds more complicated than it is in practice. For example, the * following are equivalent ways of expressing the same thing: *
** Notice that the order is important, as the subsequent item goes immediately * after the text-argument. The following are not equivalent: ** a < b < c * a < b & b < c * a < c & a < b **
** Either the text-argument must already be present in the sequence, or some * initial substring of the text-argument must be present. (e.g. "a < b & ae < * e" is valid since "a" is present in the sequence before "ae" is reset). In * this latter case, "ae" is not entered and treated as a single character; * instead, "e" is sorted as if it were expanded to two characters: "a" * followed by an "e". This difference appears in natural languages: in * traditional Spanish "ch" is treated as though it contracts to a single * character (expressed as "c < ch < d"), while in traditional German a-umlaut * is treated as though it expanded to two characters (expressed as "a,A < b,B * ... & ae;? & AE;?"). [? and ? are, of course, the escape sequences for * a-umlaut.] ** a < b & a < c * a < c & a < b **
* Ignorable Characters *
* For ignorable characters, the first rule must start with a relation (the * examples we have used above are really fragments; "a < b" really should be * "< a < b"). If, however, the first relation is not "<", then all the * text-arguments up to the first "<" are ignorable. For example, ", - < a < b" * makes "-" an ignorable character, as we saw earlier in the word * "black-birds". In the samples for different languages, you see that most * accents are ignorable. * *
Normalization and Accents *
* RuleBasedCollator
automatically processes its rule table to
* include both pre-composed and combining-character versions of accented
* characters. Even if the provided rule string contains only base characters
* and separate combining accent characters, the pre-composed accented
* characters matching all canonical combinations of characters from the rule
* string will be entered in the table.
*
* This allows you to use a RuleBasedCollator to compare accented strings even * when the collator is set to NO_DECOMPOSITION. However, if the strings to be * collated contain combining sequences that may not be in canonical order, you * should set the collator to CANONICAL_DECOMPOSITION to enable sorting of * combining sequences. * (For more information, see * The Unicode Standard, Version 3.0.) * *
Errors *
* The following are errors: *
RuleBasedCollator
throws
* a ParseException
.
*
* Examples *
Simple: "< a < b < c < d" *
Norwegian: "< a,A< b,B< c,C< d,D< e,E< f,F< g,G< h,H< i,I< j,J * < k,K< l,L< m,M< n,N< o,O< p,P< q,Q< r,R< s,S< t,T * < u,U< v,V< w,W< x,X< y,Y< z,Z * < ?=a?,?=A? * ;aa,AA< ?,?< ?,?" * *
* Normally, to create a rule-based Collator object, you will use
* Collator
's factory method getInstance
.
* However, to create a rule-based Collator object with specialized rules
* tailored to your needs, you construct the RuleBasedCollator
* with the rules contained in a String
object. For example:
*
** Or: ** String Simple = "< a < b < c < d"; * RuleBasedCollator mySimple = new RuleBasedCollator(Simple); **
** ** String Norwegian = "< a,A< b,B< c,C< d,D< e,E< f,F< g,G< h,H< i,I< j,J" + * "< k,K< l,L< m,M< n,N< o,O< p,P< q,Q< r,R< s,S< t,T" + * "< u,U< v,V< w,W< x,X< y,Y< z,Z" + * "< ?=a?,?=A?" + * ";aa,AA< ?,?< ?,?"; * RuleBasedCollator myNorwegian = new RuleBasedCollator(Norwegian); **
* Combining Collator
s is as simple as concatenating strings.
* Here's an example that combines two Collator
s from two
* different locales:
*
** ** // Create an en_US Collator object * RuleBasedCollator en_USCollator = (RuleBasedCollator) * Collator.getInstance(new Locale("en", "US", "")); * // Create a da_DK Collator object * RuleBasedCollator da_DKCollator = (RuleBasedCollator) * Collator.getInstance(new Locale("da", "DK", "")); * // Combine the two * // First, get the collation rules from en_USCollator * String en_USRules = en_USCollator.getRules(); * // Second, get the collation rules from da_DKCollator * String da_DKRules = da_DKCollator.getRules(); * RuleBasedCollator newCollator = * new RuleBasedCollator(en_USRules + da_DKRules); * // newCollator has the combined rules **
* Another more interesting example would be to make changes on an existing
* table to create a new Collator
object. For example, add
* "& C < ch, cH, Ch, CH" to the en_USCollator
object to create
* your own:
*
** ** // Create a new Collator object with additional rules * String addRules = "& C < ch, cH, Ch, CH"; * RuleBasedCollator myCollator = * new RuleBasedCollator(en_USCollator.getRules() + addRules); * // myCollator contains the new rules **
* The following example demonstrates how to change the order of * non-spacing accents, *
** ** // old rule * String oldRules = "=?;?;?" // main accents Diaeresis 00A8, Macron 00AF * // Acute 00BF * + "< a , A ; ae, AE ; ? , ?" * + "< b , B < c, C < e, E & C < d, D"; * // change the order of accent characters * String addOn = "& ?;?;?;"; // Acute 00BF, Macron 00AF, Diaeresis 00A8 * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); **
* The last example shows how to put new primary ordering in before the
* default setting. For example, in Japanese Collator
, you
* can either sort English characters before or after Japanese characters,
*
*** // get en_US Collator rules * RuleBasedCollator en_USCollator = * (RuleBasedCollator)Collator.getInstance(Locale.US); * // add a few Japanese character to sort before English characters * // suppose the last character before the first base letter 'a' in * // the English collation rule is ? * String jaString = "& \\u30A2 , \\u30FC < \\u30C8"; * RuleBasedCollator myJapaneseCollator = new * RuleBasedCollator(en_USCollator.getRules() + jaString); **
* @author syn wee quek * @stable ICU 2.4 */ public final class RuleBasedCollator extends Collator { // public constructors ------------------------------------------ /** * RuleBasedCollator constructor. This takes the table rules and builds a * collation table out of them. Please see RuleBasedCollator class * description for more details on the collation rule syntax. * @param rules the collation rules to build the collation table from. * @exception ParseException thrown if rules are empty or a Runtime error * if collator can not be created. * @stable ICU 2.4 */ public RuleBasedCollator(String rules) throws ParseException { // BEGIN android-changed if (rules == null) { throw new NullPointerException(); } // if (rules.length() == 0) // throw new ParseException("Build rules empty.", 0); // END android-changed m_collator_ = NativeCollation.openCollatorFromRules(rules, CollationAttribute.VALUE_OFF, CollationAttribute.VALUE_DEFAULT_STRENGTH); } /** * RuleBasedCollator constructor. This takes the table rules and builds a * collation table out of them. Please see RuleBasedCollator class * description for more details on the collation rule syntax. * @param rules the collation rules to build the collation table from. * @param strength collation strength * @exception ParseException thrown if rules are empty or a Runtime error * if collator can not be created. * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY * @see #QUATERNARY * @see #IDENTICAL * @stable ICU 2.4 */ public RuleBasedCollator(String rules, int strength) throws ParseException { // BEGIN android-changed if (rules == null) { throw new NullPointerException(); } // if (rules.length() == 0) // throw new ParseException("Build rules empty.", 0); // END android-changed if (!CollationAttribute.checkStrength(strength)) throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR); m_collator_ = NativeCollation.openCollatorFromRules(rules, CollationAttribute.VALUE_OFF, strength); } /** * RuleBasedCollator constructor. 
This takes the table rules and builds a * collation table out of them. Please see RuleBasedCollator class * description for more details on the collation rule syntax. *
Note API change starting from release 2.4. Prior to release 2.4, the * normalizationmode argument values are from the class * com.ibm.icu4jni.text.Normalization. In 2.4, * the valid normalizationmode arguments for this API are * CollationAttribute.VALUE_ON and CollationAttribute.VALUE_OFF. *
* @param rules the collation rules to build the collation table from. * @param strength collation strength * @param normalizationmode normalization mode * @exception IllegalArgumentException thrown when constructor error occurs * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY * @see #QUATERNARY * @see #IDENTICAL * @see #CANONICAL_DECOMPOSITION * @see #NO_DECOMPOSITION * @stable ICU 2.4 */ public RuleBasedCollator(String rules, int normalizationmode, int strength) { // BEGIN android-added if (rules == null) { throw new NullPointerException(); } // END android-added if (!CollationAttribute.checkStrength(strength) || !CollationAttribute.checkNormalization(normalizationmode)) { throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR); } m_collator_ = NativeCollation.openCollatorFromRules(rules, normalizationmode, strength); } // public methods ----------------------------------------------- /** * Makes a complete copy of the current object. * @return a copy of this object if data clone is a success, otherwise null * @stable ICU 2.4 */ public Object clone() { RuleBasedCollator result = null; int collatoraddress = NativeCollation.safeClone(m_collator_); result = new RuleBasedCollator(collatoraddress); return (Collator)result; } /** * The comparison function compares the character data stored in two * different strings. Returns information about whether a string is less * than, greater than or equal to another string. *Example of use:
*
*
* Collator myCollation = Collator.createInstance(Locale::US);
* myCollation.setStrength(CollationAttribute.VALUE_PRIMARY);
* // result would be Collator.RESULT_EQUAL ("abc" == "ABC")
* // (no primary difference between "abc" and "ABC")
* int result = myCollation.compare("abc", "ABC",3);
* myCollation.setStrength(CollationAttribute.VALUE_TERTIARY);
* // result would be Collation::LESS (abc" <<< "ABC")
* // (with tertiary difference between "abc" and "ABC")
* int result = myCollation.compare("abc", "ABC",3);
*
* @param source The source string.
* @param target The target string.
* @return result of the comparison, Collator.RESULT_EQUAL,
* Collator.RESULT_GREATER or Collator.RESULT_LESS
* @stable ICU 2.4
*/
public int compare(String source, String target)
{
return NativeCollation.compare(m_collator_, source, target);
}
/**
 * Reports the normalization mode of this collator, as returned by
 * NativeCollation.getNormalization for the underlying native collator.
 * The normalization mode influences how strings are compared.
 * @return the current normalization (decomposition) mode.
 * @see #CANONICAL_DECOMPOSITION
 * @see #NO_DECOMPOSITION
 * @stable ICU 2.4
 */
public int getDecomposition() {
    final int mode = NativeCollation.getNormalization(m_collator_);
    return mode;
}
/**
 * Sets the decomposition mode of the Collator object on or off.
 * If the decomposition mode is set to on, strings are decomposed into
 * NFD format where necessary before sorting.
 *
 * @param decompositionmode the new decomposition mode; it must pass
 *        CollationAttribute.checkNormalization, otherwise the exception
 *        built by ErrorCode.getException for U_ILLEGAL_ARGUMENT_ERROR is
 *        thrown.
 * @see #CANONICAL_DECOMPOSITION
 * @see #NO_DECOMPOSITION
 * @stable ICU 2.4
 */
public void setDecomposition(int decompositionmode) {
    final boolean valid =
            CollationAttribute.checkNormalization(decompositionmode);
    if (!valid) {
        throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR);
    }
    NativeCollation.setAttribute(m_collator_,
            CollationAttribute.NORMALIZATION_MODE,
            decompositionmode);
}

/**
 * Determines the minimum strength that will be used in comparison or
 * transformation.
 * <p>
 * E.g. with strength == CollationAttribute.VALUE_SECONDARY, the tertiary
 * difference is ignored.
 * </p>
 * <p>
 * E.g. with strength == PRIMARY, the secondary and tertiary differences
 * are ignored.
 * </p>
 * @return the current comparison level.
 * @see #PRIMARY
 * @see #SECONDARY
 * @see #TERTIARY
 * @see #QUATERNARY
 * @see #IDENTICAL
 * @stable ICU 2.4
 */
public int getStrength() {
    return NativeCollation.getAttribute(m_collator_,
            CollationAttribute.STRENGTH);
}

/**
 * Sets the minimum strength to be used in comparison or transformation.
 * <p>Example of use:</p>
 * <pre>
 * Collator myCollation = Collator.getInstance(Locale.US);
 * myCollation.setStrength(PRIMARY);
 * // result will be "abc" == "ABC"
 * // tertiary differences will be ignored
 * int result = myCollation.compare("abc", "ABC");
 * </pre>
 * @param strength the new comparison level.
 * @exception IllegalArgumentException when argument does not belong to any
 *            collation strength mode or error occurs while setting data.
 * @see #PRIMARY
 * @see #SECONDARY
 * @see #TERTIARY
 * @see #QUATERNARY
 * @see #IDENTICAL
 * @stable ICU 2.4
 */
public void setStrength(int strength) {
    final boolean valid = CollationAttribute.checkStrength(strength);
    if (!valid) {
        throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR);
    }
    NativeCollation.setAttribute(m_collator_,
            CollationAttribute.STRENGTH,
            strength);
}
/**
 * Sets the attribute to be used in comparison or transformation.
 * <p>Example of use:</p>
 * <pre>
 * Collator myCollation = Collator.getInstance(Locale.US);
 * myCollation.setAttribute(CollationAttribute.CASE_LEVEL,
 *                          CollationAttribute.VALUE_ON);
 * int result = myCollation.compare("\\u30C3\\u30CF",
 *                                  "\\u30C4\\u30CF");
 * // result will be Collator.RESULT_LESS.
 * </pre>
 * The type/value pair is validated with CollationAttribute.checkAttribute
 * before it is handed to the native collator; an invalid pair raises the
 * exception built by ErrorCode.getException for U_ILLEGAL_ARGUMENT_ERROR.
 * @param type the attribute to be set from CollationAttribute
 * @param value attribute value from CollationAttribute
 * @stable ICU 2.4
 */
public void setAttribute(int type, int value) {
    final boolean valid = CollationAttribute.checkAttribute(type, value);
    if (!valid) {
        throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR);
    }
    NativeCollation.setAttribute(m_collator_, type, value);
}
/**
 * Gets the attribute to be used in comparison or transformation.
 * The type is validated with CollationAttribute.checkType first; an
 * invalid type raises the exception built by ErrorCode.getException for
 * U_ILLEGAL_ARGUMENT_ERROR.
 * @param type the attribute to be queried, from CollationAttribute
 * @return value attribute value from CollationAttribute
 * @stable ICU 2.4
 */
public int getAttribute(int type) {
    final boolean knownType = CollationAttribute.checkType(type);
    if (!knownType) {
        throw ErrorCode.getException(ErrorCode.U_ILLEGAL_ARGUMENT_ERROR);
    }
    return NativeCollation.getAttribute(m_collator_, type);
}
/**
 * Get the sort key as a CollationKey object from the argument string.
 * To retrieve the sort key in terms of byte arrays, use the method as below
 * <pre>
 * Collator collator = Collator.getInstance();
 * byte[] array = collator.getSortKey(source);
 * </pre>
 * Byte array results are zero-terminated and can be compared using
 * java.util.Arrays.equals();
 * @param source string to be processed.
 * @return the sort key, or null when source is null or the native layer
 *         returns no key bytes.
 * @stable ICU 2.4
 */
public CollationKey getCollationKey(String source) {
    // BEGIN android-removed
    // return new CollationKey(NativeCollation.getSortKey(m_collator_, source));
    // END android-removed
    // BEGIN android-added
    if (source == null) {
        return null;
    }
    final byte[] sortKeyBytes = NativeCollation.getSortKey(m_collator_, source);
    return (sortKeyBytes == null) ? null : new CollationKey(sortKeyBytes);
    // END android-added
}
/**
 * Get a sort key for the argument string, as produced by
 * NativeCollation.getSortKey for the underlying native collator.
 * Sort keys may be compared using java.util.Arrays.equals
 * @param source string for key to be generated
 * @return sort key
 * @stable ICU 2.4
 */
public byte[] getSortKey(String source) {
    final byte[] sortKeyBytes = NativeCollation.getSortKey(m_collator_, source);
    return sortKeyBytes;
}
/**
 * Get the collation rules of this Collation object, as reported by
 * NativeCollation.getRules for the underlying native collator.
 * The rules will follow the rule syntax.
 * @return collation rules.
 * @stable ICU 2.4
 */
public String getRules() {
    final String rules = NativeCollation.getRules(m_collator_);
    return rules;
}
/**
 * Create a CollationElementIterator object that will iterate over the
 * elements in a string, using the collation rules defined in this
 * RuleBasedCollator
 * @param source string to iterate over
 * @return a CollationElementIterator wrapping the value returned by
 *         NativeCollation.getCollationElementIterator for source
 * @exception IllegalArgumentException thrown when error occurs
 * @stable ICU 2.4
 */
public CollationElementIterator getCollationElementIterator(String source) {
    // result.setOwnCollationElementIterator(true);
    return new CollationElementIterator(
            NativeCollation.getCollationElementIterator(m_collator_, source));
}
// BEGIN android-added
/**
 * Create a CollationElementIterator object that will iterate over the
 * characters supplied by a CharacterIterator, using the collation rules
 * defined in this RuleBasedCollator.
 * <p>
 * Only the String-based native entry point is available, so the
 * characters in the iterator's range are first copied into a String.
 * The copy is made from a clone, so the caller's iterator position is
 * left untouched.
 * </p>
 * @param source character iterator whose text is to be iterated over;
 *        must not be null.
 * @return a CollationElementIterator over the iterator's text
 * @exception IllegalArgumentException thrown when error occurs
 * @stable ICU 2.4
 */
public CollationElementIterator getCollationElementIterator(
        CharacterIterator source)
{
    // CharacterIterator does not define a text-returning toString(), so
    // source.toString() would hand the collator an object identity string
    // instead of the characters to collate.
    return getCollationElementIterator(characterIteratorToString(source));
}

/**
 * Copies every character in the iterator's [begin, end) range into a String.
 * Works on a clone and walks by index, so the argument is not repositioned
 * and a literal U+FFFF in the text is not mistaken for CharacterIterator.DONE.
 */
private static String characterIteratorToString(CharacterIterator source)
{
    CharacterIterator it = (CharacterIterator) source.clone();
    int length = Math.max(0, it.getEndIndex() - it.getBeginIndex());
    StringBuilder text = new StringBuilder(length);
    for (it.first(); it.getIndex() < it.getEndIndex(); it.next()) {
        text.append(it.current());
    }
    return text.toString();
}
// END android-added
/**
 * Returns a hash of this collation object.
 * The value comes from NativeCollation.hashCode and is cached in
 * m_hashcode_ after the first call. A cached value of 0 means "not yet
 * computed", so a native hash of 0 is stored and returned as 1.
 * @return hash of this collation object
 * @stable ICU 2.4
 */
public int hashCode() {
    // since rules do not change once it is created, we can cache the hash
    if (m_hashcode_ != 0) {
        return m_hashcode_;
    }
    m_hashcode_ = NativeCollation.hashCode(m_collator_);
    if (m_hashcode_ == 0) {
        m_hashcode_ = 1;
    }
    return m_hashcode_;
}
/**
 * Checks if the argument object is equal to this object.
 * Two collators are equal when they are of the same class and agree on
 * rules, strength and decomposition mode.
 * @param target object to compare against
 * @return true if this object is equivalent to target, false otherwise
 * @stable ICU 2.4
 */
public boolean equals(Object target) {
    if (target == this) {
        return true;
    }
    if (target == null || getClass() != target.getClass()) {
        return false;
    }
    final RuleBasedCollator other = (RuleBasedCollator) target;
    if (!getRules().equals(other.getRules())) {
        return false;
    }
    if (getStrength() != other.getStrength()) {
        return false;
    }
    return getDecomposition() == other.getDecomposition();
}
// package constructor ----------------------------------------
/**
* RuleBasedCollator default constructor. This constructor takes the default
* locale. The only caller of this class should be Collator.getInstance().
* Current implementation createInstance() returns a RuleBasedCollator(Locale)
* instance. The RuleBasedCollator will be created in the following order,
*