12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2010-2014, International Business Machines 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others. All Rights Reserved. 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* UTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2010oct27 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UTF-16 collation element and character iterator. 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Handles normalized UTF-16 text, with length or NUL-terminated. 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Unnormalized text is handled by a subclass. 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class UTF16CollationIterator extends CollationIterator { 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}. 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UTF16CollationIterator(CollationData d) { 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(d); 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UTF16CollationIterator(CollationData d, boolean numeric, CharSequence s, int p) { 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(d, numeric); 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert seq = s; 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = 0; 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos = p; 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit = s.length(); 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean equals(Object other) { 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!super.equals(other)) { return false; } 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UTF16CollationIterator o = (UTF16CollationIterator)other; 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compare the iterator state but not the text: Assume that the caller does that. 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (pos - start) == (o.pos - o.start); 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int hashCode() { 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert false : "hashCode not designed"; 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 42; // any arbitrary constant will do 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void resetToOffset(int newOffset) { 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert reset(); 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos = start + newOffset; 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getOffset() { 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos - start; 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setText(boolean numeric, CharSequence s, int p) { 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert reset(numeric); 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert seq = s; 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = 0; 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos = p; 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit = s.length(); 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int nextCodePoint() { 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos == limit) { 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.SENTINEL_CP; 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = seq.charAt(pos++); 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char trail; 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isHighSurrogate(c) && pos != limit && 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Character.isLowSurrogate(trail = seq.charAt(pos))) { 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Character.toCodePoint(c, trail); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return c; 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int previousCodePoint() { 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos == start) { 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Collation.SENTINEL_CP; 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = seq.charAt(--pos); 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char lead; 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isLowSurrogate(c) && pos != start && 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Character.isHighSurrogate(lead = seq.charAt(pos - 1))) { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --pos; 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Character.toCodePoint(lead, c); 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return c; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected long handleNextCE32() { 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos == limit) { 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return NO_CP_AND_CE32; 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = seq.charAt(pos++); 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c)); 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected char handleGetTrailSurrogate() { 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos == limit) { return 0; } 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char trail; 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isLowSurrogate(trail = seq.charAt(pos))) { ++pos; } 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return trail; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* boolean foundNULTerminator(); */ 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected void forwardNumCodePoints(int num) { 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(num > 0 && pos != limit) { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = seq.charAt(pos++); 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --num; 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isHighSurrogate(c) && pos != limit && 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Character.isLowSurrogate(seq.charAt(pos))) { 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected void backwardNumCodePoints(int num) { 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(num > 0 && pos != start) { 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = seq.charAt(--pos); 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --num; 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(Character.isLowSurrogate(c) && pos != start && 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Character.isHighSurrogate(seq.charAt(pos-1))) { 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --pos; 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected CharSequence seq; 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int start; 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int pos; 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int limit; 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 150