/*
 *******************************************************************************
 * Copyright (C) 2013, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.text.CharacterIterator;

import com.ibm.icu.text.UTF16;

/**
 * Static helpers that read whole code points (32-bit values) through a
 * {@link CharacterIterator}, which itself only hands out 16-bit chars.
 * A lead surrogate immediately followed by a trail surrogate is combined
 * into one supplementary code point; any other char is returned as-is.
 */
public final class CharacterIteration {
    // Utility class: never instantiated.
    private CharacterIteration() { }

    /**
     * 32-bit value returned when an iterator has run out of range.
     * It is a positive value so that the fast case (not at the end, not a
     * surrogate) can be checked with a single comparison.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point and return that code
     * point, leaving the iterator positioned at the char returned.
     * For supplementary chars, the iterator is left positioned at the lead
     * surrogate.
     *
     * @param ci The character iterator
     * @return The next code point, or {@link #DONE32} when the iterator has
     *         run off the end.
     */
    public static int next32(CharacterIterator ci) {
        // When currently sitting on the lead of a surrogate pair, step onto the
        // trail so that the underlying iterator's next() below lands on the
        // start of the following code point.
        int cp = ci.current();
        boolean startsOnLead = cp >= UTF16.LEAD_SURROGATE_MIN_VALUE
                && cp <= UTF16.LEAD_SURROGATE_MAX_VALUE;
        if (startsOnLead) {
            int following = ci.next();
            boolean followedByTrail = following >= UTF16.TRAIL_SURROGATE_MIN_VALUE
                    && following <= UTF16.TRAIL_SURROGATE_MAX_VALUE;
            if (!followedByTrail) {
                // Unpaired lead: undo the step.
                ci.previous();
            }
        }

        // For BMP chars, this next() is the whole job.
        cp = ci.next();

        // A value in or above the surrogate range may be a lead surrogate (or
        // DONE); peek ahead for the trail even though the iterator must not
        // end up positioned there.
        if (cp >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
            cp = nextTrail32(ci, cp);
        }

        // A supplementary code point means nextTrail32 consumed the trail;
        // back the iterator up to the position of the lead surrogate.
        if (cp >= UTF16.SUPPLEMENTARY_MIN_VALUE && cp != DONE32) {
            ci.previous();
        }
        return cp;
    }


    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial ci.next() and calls this function if the
     * 16-bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
     *
     * NOTE: when a surrogate pair is combined, the underlying char iterator is
     * left positioned in the middle of the pair (on the trail). ci.next() will
     * work correctly from there, but ci.getIndex() will be wrong and needs
     * adjustment by the caller.
     *
     * @param ci   The character iterator, positioned at the char passed as lead
     * @param lead The 16-bit value just obtained from the iterator
     * @return {@link #DONE32} if lead is CharacterIterator.DONE and the iterator
     *         is at or past its end index; the combined supplementary code point
     *         if lead is followed by a trail surrogate; otherwise lead unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        // DONE (0xFFFF) is also a legal char value, so the index decides
        // whether it really signals the end of the text.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }

        // Anything above the lead-surrogate range cannot start a pair.
        if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
            return lead;
        }

        char candidate = ci.next();
        if (!UTF16.isTrailSurrogate(candidate)) {
            // Unpaired lead: restore the position and return it as-is.
            ci.previous();
            return lead;
        }
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (candidate - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    /**
     * Move the iterator backward by one code point and return that code point.
     * When a trail surrogate is preceded by a lead surrogate, the pair is
     * combined and the iterator is left on the lead; otherwise the iterator is
     * left on the single char that is returned.
     *
     * @param ci The character iterator
     * @return The previous code point, or {@link #DONE32} if the iterator is
     *         already at (or before) its begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }

        char last = ci.previous();
        // Not a trail surrogate, or nothing in front of it: single char result.
        if (!UTF16.isTrailSurrogate(last) || ci.getIndex() <= ci.getBeginIndex()) {
            return last;
        }

        char before = ci.previous();
        if (UTF16.isLeadSurrogate(before)) {
            return (((int) before - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                    + ((int) last - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                    + UTF16.SUPPLEMENTARY_MIN_VALUE;
        }

        // Unpaired trail: step forward again so the iterator sits on it.
        ci.next();
        return last;
    }

    /**
     * Return the code point at the iterator's current position without a net
     * change of position (a look-ahead for the trail surrogate is undone
     * before returning).
     *
     * @param ci The character iterator
     * @return The current code point; {@link #DONE32} if the current char is
     *         CharacterIterator.DONE and the iterator is at or past its end
     *         index.
     */
    public static int current32(CharacterIterator ci) {
        char first = ci.current();

        // Fast path: everything below the surrogate range is a complete code point.
        if (first < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            return first;
        }

        if (!UTF16.isLeadSurrogate(first)) {
            // DONE (0xFFFF) only means "end of text" when the index agrees.
            if (first == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
                return DONE32;
            }
            return first;
        }

        // Peek at the following char, then immediately restore the position.
        char second = ci.next();
        ci.previous();
        if (UTF16.isTrailSurrogate(second)) {
            return ((first - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                    + (second - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                    + UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
        return first;
    }
}