1/* 2****************************************************************************** 3* Copyright (C) 1996-2011, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5****************************************************************************** 6*/ 7 8package com.ibm.icu.lang; 9 10import com.ibm.icu.impl.UCharacterName; 11import com.ibm.icu.impl.UCharacterNameChoice; 12import com.ibm.icu.util.ValueIterator; 13 14/** 15 * <p>Class enabling iteration of the codepoints and their names.</p> 16 * <p>Result of each iteration contains a valid codepoint that has valid 17 * name.</p> 18 * <p>See UCharacter.getNameIterator() for an example of use.</p> 19 * @author synwee 20 * @since release 2.1, March 5 2002 21 */ 22class UCharacterNameIterator implements ValueIterator 23{ 24 // public methods ---------------------------------------------------- 25 26 /** 27 * <p>Gets the next result for this iteration and returns 28 * true if we are not at the end of the iteration, false otherwise.</p> 29 * <p>If the return boolean is a false, the contents of elements will not 30 * be updated.</p> 31 * @param element for storing the result codepoint and name 32 * @return true if we are not at the end of the iteration, false otherwise. 33 * @see com.ibm.icu.util.ValueIterator.Element 34 */ 35 public boolean next(ValueIterator.Element element) 36 { 37 if (m_current_ >= m_limit_) { 38 return false; 39 } 40 41 if (m_choice_ == UCharacterNameChoice.UNICODE_CHAR_NAME || 42 m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME 43 ) { 44 int length = m_name_.getAlgorithmLength(); 45 if (m_algorithmIndex_ < length) { 46 while (m_algorithmIndex_ < length) { 47 // find the algorithm range that could contain m_current_ 48 if (m_algorithmIndex_ < 0 || 49 m_name_.getAlgorithmEnd(m_algorithmIndex_) < 50 m_current_) { 51 m_algorithmIndex_ ++; 52 } 53 else { 54 break; 55 } 56 } 57 58 if (m_algorithmIndex_ < length) { 59 // interleave the data-driven ones with the algorithmic ones 60 // iterate over all algorithmic ranges; assume that they are 61 // in ascending order 62 int start = m_name_.getAlgorithmStart(m_algorithmIndex_); 63 if (m_current_ < start) { 64 // this should get rid of those codepoints that are not 65 // in the algorithmic range 66 int end = start; 67 if (m_limit_ <= start) { 68 end = m_limit_; 69 } 70 if (!iterateGroup(element, end)) { 71 m_current_ ++; 72 return true; 73 } 74 } 75 /* 76 // "if (m_current_ >= m_limit_)" would not return true 77 // because it can never be reached due to: 78 // 1) It has already been checked earlier 79 // 2) When m_current_ is updated earlier, it returns true 80 // 3) No updates on m_limit_*/ 81 if (m_current_ >= m_limit_) { 82 // after iterateGroup fails, current codepoint may be 83 // greater than limit 84 return false; 85 } 86 87 element.integer = m_current_; 88 element.value = m_name_.getAlgorithmName(m_algorithmIndex_, 89 m_current_); 90 // reset the group index if we are in the algorithmic names 91 m_groupIndex_ = -1; 92 m_current_ ++; 93 return true; 94 } 95 } 96 } 97 // enumerate the character names after the last algorithmic range 98 if (!iterateGroup(element, m_limit_)) { 99 m_current_ ++; 100 return true; 101 } 102 else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 103 if (!iterateExtended(element, m_limit_)) { 104 m_current_ ++; 105 return true; 106 } 107 } 108 109 return false; 110 } 111 112 /** 113 * <p>Resets the iterator to start iterating from the integer index 114 * UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously. 115 * </p> 116 */ 117 public void reset() 118 { 119 m_current_ = m_start_; 120 m_groupIndex_ = -1; 121 m_algorithmIndex_ = -1; 122 } 123 124 /** 125 * <p>Restricts the range of integers to iterate and resets the iteration 126 * to begin at the index argument start.</p> 127 * <p>If setRange(start, end) is not performed before next(element) is 128 * called, the iteration will start from the integer index 129 * UCharacter.MIN_VALUE and end at UCharacter.MAX_VALUE.</p> 130 * <p> 131 * If this range is set outside the range of UCharacter.MIN_VALUE and 132 * UCharacter.MAX_VALUE, next(element) will always return false. 133 * </p> 134 * @param start first integer in range to iterate 135 * @param limit 1 integer after the last integer in range 136 * @exception IllegalArgumentException thrown when attempting to set an 137 * illegal range. E.g limit <= start 138 */ 139 public void setRange(int start, int limit) 140 { 141 if (start >= limit) { 142 throw new IllegalArgumentException( 143 "start or limit has to be valid Unicode codepoints and start < limit"); 144 } 145 if (start < UCharacter.MIN_VALUE) { 146 m_start_ = UCharacter.MIN_VALUE; 147 } 148 else { 149 m_start_ = start; 150 } 151 152 if (limit > UCharacter.MAX_VALUE + 1) { 153 m_limit_ = UCharacter.MAX_VALUE + 1; 154 } 155 else { 156 m_limit_ = limit; 157 } 158 m_current_ = m_start_; 159 } 160 161 // protected constructor --------------------------------------------- 162 163 /** 164 * Constructor 165 * @param name name data 166 * @param choice name choice from the class 167 * com.ibm.icu.lang.UCharacterNameChoice 168 */ 169 protected UCharacterNameIterator(UCharacterName name, int choice) 170 { 171 if(name==null){ 172 throw new IllegalArgumentException("UCharacterName name argument cannot be null. Missing unames.icu?"); 173 } 174 m_name_ = name; 175 // no explicit choice in UCharacter so no checks on choice 176 m_choice_ = choice; 177 m_start_ = UCharacter.MIN_VALUE; 178 m_limit_ = UCharacter.MAX_VALUE + 1; 179 m_current_ = m_start_; 180 } 181 182 // private data members --------------------------------------------- 183 184 /** 185 * Name data 186 */ 187 private UCharacterName m_name_; 188 /** 189 * Name choice 190 */ 191 private int m_choice_; 192 /** 193 * Start iteration range 194 */ 195 private int m_start_; 196 /** 197 * End + 1 iteration range 198 */ 199 private int m_limit_; 200 /** 201 * Current codepoint 202 */ 203 private int m_current_; 204 /** 205 * Group index 206 */ 207 private int m_groupIndex_ = -1; 208 /** 209 * Algorithm index 210 */ 211 private int m_algorithmIndex_ = -1; 212 /** 213 * Group use 214 */ 215 private static char GROUP_OFFSETS_[] = 216 new char[UCharacterName.LINES_PER_GROUP_ + 1]; 217 private static char GROUP_LENGTHS_[] = 218 new char[UCharacterName.LINES_PER_GROUP_ + 1]; 219 220 // private methods -------------------------------------------------- 221 222 /** 223 * Group name iteration, iterate all the names in the current 32-group and 224 * returns the first codepoint that has a valid name. 225 * @param result stores the result codepoint and name 226 * @param limit last codepoint + 1 in range to search 227 * @return false if a codepoint with a name is found in group and we can 228 * bail from further iteration, true to continue on with the 229 * iteration 230 */ 231 private boolean iterateSingleGroup(ValueIterator.Element result, int limit) 232 { 233 synchronized(GROUP_OFFSETS_) { 234 synchronized(GROUP_LENGTHS_) { 235 int index = m_name_.getGroupLengths(m_groupIndex_, GROUP_OFFSETS_, 236 GROUP_LENGTHS_); 237 while (m_current_ < limit) { 238 int offset = UCharacterName.getGroupOffset(m_current_); 239 String name = m_name_.getGroupName( 240 index + GROUP_OFFSETS_[offset], 241 GROUP_LENGTHS_[offset], m_choice_); 242 if ((name == null || name.length() == 0) && 243 m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 244 name = m_name_.getExtendedName(m_current_); 245 } 246 if (name != null && name.length() > 0) { 247 result.integer = m_current_; 248 result.value = name; 249 return false; 250 } 251 ++ m_current_; 252 } 253 } 254 } 255 return true; 256 } 257 258 /** 259 * Group name iteration, iterate all the names in the current 32-group and 260 * returns the first codepoint that has a valid name. 261 * @param result stores the result codepoint and name 262 * @param limit last codepoint + 1 in range to search 263 * @return false if a codepoint with a name is found in group and we can 264 * bail from further iteration, true to continue on with the 265 * iteration 266 */ 267 private boolean iterateGroup(ValueIterator.Element result, int limit) 268 { 269 if (m_groupIndex_ < 0) { 270 m_groupIndex_ = m_name_.getGroup(m_current_); 271 } 272 273 while (m_groupIndex_ < m_name_.m_groupcount_ && 274 m_current_ < limit) { 275 // iterate till the last group or the last codepoint 276 int startMSB = UCharacterName.getCodepointMSB(m_current_); 277 int gMSB = m_name_.getGroupMSB(m_groupIndex_); // can be -1 278 if (startMSB == gMSB) { 279 if (startMSB == UCharacterName.getCodepointMSB(limit - 1)) { 280 // if start and limit - 1 are in the same group, then enumerate 281 // only in that one 282 return iterateSingleGroup(result, limit); 283 } 284 // enumerate characters in the partial start group 285 // if (m_name_.getGroupOffset(m_current_) != 0) { 286 if (!iterateSingleGroup(result, 287 UCharacterName.getGroupLimit(gMSB))) { 288 return false; 289 } 290 ++ m_groupIndex_; // continue with the next group 291 } 292 else if (startMSB > gMSB) { 293 // make sure that we start enumerating with the first group 294 // after start 295 m_groupIndex_ ++; 296 } 297 else { 298 int gMIN = UCharacterName.getGroupMin(gMSB); 299 if (gMIN > limit) { 300 gMIN = limit; 301 } 302 if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) { 303 if (!iterateExtended(result, gMIN)) { 304 return false; 305 } 306 } 307 m_current_ = gMIN; 308 } 309 } 310 311 return true; 312 } 313 314 /** 315 * Iterate extended names. 316 * @param result stores the result codepoint and name 317 * @param limit last codepoint + 1 in range to search 318 * @return false if a codepoint with a name is found and we can 319 * bail from further iteration, true to continue on with the 320 * iteration (this will always be false for valid codepoints) 321 */ 322 private boolean iterateExtended(ValueIterator.Element result, 323 int limit) 324 { 325 while (m_current_ < limit) { 326 String name = m_name_.getExtendedOr10Name(m_current_); 327 if (name != null && name.length() > 0) { 328 result.integer = m_current_; 329 result.value = name; 330 return false; 331 } 332 ++ m_current_; 333 } 334 return true; 335 } 336} 337