1/* 2******************************************************************************* 3* Copyright (C) 2013-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* collationrootelements.cpp 7* 8* created on: 2013mar05 9* created by: Markus W. Scherer 10*/ 11 12#include "unicode/utypes.h" 13 14#if !UCONFIG_NO_COLLATION 15 16#include "collation.h" 17#include "collationrootelements.h" 18#include "uassert.h" 19 20U_NAMESPACE_BEGIN 21 22int64_t 23CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { 24 if(p == 0) { return 0; } 25 U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); 26 int32_t index = findP(p); 27 uint32_t q = elements[index]; 28 uint32_t secTer; 29 if(p == (q & 0xffffff00)) { 30 // p == elements[index] is a root primary. Find the CE before it. 31 // We must not be in a primary range. 32 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 33 secTer = elements[index - 1]; 34 if((secTer & SEC_TER_DELTA_FLAG) == 0) { 35 // Primary CE just before p. 36 p = secTer & 0xffffff00; 37 secTer = Collation::COMMON_SEC_AND_TER_CE; 38 } else { 39 // secTer = last secondary & tertiary for the previous primary 40 index -= 2; 41 for(;;) { 42 p = elements[index]; 43 if((p & SEC_TER_DELTA_FLAG) == 0) { 44 p &= 0xffffff00; 45 break; 46 } 47 --index; 48 } 49 } 50 } else { 51 // p > elements[index] which is the previous primary. 52 // Find the last secondary & tertiary weights for it. 53 p = q & 0xffffff00; 54 secTer = Collation::COMMON_SEC_AND_TER_CE; 55 for(;;) { 56 q = elements[++index]; 57 if((q & SEC_TER_DELTA_FLAG) == 0) { 58 // We must not be in a primary range. 59 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 60 break; 61 } 62 secTer = q; 63 } 64 } 65 return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); 66} 67 68int64_t 69CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { 70 if(p == 0) { return 0; } 71 int32_t index = findP(p); 72 if(p != (elements[index] & 0xffffff00)) { 73 for(;;) { 74 p = elements[++index]; 75 if((p & SEC_TER_DELTA_FLAG) == 0) { 76 // First primary after p. We must not be in a primary range. 77 U_ASSERT((p & PRIMARY_STEP_MASK) == 0); 78 break; 79 } 80 } 81 } 82 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. 83 return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; 84} 85 86uint32_t 87CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { 88 int32_t index = findPrimary(p); 89 int32_t step; 90 uint32_t q = elements[index]; 91 if(p == (q & 0xffffff00)) { 92 // Found p itself. Return the previous primary. 93 // See if p is at the end of a previous range. 94 step = (int32_t)q & PRIMARY_STEP_MASK; 95 if(step == 0) { 96 // p is not at the end of a range. Look for the previous primary. 97 do { 98 p = elements[--index]; 99 } while((p & SEC_TER_DELTA_FLAG) != 0); 100 return p & 0xffffff00; 101 } 102 } else { 103 // p is in a range, and not at the start. 104 uint32_t nextElement = elements[index + 1]; 105 U_ASSERT(isEndOfPrimaryRange(nextElement)); 106 step = (int32_t)nextElement & PRIMARY_STEP_MASK; 107 } 108 // Return the previous range primary. 109 if((p & 0xffff) == 0) { 110 return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); 111 } else { 112 return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); 113 } 114} 115 116uint32_t 117CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { 118 int32_t index; 119 uint32_t previousSec, sec; 120 if(p == 0) { 121 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 122 // Gap at the beginning of the secondary CE range. 123 previousSec = 0; 124 sec = elements[index] >> 16; 125 } else { 126 index = findPrimary(p) + 1; 127 previousSec = Collation::MERGE_SEPARATOR_WEIGHT16; 128 sec = Collation::COMMON_WEIGHT16; 129 } 130 U_ASSERT(s >= sec); 131 while(s > sec) { 132 previousSec = sec; 133 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); 134 sec = elements[index++] >> 16; 135 } 136 U_ASSERT(sec == s); 137 return previousSec; 138} 139 140uint32_t 141CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { 142 U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); 143 int32_t index; 144 uint32_t previousTer, secTer; 145 if(p == 0) { 146 if(s == 0) { 147 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; 148 // Gap at the beginning of the tertiary CE range. 149 previousTer = 0; 150 } else { 151 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 152 previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; 153 } 154 secTer = elements[index] & ~SEC_TER_DELTA_FLAG; 155 } else { 156 index = findPrimary(p) + 1; 157 previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; 158 secTer = Collation::COMMON_SEC_AND_TER_CE; 159 } 160 uint32_t st = (s << 16) | t; 161 while(st > secTer) { 162 if((secTer >> 16) == s) { previousTer = secTer; } 163 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); 164 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; 165 } 166 U_ASSERT(secTer == st); 167 return previousTer & 0xffff; 168} 169 170uint32_t 171CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { 172 U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); 173 uint32_t q = elements[++index]; 174 int32_t step; 175 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { 176 // Return the next primary in this range. 177 if((p & 0xffff) == 0) { 178 return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); 179 } else { 180 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); 181 } 182 } else { 183 // Return the next primary in the list. 184 while((q & SEC_TER_DELTA_FLAG) != 0) { 185 q = elements[++index]; 186 } 187 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 188 return q; 189 } 190} 191 192uint32_t 193CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { 194 uint32_t secLimit; 195 if(index == 0) { 196 // primary = 0 197 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 198 // Gap at the end of the secondary CE range. 199 secLimit = 0x10000; 200 } else { 201 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); 202 ++index; 203 // Gap for secondaries of primary CEs. 204 secLimit = getSecondaryBoundary(); 205 } 206 for(;;) { 207 uint32_t secTer = elements[index]; 208 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } 209 uint32_t sec = secTer >> 16; 210 if(sec > s) { return sec; } 211 ++index; 212 } 213} 214 215uint32_t 216CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { 217 uint32_t terLimit; 218 if(index == 0) { 219 // primary = 0 220 if(s == 0) { 221 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; 222 // Gap at the end of the tertiary CE range. 223 terLimit = 0x4000; 224 } else { 225 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 226 // Gap for tertiaries of primary/secondary CEs. 227 terLimit = getTertiaryBoundary(); 228 } 229 } else { 230 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); 231 ++index; 232 terLimit = getTertiaryBoundary(); 233 } 234 uint32_t st = (s << 16) | t; 235 for(;;) { 236 uint32_t secTer = elements[index]; 237 // No tertiary greater than t for this primary+secondary. 238 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } 239 secTer &= ~SEC_TER_DELTA_FLAG; 240 if(secTer > st) { return secTer & 0xffff; } 241 ++index; 242 } 243} 244 245int32_t 246CollationRootElements::findPrimary(uint32_t p) const { 247 // Requirement: p must occur as a root primary. 248 U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary 249 int32_t index = findP(p); 250 // If p is in a range, then we just assume that p is an actual primary in this range. 251 // (Too cumbersome/expensive to check.) 252 // Otherwise, it must be an exact match. 253 U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); 254 return index; 255} 256 257int32_t 258CollationRootElements::findP(uint32_t p) const { 259 // p need not occur as a root primary. 260 // For example, it might be a reordering group boundary. 261 U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); 262 // modified binary search 263 int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; 264 U_ASSERT(p >= elements[start]); 265 int32_t limit = length - 1; 266 U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); 267 U_ASSERT(p < elements[limit]); 268 while((start + 1) < limit) { 269 // Invariant: elements[start] and elements[limit] are primaries, 270 // and elements[start]<=p<=elements[limit]. 271 int32_t i = (start + limit) / 2; 272 uint32_t q = elements[i]; 273 if((q & SEC_TER_DELTA_FLAG) != 0) { 274 // Find the next primary. 275 int32_t j = i + 1; 276 for(;;) { 277 if(j == limit) { break; } 278 q = elements[j]; 279 if((q & SEC_TER_DELTA_FLAG) == 0) { 280 i = j; 281 break; 282 } 283 ++j; 284 } 285 if((q & SEC_TER_DELTA_FLAG) != 0) { 286 // Find the preceding primary. 287 j = i - 1; 288 for(;;) { 289 if(j == start) { break; } 290 q = elements[j]; 291 if((q & SEC_TER_DELTA_FLAG) == 0) { 292 i = j; 293 break; 294 } 295 --j; 296 } 297 if((q & SEC_TER_DELTA_FLAG) != 0) { 298 // No primary between start and limit. 299 break; 300 } 301 } 302 } 303 if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. 304 limit = i; 305 } else { 306 start = i; 307 } 308 } 309 return start; 310} 311 312U_NAMESPACE_END 313 314#endif // !UCONFIG_NO_COLLATION 315