1/* 2 ****************************************************************************** 3 * Copyright (C) 1996-2010, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8#include "unicode/utypes.h" 9 10#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 11 12#include "unicode/unistr.h" 13#include "unicode/putil.h" 14#include "unicode/usearch.h" 15 16#include "cmemory.h" 17#include "unicode/coll.h" 18#include "unicode/tblcoll.h" 19#include "unicode/coleitr.h" 20#include "unicode/ucoleitr.h" 21 22#include "unicode/regex.h" // TODO: make conditional on regexp being built. 23 24#include "unicode/uniset.h" 25#include "unicode/uset.h" 26#include "unicode/ustring.h" 27#include "hash.h" 28#include "uhash.h" 29#include "ucol_imp.h" 30#include "normalizer2impl.h" 31 32#include "unicode/colldata.h" 33#include "unicode/bmsearch.h" 34 35U_NAMESPACE_BEGIN 36 37#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) 38#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 39#define DELETE_ARRAY(array) uprv_free((void *) (array)) 40 41 42struct CEI 43{ 44 uint32_t order; 45 int32_t lowOffset; 46 int32_t highOffset; 47}; 48 49class Target : public UMemory 50{ 51public: 52 Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status); 53 ~Target(); 54 55 void setTargetString(const UnicodeString *target); 56 57 const CEI *nextCE(int32_t offset); 58 const CEI *prevCE(int32_t offset); 59 60 int32_t stringLength(); 61 UChar charAt(int32_t offset); 62 63 UBool isBreakBoundary(int32_t offset); 64 int32_t nextBreakBoundary(int32_t offset); 65 int32_t nextSafeBoundary(int32_t offset); 66 67 UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end); 68 69 void setOffset(int32_t offset); 70 void setLast(int32_t last); 71 int32_t getOffset(); 72 73private: 74 CEI *ceb; 75 int32_t bufferSize; 76 int32_t bufferMin; 77 int32_t bufferMax; 78 79 uint32_t strengthMask; 80 UCollationStrength strength; 81 uint32_t variableTop; 82 UBool toShift; 83 UCollator *coll; 84 const Normalizer2 &nfd; 85 86 const UnicodeString *targetString; 87 const UChar *targetBuffer; 88 int32_t targetLength; 89 90 UCollationElements *elements; 91 UBreakIterator *charBreakIterator; 92}; 93 94Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) 95 : bufferSize(0), bufferMin(0), bufferMax(0), 96 strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), 97 nfd(*Normalizer2Factory::getNFDInstance(status)), 98 targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) 99{ 100 strength = ucol_getStrength(coll); 101 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; 102 variableTop = ucol_getVariableTop(coll, &status); 103 104 // find the largest expansion 105 uint8_t maxExpansion = 0; 106 for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { 107 if (*expansion > maxExpansion) { 108 maxExpansion = *expansion; 109 } 110 } 111 112 // room for an extra character on each end, plus 4 for safety 113 bufferSize = patternLength + (2 * maxExpansion) + 4; 114 115 ceb = NEW_ARRAY(CEI, bufferSize); 116 117 if (ceb == NULL) { 118 status = U_MEMORY_ALLOCATION_ERROR; 119 return; 120 } 121 122 if (target != NULL) { 123 setTargetString(target); 124 } 125 126 switch (strength) 127 { 128 default: 129 strengthMask |= UCOL_TERTIARYORDERMASK; 130 /* fall through */ 131 132 case UCOL_SECONDARY: 133 strengthMask |= UCOL_SECONDARYORDERMASK; 134 /* fall through */ 135 136 case UCOL_PRIMARY: 137 strengthMask |= UCOL_PRIMARYORDERMASK; 138 } 139} 140 141Target::~Target() 142{ 143 ubrk_close(charBreakIterator); 144 ucol_closeElements(elements); 145 146 DELETE_ARRAY(ceb); 147} 148 149void Target::setTargetString(const UnicodeString *target) 150{ 151 if (charBreakIterator != NULL) { 152 ubrk_close(charBreakIterator); 153 ucol_closeElements(elements); 154 } 155 156 targetString = target; 157 158 if (targetString != NULL) { 159 UErrorCode status = U_ZERO_ERROR; 160 161 targetBuffer = targetString->getBuffer(); 162 targetLength = targetString->length(); 163 164 elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); 165 ucol_forceHanImplicit(elements, &status); 166 167 charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), 168 targetBuffer, targetLength, &status); 169 } else { 170 targetBuffer = NULL; 171 targetLength = 0; 172 } 173} 174 175const CEI *Target::nextCE(int32_t offset) 176{ 177 UErrorCode status = U_ZERO_ERROR; 178 int32_t low = -1, high = -1; 179 uint32_t order; 180 UBool cont = FALSE; 181 182 if (offset >= bufferMin && offset < bufferMax) { 183 return &ceb[offset]; 184 } 185 186 if (bufferMax >= bufferSize || offset != bufferMax) { 187 return NULL; 188 } 189 190 do { 191 low = ucol_getOffset(elements); 192 order = ucol_next(elements, &status); 193 high = ucol_getOffset(elements); 194 195 if (order == (uint32_t)UCOL_NULLORDER) { 196 //high = low = -1; 197 break; 198 } 199 200 cont = isContinuation(order); 201 order &= strengthMask; 202 203 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 204 if (strength >= UCOL_QUATERNARY) { 205 order &= UCOL_PRIMARYORDERMASK; 206 } else { 207 order = UCOL_IGNORABLE; 208 } 209 } 210 } while (order == UCOL_IGNORABLE); 211 212 if (cont) { 213 order |= UCOL_CONTINUATION_MARKER; 214 } 215 216 ceb[offset].order = order; 217 ceb[offset].lowOffset = low; 218 ceb[offset].highOffset = high; 219 220 bufferMax += 1; 221 222 return &ceb[offset]; 223} 224 225const CEI *Target::prevCE(int32_t offset) 226{ 227 UErrorCode status = U_ZERO_ERROR; 228 int32_t low = -1, high = -1; 229 uint32_t order; 230 UBool cont = FALSE; 231 232 if (offset >= bufferMin && offset < bufferMax) { 233 return &ceb[offset]; 234 } 235 236 if (bufferMax >= bufferSize || offset != bufferMax) { 237 return NULL; 238 } 239 240 do { 241 high = ucol_getOffset(elements); 242 order = ucol_previous(elements, &status); 243 low = ucol_getOffset(elements); 244 245 if (order == (uint32_t)UCOL_NULLORDER) { 246 break; 247 } 248 249 cont = isContinuation(order); 250 order &= strengthMask; 251 252 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 253 if (strength >= UCOL_QUATERNARY) { 254 order &= UCOL_PRIMARYORDERMASK; 255 } else { 256 order = UCOL_IGNORABLE; 257 } 258 } 259 } while (order == UCOL_IGNORABLE); 260 261 bufferMax += 1; 262 263 if (cont) { 264 order |= UCOL_CONTINUATION_MARKER; 265 } 266 267 ceb[offset].order = order; 268 ceb[offset].lowOffset = low; 269 ceb[offset].highOffset = high; 270 271 return &ceb[offset]; 272} 273 274int32_t Target::stringLength() 275{ 276 if (targetString != NULL) { 277 return targetLength; 278 } 279 280 return 0; 281} 282 283UChar Target::charAt(int32_t offset) 284{ 285 if (targetString != NULL) { 286 return targetBuffer[offset]; 287 } 288 289 return 0x0000; 290} 291 292void Target::setOffset(int32_t offset) 293{ 294 UErrorCode status = U_ZERO_ERROR; 295 296 bufferMin = 0; 297 bufferMax = 0; 298 299 ucol_setOffset(elements, offset, &status); 300} 301 302void Target::setLast(int32_t last) 303{ 304 UErrorCode status = U_ZERO_ERROR; 305 306 bufferMin = 0; 307 bufferMax = 1; 308 309 ceb[0].order = UCOL_NULLORDER; 310 ceb[0].lowOffset = last; 311 ceb[0].highOffset = last; 312 313 ucol_setOffset(elements, last, &status); 314} 315 316int32_t Target::getOffset() 317{ 318 return ucol_getOffset(elements); 319} 320 321UBool Target::isBreakBoundary(int32_t offset) 322{ 323 return ubrk_isBoundary(charBreakIterator, offset); 324} 325 326int32_t Target::nextBreakBoundary(int32_t offset) 327{ 328 return ubrk_following(charBreakIterator, offset); 329} 330 331int32_t Target::nextSafeBoundary(int32_t offset) 332{ 333 while (offset < targetLength) { 334 //UChar ch = charAt(offset); 335 UChar ch = targetBuffer[offset]; 336 337 if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) { 338 return offset; 339 } 340 341 offset += 1; 342 } 343 344 return targetLength; 345} 346 347UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) 348{ 349 if (strength < UCOL_IDENTICAL) { 350 return TRUE; 351 } 352 353 // Note: We could use Normalizer::compare() or similar, but for short strings 354 // which may not be in FCD it might be faster to just NFD them. 355 UErrorCode status = U_ZERO_ERROR; 356 UnicodeString t2, p2; 357 nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status); 358 nfd.normalize(pattern, p2, status); 359 // return FALSE if NFD failed 360 return U_SUCCESS(status) && t2 == p2; 361} 362 363#define HASH_TABLE_SIZE 257 364 365class BadCharacterTable : public UMemory 366{ 367public: 368 BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status); 369 ~BadCharacterTable(); 370 371 int32_t operator[](uint32_t ce) const; 372 int32_t getMaxSkip() const; 373 int32_t minLengthInChars(int32_t index); 374 375private: 376 static int32_t hash(uint32_t ce); 377 378 int32_t maxSkip; 379 int32_t badCharacterTable[HASH_TABLE_SIZE]; 380 381 int32_t *minLengthCache; 382}; 383 384BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status) 385 : minLengthCache(NULL) 386{ 387 int32_t plen = patternCEs.size(); 388 389 // **** need a better way to deal with this **** 390 if (U_FAILURE(status) || plen == 0) { 391 return; 392 } 393 394 int32_t *history = NEW_ARRAY(int32_t, plen); 395 396 if (history == NULL) { 397 status = U_MEMORY_ALLOCATION_ERROR; 398 return; 399 } 400 401 for (int32_t i = 0; i < plen; i += 1) { 402 history[i] = -1; 403 } 404 405 minLengthCache = NEW_ARRAY(int32_t, plen + 1); 406 407 if (minLengthCache == NULL) { 408 DELETE_ARRAY(history); 409 status = U_MEMORY_ALLOCATION_ERROR; 410 return; 411 } 412 413 maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history); 414 415 for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) { 416 badCharacterTable[j] = maxSkip; 417 } 418 419 for(int32_t p = 1; p < plen; p += 1) { 420 minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history); 421 422 // Make sure this entry is not bigger than the previous one. 423 // Otherwise, we might skip too far in some cases. 424 if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) { 425 minLengthCache[p] = minLengthCache[p - 1]; 426 } 427 } 428 429 minLengthCache[plen] = 0; 430 431 for(int32_t p = 0; p < plen - 1; p += 1) { 432 badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1]; 433 } 434 435 DELETE_ARRAY(history); 436} 437 438BadCharacterTable::~BadCharacterTable() 439{ 440 DELETE_ARRAY(minLengthCache); 441} 442 443int32_t BadCharacterTable::operator[](uint32_t ce) const 444{ 445 return badCharacterTable[hash(ce)]; 446} 447 448int32_t BadCharacterTable::getMaxSkip() const 449{ 450 return maxSkip; 451} 452 453int32_t BadCharacterTable::minLengthInChars(int32_t index) 454{ 455 return minLengthCache[index]; 456} 457 458int32_t BadCharacterTable::hash(uint32_t ce) 459{ 460 return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE; 461} 462 463class GoodSuffixTable : public UMemory 464{ 465public: 466 GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status); 467 ~GoodSuffixTable(); 468 469 int32_t operator[](int32_t offset) const; 470 471private: 472 int32_t *goodSuffixTable; 473}; 474 475GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status) 476 : goodSuffixTable(NULL) 477{ 478 int32_t patlen = patternCEs.size(); 479 480 // **** need a better way to deal with this **** 481 if (U_FAILURE(status) || patlen <= 0) { 482 return; 483 } 484 485 int32_t *suff = NEW_ARRAY(int32_t, patlen); 486 int32_t start = patlen - 1, end = - 1; 487 int32_t maxSkip = badCharacterTable.getMaxSkip(); 488 489 if (suff == NULL) { 490 status = U_MEMORY_ALLOCATION_ERROR; 491 return; 492 } 493 494 // initialze suff 495 suff[patlen - 1] = patlen; 496 497 for (int32_t i = patlen - 2; i >= 0; i -= 1) { 498 // (i > start) means we're inside the last suffix match we found 499 // ((patlen - 1) - end) is how far the end of that match is from end of pattern 500 // (i - start) is how far we are from start of that match 501 // (i + (patlen - 1) - end) is index of same character at end of pattern 502 // so if any suffix match at that character doesn't extend beyond the last match, 503 // it's the suffix for this character as well 504 if (i > start && suff[i + patlen - 1 - end] < i - start) { 505 suff[i] = suff[i + patlen - 1 - end]; 506 } else { 507 start = end = i; 508 509 int32_t s = patlen; 510 511 while (start >= 0 && patternCEs[start] == patternCEs[--s]) { 512 start -= 1; 513 } 514 515 suff[i] = end - start; 516 } 517 } 518 519 // now build goodSuffixTable 520 goodSuffixTable = NEW_ARRAY(int32_t, patlen); 521 522 if (goodSuffixTable == NULL) { 523 DELETE_ARRAY(suff); 524 status = U_MEMORY_ALLOCATION_ERROR; 525 return; 526 } 527 528 529 // initialize entries to minLengthInChars of the pattern 530 for (int32_t i = 0; i < patlen; i += 1) { 531 goodSuffixTable[i] = maxSkip; 532 } 533 534 int32_t prefix = 0; 535 536 for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) { 537 if (suff[i] == i + 1) { 538 // this matching suffix is a prefix of the pattern 539 int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1); 540 541 // for any mis-match before this suffix, we should skip 542 // so that the front of the pattern (i.e. the prefix) 543 // lines up with the front of the suffix. 544 // (patlen - 1 - i) is the start of the suffix 545 while (prefix < patlen - 1 - i) { 546 // value of maxSkip means never set... 547 if (goodSuffixTable[prefix] == maxSkip) { 548 goodSuffixTable[prefix] = prefixSkip; 549 } 550 551 prefix += 1; 552 } 553 } 554 } 555 556 for (int32_t i = 0; i < patlen - 1; i += 1) { 557 goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1); 558 } 559 560 DELETE_ARRAY(suff); 561} 562 563GoodSuffixTable::~GoodSuffixTable() 564{ 565 DELETE_ARRAY(goodSuffixTable); 566} 567 568int32_t GoodSuffixTable::operator[](int32_t offset) const 569{ 570 return goodSuffixTable[offset]; 571} 572 573UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch) 574 575 576UBool BoyerMooreSearch::empty() 577{ 578 return patCEs->size() <= 0; 579} 580 581CollData *BoyerMooreSearch::getData() 582{ 583 return data; 584} 585 586CEList *BoyerMooreSearch::getPatternCEs() 587{ 588 return patCEs; 589} 590 591BadCharacterTable *BoyerMooreSearch::getBadCharacterTable() 592{ 593 return badCharacterTable; 594} 595 596GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable() 597{ 598 return goodSuffixTable; 599} 600 601BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, 602 UErrorCode &status) 603 : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL) 604{ 605 606 if (U_FAILURE(status)) { 607 return; 608 } 609 610 UCollator *collator = data->getCollator(); 611 612 patCEs = new CEList(collator, patternString, status); 613 614 if (patCEs == NULL || U_FAILURE(status)) { 615 return; 616 } 617 618 badCharacterTable = new BadCharacterTable(*patCEs, data, status); 619 620 if (badCharacterTable == NULL || U_FAILURE(status)) { 621 return; 622 } 623 624 goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status); 625 626 if (targetString != NULL) { 627 target = new Target(collator, targetString, patCEs->size(), status); 628 } 629} 630 631BoyerMooreSearch::~BoyerMooreSearch() 632{ 633 delete target; 634 delete goodSuffixTable; 635 delete badCharacterTable; 636 delete patCEs; 637} 638 639void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status) 640{ 641 if (U_FAILURE(status)) { 642 return; 643 } 644 645 if (target == NULL) { 646 target = new Target(data->getCollator(), targetString, patCEs->size(), status); 647 } else { 648 target->setTargetString(targetString); 649 } 650} 651 652// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **** 653/* 654 * TODO: 655 * * deal with trailing (and leading?) ignorables. 656 * * Adding BoyerMooreSearch object slowed it down. How can we speed it up? 657 */ 658UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end) 659{ 660 /*UCollator *coll =*/ data->getCollator(); 661 int32_t plen = patCEs->size(); 662 int32_t tlen = target->stringLength(); 663 int32_t maxSkip = badCharacterTable->getMaxSkip(); 664 int32_t tOffset = offset + maxSkip; 665 666 if (plen <= 0) { 667 // Searching for a zero length pattern always fails. 668 start = end = -1; 669 return FALSE; 670 } 671 672 while (tOffset <= tlen) { 673 int32_t pIndex = plen - 1; 674 int32_t tIndex = 0; 675 int32_t lIndex = 0; 676 677 if (tOffset < tlen) { 678 // **** we really want to skip ahead enough to **** 679 // **** be sure we get at least 1 non-ignorable **** 680 // **** CE after the end of the pattern. **** 681 int32_t next = target->nextSafeBoundary(tOffset + 1); 682 683 target->setOffset(next); 684 685 for (lIndex = 0; ; lIndex += 1) { 686 const CEI *cei = target->prevCE(lIndex); 687 int32_t low = cei->lowOffset; 688 int32_t high = cei->highOffset; 689 690 if (high == 0 || (low < high && low <= tOffset)) { 691 if (low < tOffset) { 692 while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) { 693 lIndex -= 1; 694 } 695 696 if (high > tOffset) { 697 tOffset = high; 698 } 699 } 700 701 break; 702 } 703 } 704 } else { 705 target->setLast(tOffset); 706 lIndex = 0; 707 } 708 709 tIndex = ++lIndex; 710 711 // Iterate backward until we hit the beginning of the pattern 712 while (pIndex >= 0) { 713 uint32_t pce = (*patCEs)[pIndex]; 714 const CEI *tcei = target->prevCE(tIndex++); 715 716 717 if (tcei->order != pce) { 718 // There is a mismatch at this position. Decide how far 719 // over to shift the pattern, then try again. 720 721 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex]; 722#ifdef EXTRA_CAUTIOUS 723 int32_t old = tOffset; 724#endif 725 726 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1); 727 728 if (gsOffset > tOffset) { 729 tOffset = gsOffset; 730 } 731 732#ifdef EXTRA_CAUTIOUS 733 // Make sure we don't skip backwards... 734 if (tOffset <= old) { 735 tOffset = old + 1; 736 } 737#endif 738 739 break; 740 } 741 742 pIndex -= 1; 743 } 744 745 if (pIndex < 0) { 746 // We made it back to the beginning of the pattern, 747 // which means we matched it all. Return the location. 748 const CEI firstCEI = *target->prevCE(tIndex - 1); 749 const CEI lastCEI = *target->prevCE(lIndex); 750 int32_t mStart = firstCEI.lowOffset; 751 int32_t minLimit = lastCEI.lowOffset; 752 int32_t maxLimit = lastCEI.highOffset; 753 int32_t mLimit; 754 UBool found = TRUE; 755 756 target->setOffset(/*tOffset*/maxLimit); 757 758 const CEI nextCEI = *target->nextCE(0); 759 760 if (nextCEI.lowOffset > maxLimit) { 761 maxLimit = nextCEI.lowOffset; 762 } 763 764 if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) { 765 found = FALSE; 766 } 767 768 if (! target->isBreakBoundary(mStart)) { 769 found = FALSE; 770 } 771 772 if (firstCEI.lowOffset == firstCEI.highOffset) { 773 found = FALSE; 774 } 775 776 mLimit = maxLimit; 777 if (minLimit < maxLimit) { 778 int32_t nbb = target->nextBreakBoundary(minLimit); 779 780 if (nbb >= lastCEI.highOffset) { 781 mLimit = nbb; 782 } 783 } 784 785 if (mLimit > maxLimit) { 786 found = FALSE; 787 } 788 789 if (! target->isBreakBoundary(mLimit)) { 790 found = FALSE; 791 } 792 793 if (! target->isIdentical(pattern, mStart, mLimit)) { 794 found = FALSE; 795 } 796 797 if (found) { 798 start = mStart; 799 end = mLimit; 800 801 return TRUE; 802 } 803 804 tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip? 805 } 806 // Otherwise, we're here because of a mismatch, so keep going.... 807 } 808 809 // no match 810 start = -1; 811 end = -1; 812 return FALSE; 813} 814 815U_NAMESPACE_END 816 817#endif // #if !UCONFIG_NO_COLLATION 818