// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.text.CharacterIterator;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.BytesTrie.Result;

/**
 * A {@link DictionaryMatcher} whose dictionary is stored as a serialized {@link BytesTrie}.
 *
 * <p>Each input code point is reduced to a single byte value by {@link #transform(int)} before it
 * is fed to the trie.
 */
class BytesDictionaryMatcher extends DictionaryMatcher {
    /** Serialized trie data; a new {@link BytesTrie} is created over it for every lookup. */
    private final byte[] characters;

    /** The complete transform constant (type bits and offset bits) supplied by the caller. */
    private final int transform;

    /**
     * Creates a matcher over the given serialized trie.
     *
     * @param chars serialized trie bytes; the array is stored by reference, not copied
     * @param transform transform constant; its type bits (selected by
     *     {@code DictionaryData.TRANSFORM_TYPE_MASK}) must equal
     *     {@code DictionaryData.TRANSFORM_TYPE_OFFSET}, which is checked with
     *     {@code Assert.assrt}
     */
    public BytesDictionaryMatcher(byte[] chars, int transform) {
        characters = chars;
        Assert.assrt((transform & DictionaryData.TRANSFORM_TYPE_MASK) == DictionaryData.TRANSFORM_TYPE_OFFSET);
        // while there is only one transform type so far, save the entire transform constant so that
        // if we add any others, we need only change code in transform() and the assert above rather
        // than adding a "transform type" variable
        this.transform = transform;
    }

    /**
     * Maps a code point to the byte value used as the trie key.
     *
     * <p>U+200D maps to {@code 0xFF} and U+200C maps to {@code 0xFE}. Any other code point maps to
     * its distance from the offset encoded in the transform constant, provided that distance lies
     * in {@code 0..0xFD}.
     *
     * @param c the code point to map
     * @return a value in {@code 0..0xFF}, or {@code -1} if {@code c} has no byte representation
     */
    private int transform(int c) {
        if (c == 0x200D) {
            return 0xFF;
        } else if (c == 0x200C) {
            return 0xFE;
        }

        int delta = c - (transform & DictionaryData.TRANSFORM_OFFSET_MASK);
        if (delta < 0 || 0xFD < delta) {
            return -1;
        }
        return delta;
    }

    /**
     * Advances the trie by one code point.
     *
     * <p>A code point that {@link #transform(int)} cannot map is reported as
     * {@link Result#NO_MATCH} without consulting the trie. Per the BytesTrie API documentation,
     * negative inputs in {@code -0x100..-1} are treated like {@code 0..0xff}, so passing the
     * {@code -1} sentinel through would make an unmappable character behave like byte
     * {@code 0xFF}, which is the byte reserved for U+200D.
     *
     * @param trie the trie being traversed
     * @param c the code point to feed
     * @param isFirst {@code true} to restart the traversal at the root, {@code false} to continue
     * @return the trie result for this step
     */
    private Result advance(BytesTrie trie, int c, boolean isFirst) {
        int unit = transform(c);
        if (unit < 0) {
            return Result.NO_MATCH;
        }
        return isFirst ? trie.first(unit) : trie.next(unit);
    }

    /**
     * Walks the trie along the text and records every prefix that is a dictionary entry.
     *
     * @param text_ the text to match; it is read forward from its current position
     * @param maxLength the maximum number of code points to consume
     * @param lengths receives, for each recorded match, the number of code points it spans
     * @param count_ receives the number of recorded matches in element 0; it is left untouched
     *     when the text is already exhausted on entry
     * @param limit the maximum number of matches to record
     * @param values if non-null, receives the trie value of each recorded match
     * @return the number of code points read from the text, or 0 if the text was already exhausted
     */
    @Override
    public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) {
        UCharacterIterator text = UCharacterIterator.getInstance(text_);
        BytesTrie bt = new BytesTrie(characters, 0);
        int c = text.nextCodePoint();
        if (c == UCharacterIterator.DONE) {
            return 0;
        }
        Result result = advance(bt, c, true);
        // TODO: should numChars count Character.charCount() ?
        int numChars = 1;
        int count = 0;
        for (;;) {
            if (result.hasValue()) {
                if (count < limit) {
                    if (values != null) {
                        values[count] = bt.getValue();
                    }
                    lengths[count] = numChars;
                    count++;
                }
                // A final value has no continuation, so no longer match is possible.
                if (result == Result.FINAL_VALUE) {
                    break;
                }
            } else if (result == Result.NO_MATCH) {
                break;
            }

            if (numChars >= maxLength) {
                break;
            }

            c = text.nextCodePoint();
            if (c == UCharacterIterator.DONE) {
                break;
            }
            ++numChars;
            result = advance(bt, c, false);
        }
        count_[0] = count;
        return numChars;
    }

    /**
     * Returns the trie type of this matcher.
     *
     * @return {@code DictionaryData.TRIE_TYPE_BYTES}
     */
    @Override
    public int getType() {
        return DictionaryData.TRIE_TYPE_BYTES;
    }
}