// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Assert;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie.Result;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass BytesDictionaryMatcher extends DictionaryMatcher {
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final byte[] characters;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final int transform;
202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public BytesDictionaryMatcher(byte[] chars, int transform) {
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        characters = chars;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Assert.assrt((transform & DictionaryData.TRANSFORM_TYPE_MASK) == DictionaryData.TRANSFORM_TYPE_OFFSET);
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // while there is only one transform type so far, save the entire transform constant so that
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we add any others, we need only change code in transform() and the assert above rather
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // than adding a "transform type" variable
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.transform = transform;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
292d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int transform(int c) {
312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        if (c == 0x200D) {
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0xFF;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (c == 0x200C) {
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0xFE;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int delta = c - (transform & DictionaryData.TRANSFORM_OFFSET_MASK);
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (delta < 0 || 0xFD < delta) {
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -1;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return delta;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UCharacterIterator text = UCharacterIterator.getInstance(text_);
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BytesTrie bt = new BytesTrie(characters, 0);
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c = text.nextCodePoint();
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (c == UCharacterIterator.DONE) {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Result result = bt.first(transform(c));
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: should numChars count Character.charCount() ?
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int numChars = 1;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = 0;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (result.hasValue()) {
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (count < limit) {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (values != null) {
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        values[count] = bt.getValue();
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    lengths[count] = numChars;
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    count++;
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (result == Result.FINAL_VALUE) {
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (result == Result.NO_MATCH) {
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (numChars >= maxLength) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = text.nextCodePoint();
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (c == UCharacterIterator.DONE) {
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ++numChars;
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result = bt.next(transform(c));
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count_[0] = count;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return numChars;
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
872d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getType() {
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return DictionaryData.TRIE_TYPE_BYTES;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
94