1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2012-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10package com.ibm.icu.text;
11
12import java.io.IOException;
13import java.nio.ByteBuffer;
14
15import com.ibm.icu.impl.Assert;
16import com.ibm.icu.impl.ICUBinary;
17import com.ibm.icu.impl.ICUData;
18import com.ibm.icu.impl.ICUResourceBundle;
19import com.ibm.icu.util.UResourceBundle;
20
21final class DictionaryData {
22    // disallow instantiation
23    private DictionaryData() { }
24
25    public static final int TRIE_TYPE_BYTES = 0;
26    public static final int TRIE_TYPE_UCHARS = 1;
27    public static final int TRIE_TYPE_MASK = 7;
28    public static final int TRIE_HAS_VALUES = 8;
29    public static final int TRANSFORM_NONE = 0;
30    public static final int TRANSFORM_TYPE_OFFSET = 0x1000000;
31    public static final int TRANSFORM_TYPE_MASK = 0x7f000000;
32    public static final int TRANSFORM_OFFSET_MASK = 0x1fffff;
33
34    public static final int IX_STRING_TRIE_OFFSET = 0;
35    public static final int IX_RESERVED1_OFFSET = 1;
36    public static final int IX_RESERVED2_OFFSET = 2;
37    public static final int IX_TOTAL_SIZE = 3;
38    public static final int IX_TRIE_TYPE = 4;
39    public static final int IX_TRANSFORM = 5;
40    public static final int IX_RESERVED6 = 6;
41    public static final int IX_RESERVED7 = 7;
42    public static final int IX_COUNT = 8;
43
44    private static final int DATA_FORMAT_ID = 0x44696374;
45
46    public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
47        ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BRKITR_BASE_NAME);
48        String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
49        dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
50        ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
51        ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
52        int[] indexes = new int[IX_COUNT];
53        // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
54        for (int i = 0; i < IX_COUNT; i++) {
55            indexes[i] = bytes.getInt();
56        }
57        int offset = indexes[IX_STRING_TRIE_OFFSET];
58        Assert.assrt(offset >= (4 * IX_COUNT));
59        if (offset > (4 * IX_COUNT)) {
60            int diff = offset - (4 * IX_COUNT);
61            ICUBinary.skipBytes(bytes, diff);
62        }
63        int trieType = indexes[IX_TRIE_TYPE] & TRIE_TYPE_MASK;
64        int totalSize = indexes[IX_TOTAL_SIZE] - offset;
65        DictionaryMatcher m = null;
66        if (trieType == TRIE_TYPE_BYTES) {
67            int transform = indexes[IX_TRANSFORM];
68            byte[] data = new byte[totalSize];
69            bytes.get(data);
70            m = new BytesDictionaryMatcher(data, transform);
71        } else if (trieType == TRIE_TYPE_UCHARS) {
72            Assert.assrt(totalSize % 2 == 0);
73            String data = ICUBinary.getString(bytes, totalSize / 2, totalSize & 1);
74            m = new CharsDictionaryMatcher(data);
75        } else {
76            m = null;
77        }
78        return m;
79    }
80}
81