/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.settings;

import android.content.res.Resources;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.internal.MoreKeySpec;
import com.android.inputmethod.latin.PunctuationSuggestions;
import com.android.inputmethod.latin.R;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.StringUtils;

import java.util.Arrays;
import java.util.Locale;

31b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalardpublic final class SpacingAndPunctuations {
32837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka    private final int[] mSortedSymbolsPrecededBySpace;
33837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka    private final int[] mSortedSymbolsFollowedBySpace;
3429c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard    private final int[] mSortedSymbolsClusteringTogether;
35837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka    private final int[] mSortedWordConnectors;
36c93cf1c398fbea8bde4b568dae1fbe2f8d9b4180Tadashi G. Takaoka    public final int[] mSortedWordSeparators;
37a273319c59ad24070ee5b35d72f044df496faa02Tadashi G. Takaoka    public final PunctuationSuggestions mSuggestPuncList;
38b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    private final int mSentenceSeparator;
39b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    private final int mAbbreviationMarker;
40b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    private final int[] mSortedSentenceTerminators;
41b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public final String mSentenceSeparatorAndSpace;
42b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public final boolean mCurrentLanguageHasSpaces;
43860c3b8e8cc65e2a2b26b4da0356b5bcff6450e6Tadashi G. Takaoka    public final boolean mUsesAmericanTypography;
4460afa7000f14f8f8ca890236f636d45a2b59b61eJean Chalard    public final boolean mUsesGermanRules;
45b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
46b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public SpacingAndPunctuations(final Resources res) {
47837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        // To be able to binary search the code point. See {@link #isUsuallyPrecededBySpace(int)}.
48837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        mSortedSymbolsPrecededBySpace = StringUtils.toSortedCodePointArray(
49837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka                res.getString(R.string.symbols_preceded_by_space));
50837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        // To be able to binary search the code point. See {@link #isUsuallyFollowedBySpace(int)}.
51837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        mSortedSymbolsFollowedBySpace = StringUtils.toSortedCodePointArray(
52837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka                res.getString(R.string.symbols_followed_by_space));
5329c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard        mSortedSymbolsClusteringTogether = StringUtils.toSortedCodePointArray(
5429c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard                res.getString(R.string.symbols_clustering_together));
55837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        // To be able to binary search the code point. See {@link #isWordConnector(int)}.
56837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        mSortedWordConnectors = StringUtils.toSortedCodePointArray(
57837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka                res.getString(R.string.symbols_word_connectors));
58c93cf1c398fbea8bde4b568dae1fbe2f8d9b4180Tadashi G. Takaoka        mSortedWordSeparators = StringUtils.toSortedCodePointArray(
59c93cf1c398fbea8bde4b568dae1fbe2f8d9b4180Tadashi G. Takaoka                res.getString(R.string.symbols_word_separators));
60b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard        mSortedSentenceTerminators = StringUtils.toSortedCodePointArray(
61b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard                res.getString(R.string.symbols_sentence_terminators));
62b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard        mSentenceSeparator = res.getInteger(R.integer.sentence_separator);
63b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard        mAbbreviationMarker = res.getInteger(R.integer.abbreviation_marker);
64b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard        mSentenceSeparatorAndSpace = new String(new int[] {
65b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard                mSentenceSeparator, Constants.CODE_SPACE }, 0, 2);
66b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard        mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces);
67860c3b8e8cc65e2a2b26b4da0356b5bcff6450e6Tadashi G. Takaoka        final Locale locale = res.getConfiguration().locale;
68860c3b8e8cc65e2a2b26b4da0356b5bcff6450e6Tadashi G. Takaoka        // Heuristic: we use American Typography rules because it's the most common rules for all
6960afa7000f14f8f8ca890236f636d45a2b59b61eJean Chalard        // English variants. German rules (not "German typography") also have small gotchas.
70860c3b8e8cc65e2a2b26b4da0356b5bcff6450e6Tadashi G. Takaoka        mUsesAmericanTypography = Locale.ENGLISH.getLanguage().equals(locale.getLanguage());
7160afa7000f14f8f8ca890236f636d45a2b59b61eJean Chalard        mUsesGermanRules = Locale.GERMAN.getLanguage().equals(locale.getLanguage());
72a273319c59ad24070ee5b35d72f044df496faa02Tadashi G. Takaoka        final String[] suggestPuncsSpec = MoreKeySpec.splitKeySpecs(
7344f144ab3ae6824a64d309add591cfc469451480Tadashi G. Takaoka                res.getString(R.string.suggested_punctuations));
74a273319c59ad24070ee5b35d72f044df496faa02Tadashi G. Takaoka        mSuggestPuncList = PunctuationSuggestions.newPunctuationSuggestions(suggestPuncsSpec);
75b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
76b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
77914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard    @UsedForTesting
78914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard    public SpacingAndPunctuations(final SpacingAndPunctuations model,
79914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard            final int[] overrideSortedWordSeparators) {
80914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSortedSymbolsPrecededBySpace = model.mSortedSymbolsPrecededBySpace;
81914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSortedSymbolsFollowedBySpace = model.mSortedSymbolsFollowedBySpace;
82914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSortedSymbolsClusteringTogether = model.mSortedSymbolsClusteringTogether;
83914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSortedWordConnectors = model.mSortedWordConnectors;
84914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSortedWordSeparators = overrideSortedWordSeparators;
857140035932ec98ce003b6b7a5100a0aca501505fJean Chalard        mSortedSentenceTerminators = model.mSortedSentenceTerminators;
86914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSuggestPuncList = model.mSuggestPuncList;
87914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSentenceSeparator = model.mSentenceSeparator;
887140035932ec98ce003b6b7a5100a0aca501505fJean Chalard        mAbbreviationMarker = model.mAbbreviationMarker;
89914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mSentenceSeparatorAndSpace = model.mSentenceSeparatorAndSpace;
90914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mCurrentLanguageHasSpaces = model.mCurrentLanguageHasSpaces;
91914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mUsesAmericanTypography = model.mUsesAmericanTypography;
92914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard        mUsesGermanRules = model.mUsesGermanRules;
93914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard    }
94914078fd9198aeb3d7ffa034562321d688d588f7Jean Chalard
95b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isWordSeparator(final int code) {
96c93cf1c398fbea8bde4b568dae1fbe2f8d9b4180Tadashi G. Takaoka        return Arrays.binarySearch(mSortedWordSeparators, code) >= 0;
97b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
98b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
99b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isWordConnector(final int code) {
100837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        return Arrays.binarySearch(mSortedWordConnectors, code) >= 0;
101b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
102b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
103b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isWordCodePoint(final int code) {
104b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard        return Character.isLetter(code) || isWordConnector(code);
105b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
106b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
107b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isUsuallyPrecededBySpace(final int code) {
108837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        return Arrays.binarySearch(mSortedSymbolsPrecededBySpace, code) >= 0;
109b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
110b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
111b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isUsuallyFollowedBySpace(final int code) {
112837cdd738b7ddbeac04b15230f01e44d247bd50aTadashi G. Takaoka        return Arrays.binarySearch(mSortedSymbolsFollowedBySpace, code) >= 0;
113b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
114b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard
11529c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard    public boolean isClusteringSymbol(final int code) {
11629c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard        return Arrays.binarySearch(mSortedSymbolsClusteringTogether, code) >= 0;
11729c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard    }
11829c00ff53822658268d91a0c42afb7db540c15f9Jean Chalard
119b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    public boolean isSentenceTerminator(final int code) {
120b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard        return Arrays.binarySearch(mSortedSentenceTerminators, code) >= 0;
121b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    }
122b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard
123b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    public boolean isAbbreviationMarker(final int code) {
124b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard        return code == mAbbreviationMarker;
125b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard    }
126b526a894db0f475596abbed56c4b311b5e4904dcJean Chalard
127b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    public boolean isSentenceSeparator(final int code) {
128b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard        return code == mSentenceSeparator;
129b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard    }
130367a35d377076e387a542560f033434959d72d1dJean Chalard
131367a35d377076e387a542560f033434959d72d1dJean Chalard    public String dump() {
132367a35d377076e387a542560f033434959d72d1dJean Chalard        final StringBuilder sb = new StringBuilder();
133367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("mSortedSymbolsPrecededBySpace = ");
134367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + Arrays.toString(mSortedSymbolsPrecededBySpace));
135367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSortedSymbolsFollowedBySpace = ");
136367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + Arrays.toString(mSortedSymbolsFollowedBySpace));
137367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSortedWordConnectors = ");
138367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + Arrays.toString(mSortedWordConnectors));
139367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSortedWordSeparators = ");
140367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + Arrays.toString(mSortedWordSeparators));
141367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSuggestPuncList = ");
142367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mSuggestPuncList);
143367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSentenceSeparator = ");
144367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mSentenceSeparator);
145367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mSentenceSeparatorAndSpace = ");
146367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mSentenceSeparatorAndSpace);
147367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mCurrentLanguageHasSpaces = ");
148367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mCurrentLanguageHasSpaces);
149367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mUsesAmericanTypography = ");
150367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mUsesAmericanTypography);
151367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("\n   mUsesGermanRules = ");
152367a35d377076e387a542560f033434959d72d1dJean Chalard        sb.append("" + mUsesGermanRules);
153367a35d377076e387a542560f033434959d72d1dJean Chalard        return sb.toString();
154367a35d377076e387a542560f033434959d72d1dJean Chalard    }
155b90fa0fb2ce195cfaec878dea9702261290176c2Jean Chalard}
156