/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.settings;

import android.content.res.Resources;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.internal.MoreKeySpec;
import com.android.inputmethod.latin.PunctuationSuggestions;
import com.android.inputmethod.latin.R;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.StringUtils;

import java.util.Arrays;
import java.util.Locale;

3188c46b84df005638546de5e4e965bdcc31352f48Pierre Langloispublic final class SpacingAndPunctuations {
3288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int[] mSortedSymbolsPrecededBySpace;
3388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int[] mSortedSymbolsFollowedBySpace;
3488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int[] mSortedSymbolsClusteringTogether;
3588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int[] mSortedWordConnectors;
3619c0535d3c9c0bec6eeecce0ae704a7fd527a9d8Pierre Langlois    public final int[] mSortedWordSeparators;
3788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public final PunctuationSuggestions mSuggestPuncList;
3888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int mSentenceSeparator;
3988c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    private final int mAbbreviationMarker;
40d3832965c62a8ad461b9ea9eb0994ca6b0a3da2cAlexandre Rames    private final int[] mSortedSentenceTerminators;
4188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public final String mSentenceSeparatorAndSpace;
42d3832965c62a8ad461b9ea9eb0994ca6b0a3da2cAlexandre Rames    public final boolean mCurrentLanguageHasSpaces;
43d3832965c62a8ad461b9ea9eb0994ca6b0a3da2cAlexandre Rames    public final boolean mUsesAmericanTypography;
44d3832965c62a8ad461b9ea9eb0994ca6b0a3da2cAlexandre Rames    public final boolean mUsesGermanRules;
4588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
4688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public SpacingAndPunctuations(final Resources res) {
4788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        // To be able to binary search the code point. See {@link #isUsuallyPrecededBySpace(int)}.
4888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedSymbolsPrecededBySpace = StringUtils.toSortedCodePointArray(
491e85b7f2e8ad2bfb233de29405aade635ed207cePierre Langlois                res.getString(R.string.symbols_preceded_by_space));
5088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        // To be able to binary search the code point. See {@link #isUsuallyFollowedBySpace(int)}.
5188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedSymbolsFollowedBySpace = StringUtils.toSortedCodePointArray(
5288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois                res.getString(R.string.symbols_followed_by_space));
5388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedSymbolsClusteringTogether = StringUtils.toSortedCodePointArray(
54919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames                res.getString(R.string.symbols_clustering_together));
5588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        // To be able to binary search the code point. See {@link #isWordConnector(int)}.
5688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedWordConnectors = StringUtils.toSortedCodePointArray(
5788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois                res.getString(R.string.symbols_word_connectors));
5888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedWordSeparators = StringUtils.toSortedCodePointArray(
5988c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois                res.getString(R.string.symbols_word_separators));
6088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSortedSentenceTerminators = StringUtils.toSortedCodePointArray(
6188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois                res.getString(R.string.symbols_sentence_terminators));
6288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSentenceSeparator = res.getInteger(R.integer.sentence_separator);
6388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mAbbreviationMarker = res.getInteger(R.integer.abbreviation_marker);
6488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mSentenceSeparatorAndSpace = new String(new int[] {
651e85b7f2e8ad2bfb233de29405aade635ed207cePierre Langlois                mSentenceSeparator, Constants.CODE_SPACE }, 0, 2);
6688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces);
674cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        final Locale locale = res.getConfiguration().locale;
684cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        // Heuristic: we use American Typography rules because it's the most common rules for all
694cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        // English variants. German rules (not "German typography") also have small gotchas.
704cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mUsesAmericanTypography = Locale.ENGLISH.getLanguage().equals(locale.getLanguage());
7188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        mUsesGermanRules = Locale.GERMAN.getLanguage().equals(locale.getLanguage());
72919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        final String[] suggestPuncsSpec = MoreKeySpec.splitKeySpecs(
73919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames                res.getString(R.string.suggested_punctuations));
74919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        mSuggestPuncList = PunctuationSuggestions.newPunctuationSuggestions(suggestPuncsSpec);
75919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    }
76919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames
77919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    @UsedForTesting
78919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    public SpacingAndPunctuations(final SpacingAndPunctuations model,
79919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames            final int[] overrideSortedWordSeparators) {
80919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        mSortedSymbolsPrecededBySpace = model.mSortedSymbolsPrecededBySpace;
81919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        mSortedSymbolsFollowedBySpace = model.mSortedSymbolsFollowedBySpace;
824cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSortedSymbolsClusteringTogether = model.mSortedSymbolsClusteringTogether;
834cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSortedWordConnectors = model.mSortedWordConnectors;
844cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSortedWordSeparators = overrideSortedWordSeparators;
854cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSortedSentenceTerminators = model.mSortedSentenceTerminators;
864cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSuggestPuncList = model.mSuggestPuncList;
874cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSentenceSeparator = model.mSentenceSeparator;
884cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mAbbreviationMarker = model.mAbbreviationMarker;
894cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mSentenceSeparatorAndSpace = model.mSentenceSeparatorAndSpace;
904cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mCurrentLanguageHasSpaces = model.mCurrentLanguageHasSpaces;
914cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mUsesAmericanTypography = model.mUsesAmericanTypography;
924cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        mUsesGermanRules = model.mUsesGermanRules;
934cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli    }
944cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli
954cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli    public boolean isWordSeparator(final int code) {
964cb13e841305b38acbd8195b1c511d59c91ec8d9Georgia Kouveli        return Arrays.binarySearch(mSortedWordSeparators, code) >= 0;
97cb6592f4b00347a84f9d7638473f0af8f1b6b1ddGeorgia Kouveli    }
9888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
99919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    public boolean isWordConnector(final int code) {
100919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        return Arrays.binarySearch(mSortedWordConnectors, code) >= 0;
101919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    }
1026a049f97861bd71c69d81f643e42308d28c5de31Alexandre Rames
103919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    public boolean isWordCodePoint(final int code) {
104919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames        return Character.isLetter(code) || isWordConnector(code);
105919e3fe28a5024c53ede42922092bbc32e89dcb8Alexandre Rames    }
1066a049f97861bd71c69d81f643e42308d28c5de31Alexandre Rames
10788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public boolean isUsuallyPrecededBySpace(final int code) {
10888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        return Arrays.binarySearch(mSortedSymbolsPrecededBySpace, code) >= 0;
109cb6592f4b00347a84f9d7638473f0af8f1b6b1ddGeorgia Kouveli    }
11088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
1111e85b7f2e8ad2bfb233de29405aade635ed207cePierre Langlois    public boolean isUsuallyFollowedBySpace(final int code) {
11288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        return Arrays.binarySearch(mSortedSymbolsFollowedBySpace, code) >= 0;
11388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    }
11488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
11588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public boolean isClusteringSymbol(final int code) {
11688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        return Arrays.binarySearch(mSortedSymbolsClusteringTogether, code) >= 0;
11788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    }
11888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
11988c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    public boolean isSentenceTerminator(final int code) {
12088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        return Arrays.binarySearch(mSortedSentenceTerminators, code) >= 0;
12188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    }
12288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois
12319c0535d3c9c0bec6eeecce0ae704a7fd527a9d8Pierre Langlois    public boolean isAbbreviationMarker(final int code) {
1241d451fce2580f360ef249893135526156985a85cPierre Langlois        return code == mAbbreviationMarker;
1251d451fce2580f360ef249893135526156985a85cPierre Langlois    }
1261d451fce2580f360ef249893135526156985a85cPierre Langlois
1271d451fce2580f360ef249893135526156985a85cPierre Langlois    public boolean isSentenceSeparator(final int code) {
1281d451fce2580f360ef249893135526156985a85cPierre Langlois        return code == mSentenceSeparator;
1291d451fce2580f360ef249893135526156985a85cPierre Langlois    }
1301d451fce2580f360ef249893135526156985a85cPierre Langlois
1319a9331faeba996d6c85e6e2a6355ccfc22c6cab6Rodolph Perfetta    public String dump() {
1321d451fce2580f360ef249893135526156985a85cPierre Langlois        final StringBuilder sb = new StringBuilder();
1331d451fce2580f360ef249893135526156985a85cPierre Langlois        sb.append("mSortedSymbolsPrecededBySpace = ");
13488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + Arrays.toString(mSortedSymbolsPrecededBySpace));
13588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSortedSymbolsFollowedBySpace = ");
13688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + Arrays.toString(mSortedSymbolsFollowedBySpace));
13788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSortedWordConnectors = ");
13888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + Arrays.toString(mSortedWordConnectors));
13988c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSortedWordSeparators = ");
14088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + Arrays.toString(mSortedWordSeparators));
14188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSuggestPuncList = ");
14288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mSuggestPuncList);
14388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSentenceSeparator = ");
14488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mSentenceSeparator);
14588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mSentenceSeparatorAndSpace = ");
14688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mSentenceSeparatorAndSpace);
14788c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mCurrentLanguageHasSpaces = ");
14888c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mCurrentLanguageHasSpaces);
14988c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mUsesAmericanTypography = ");
15088c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mUsesAmericanTypography);
15188c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("\n   mUsesGermanRules = ");
15288c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        sb.append("" + mUsesGermanRules);
15388c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois        return sb.toString();
15488c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois    }
15588c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois}
15688c46b84df005638546de5e4e965bdcc31352f48Pierre Langlois