1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.event;
18
19import android.text.TextUtils;
20import android.util.SparseIntArray;
21
22import com.android.inputmethod.latin.common.Constants;
23
24import java.text.Normalizer;
25import java.util.ArrayList;
26
27import javax.annotation.Nonnull;
28
29/**
30 * A combiner that handles dead keys.
31 */
32public class DeadKeyCombiner implements Combiner {
33
34    private static class Data {
35        // This class data taken from KeyCharacterMap.java.
36
37        /* Characters used to display placeholders for dead keys. */
38        private static final int ACCENT_ACUTE = '\u00B4';
39        private static final int ACCENT_BREVE = '\u02D8';
40        private static final int ACCENT_CARON = '\u02C7';
41        private static final int ACCENT_CEDILLA = '\u00B8';
42        private static final int ACCENT_CIRCUMFLEX = '\u02C6';
43        private static final int ACCENT_COMMA_ABOVE = '\u1FBD';
44        private static final int ACCENT_COMMA_ABOVE_RIGHT = '\u02BC';
45        private static final int ACCENT_DOT_ABOVE = '\u02D9';
46        private static final int ACCENT_DOT_BELOW = Constants.CODE_PERIOD; // approximate
47        private static final int ACCENT_DOUBLE_ACUTE = '\u02DD';
48        private static final int ACCENT_GRAVE = '\u02CB';
49        private static final int ACCENT_HOOK_ABOVE = '\u02C0';
50        private static final int ACCENT_HORN = Constants.CODE_SINGLE_QUOTE; // approximate
51        private static final int ACCENT_MACRON = '\u00AF';
52        private static final int ACCENT_MACRON_BELOW = '\u02CD';
53        private static final int ACCENT_OGONEK = '\u02DB';
54        private static final int ACCENT_REVERSED_COMMA_ABOVE = '\u02BD';
55        private static final int ACCENT_RING_ABOVE = '\u02DA';
56        private static final int ACCENT_STROKE = Constants.CODE_DASH; // approximate
57        private static final int ACCENT_TILDE = '\u02DC';
58        private static final int ACCENT_TURNED_COMMA_ABOVE = '\u02BB';
59        private static final int ACCENT_UMLAUT = '\u00A8';
60        private static final int ACCENT_VERTICAL_LINE_ABOVE = '\u02C8';
61        private static final int ACCENT_VERTICAL_LINE_BELOW = '\u02CC';
62
63        /* Legacy dead key display characters used in previous versions of the API (before L)
64         * We still support these characters by mapping them to their non-legacy version. */
65        private static final int ACCENT_GRAVE_LEGACY = Constants.CODE_GRAVE_ACCENT;
66        private static final int ACCENT_CIRCUMFLEX_LEGACY = Constants.CODE_CIRCUMFLEX_ACCENT;
67        private static final int ACCENT_TILDE_LEGACY = Constants.CODE_TILDE;
68
69        /**
70         * Maps Unicode combining diacritical to display-form dead key.
71         */
72        static final SparseIntArray sCombiningToAccent = new SparseIntArray();
73        static final SparseIntArray sAccentToCombining = new SparseIntArray();
74        static {
75            // U+0300: COMBINING GRAVE ACCENT
76            addCombining('\u0300', ACCENT_GRAVE);
77            // U+0301: COMBINING ACUTE ACCENT
78            addCombining('\u0301', ACCENT_ACUTE);
79            // U+0302: COMBINING CIRCUMFLEX ACCENT
80            addCombining('\u0302', ACCENT_CIRCUMFLEX);
81            // U+0303: COMBINING TILDE
82            addCombining('\u0303', ACCENT_TILDE);
83            // U+0304: COMBINING MACRON
84            addCombining('\u0304', ACCENT_MACRON);
85            // U+0306: COMBINING BREVE
86            addCombining('\u0306', ACCENT_BREVE);
87            // U+0307: COMBINING DOT ABOVE
88            addCombining('\u0307', ACCENT_DOT_ABOVE);
89            // U+0308: COMBINING DIAERESIS
90            addCombining('\u0308', ACCENT_UMLAUT);
91            // U+0309: COMBINING HOOK ABOVE
92            addCombining('\u0309', ACCENT_HOOK_ABOVE);
93            // U+030A: COMBINING RING ABOVE
94            addCombining('\u030A', ACCENT_RING_ABOVE);
95            // U+030B: COMBINING DOUBLE ACUTE ACCENT
96            addCombining('\u030B', ACCENT_DOUBLE_ACUTE);
97            // U+030C: COMBINING CARON
98            addCombining('\u030C', ACCENT_CARON);
99            // U+030D: COMBINING VERTICAL LINE ABOVE
100            addCombining('\u030D', ACCENT_VERTICAL_LINE_ABOVE);
101            // U+030E: COMBINING DOUBLE VERTICAL LINE ABOVE
102            //addCombining('\u030E', ACCENT_DOUBLE_VERTICAL_LINE_ABOVE);
103            // U+030F: COMBINING DOUBLE GRAVE ACCENT
104            //addCombining('\u030F', ACCENT_DOUBLE_GRAVE);
105            // U+0310: COMBINING CANDRABINDU
106            //addCombining('\u0310', ACCENT_CANDRABINDU);
107            // U+0311: COMBINING INVERTED BREVE
108            //addCombining('\u0311', ACCENT_INVERTED_BREVE);
109            // U+0312: COMBINING TURNED COMMA ABOVE
110            addCombining('\u0312', ACCENT_TURNED_COMMA_ABOVE);
111            // U+0313: COMBINING COMMA ABOVE
112            addCombining('\u0313', ACCENT_COMMA_ABOVE);
113            // U+0314: COMBINING REVERSED COMMA ABOVE
114            addCombining('\u0314', ACCENT_REVERSED_COMMA_ABOVE);
115            // U+0315: COMBINING COMMA ABOVE RIGHT
116            addCombining('\u0315', ACCENT_COMMA_ABOVE_RIGHT);
117            // U+031B: COMBINING HORN
118            addCombining('\u031B', ACCENT_HORN);
119            // U+0323: COMBINING DOT BELOW
120            addCombining('\u0323', ACCENT_DOT_BELOW);
121            // U+0326: COMBINING COMMA BELOW
122            //addCombining('\u0326', ACCENT_COMMA_BELOW);
123            // U+0327: COMBINING CEDILLA
124            addCombining('\u0327', ACCENT_CEDILLA);
125            // U+0328: COMBINING OGONEK
126            addCombining('\u0328', ACCENT_OGONEK);
127            // U+0329: COMBINING VERTICAL LINE BELOW
128            addCombining('\u0329', ACCENT_VERTICAL_LINE_BELOW);
129            // U+0331: COMBINING MACRON BELOW
130            addCombining('\u0331', ACCENT_MACRON_BELOW);
131            // U+0335: COMBINING SHORT STROKE OVERLAY
132            addCombining('\u0335', ACCENT_STROKE);
133            // U+0342: COMBINING GREEK PERISPOMENI
134            //addCombining('\u0342', ACCENT_PERISPOMENI);
135            // U+0344: COMBINING GREEK DIALYTIKA TONOS
136            //addCombining('\u0344', ACCENT_DIALYTIKA_TONOS);
137            // U+0345: COMBINING GREEK YPOGEGRAMMENI
138            //addCombining('\u0345', ACCENT_YPOGEGRAMMENI);
139
140            // One-way mappings to equivalent preferred accents.
141            // U+0340: COMBINING GRAVE TONE MARK
142            sCombiningToAccent.append('\u0340', ACCENT_GRAVE);
143            // U+0341: COMBINING ACUTE TONE MARK
144            sCombiningToAccent.append('\u0341', ACCENT_ACUTE);
145            // U+0343: COMBINING GREEK KORONIS
146            sCombiningToAccent.append('\u0343', ACCENT_COMMA_ABOVE);
147
148            // One-way legacy mappings to preserve compatibility with older applications.
149            // U+0300: COMBINING GRAVE ACCENT
150            sAccentToCombining.append(ACCENT_GRAVE_LEGACY, '\u0300');
151            // U+0302: COMBINING CIRCUMFLEX ACCENT
152            sAccentToCombining.append(ACCENT_CIRCUMFLEX_LEGACY, '\u0302');
153            // U+0303: COMBINING TILDE
154            sAccentToCombining.append(ACCENT_TILDE_LEGACY, '\u0303');
155        }
156
157        private static void addCombining(int combining, int accent) {
158            sCombiningToAccent.append(combining, accent);
159            sAccentToCombining.append(accent, combining);
160        }
161
162        // Caution! This may only contain chars, not supplementary code points. It's unlikely
163        // it will ever need to, but if it does we'll have to change this
164        private static final SparseIntArray sNonstandardDeadCombinations = new SparseIntArray();
165        static {
166            // Non-standard decompositions.
167            // Stroke modifier for Finnish multilingual keyboard and others.
168            // U+0110: LATIN CAPITAL LETTER D WITH STROKE
169            addNonStandardDeadCombination(ACCENT_STROKE, 'D', '\u0110');
170            // U+01E4: LATIN CAPITAL LETTER G WITH STROKE
171            addNonStandardDeadCombination(ACCENT_STROKE, 'G', '\u01e4');
172            // U+0126: LATIN CAPITAL LETTER H WITH STROKE
173            addNonStandardDeadCombination(ACCENT_STROKE, 'H', '\u0126');
174            // U+0197: LATIN CAPITAL LETTER I WITH STROKE
175            addNonStandardDeadCombination(ACCENT_STROKE, 'I', '\u0197');
176            // U+0141: LATIN CAPITAL LETTER L WITH STROKE
177            addNonStandardDeadCombination(ACCENT_STROKE, 'L', '\u0141');
178            // U+00D8: LATIN CAPITAL LETTER O WITH STROKE
179            addNonStandardDeadCombination(ACCENT_STROKE, 'O', '\u00d8');
180            // U+0166: LATIN CAPITAL LETTER T WITH STROKE
181            addNonStandardDeadCombination(ACCENT_STROKE, 'T', '\u0166');
182            // U+0111: LATIN SMALL LETTER D WITH STROKE
183            addNonStandardDeadCombination(ACCENT_STROKE, 'd', '\u0111');
184            // U+01E5: LATIN SMALL LETTER G WITH STROKE
185            addNonStandardDeadCombination(ACCENT_STROKE, 'g', '\u01e5');
186            // U+0127: LATIN SMALL LETTER H WITH STROKE
187            addNonStandardDeadCombination(ACCENT_STROKE, 'h', '\u0127');
188            // U+0268: LATIN SMALL LETTER I WITH STROKE
189            addNonStandardDeadCombination(ACCENT_STROKE, 'i', '\u0268');
190            // U+0142: LATIN SMALL LETTER L WITH STROKE
191            addNonStandardDeadCombination(ACCENT_STROKE, 'l', '\u0142');
192            // U+00F8: LATIN SMALL LETTER O WITH STROKE
193            addNonStandardDeadCombination(ACCENT_STROKE, 'o', '\u00f8');
194            // U+0167: LATIN SMALL LETTER T WITH STROKE
195            addNonStandardDeadCombination(ACCENT_STROKE, 't', '\u0167');
196        }
197
198        private static void addNonStandardDeadCombination(final int deadCodePoint,
199                final int spacingCodePoint, final int result) {
200            final int combination = (deadCodePoint << 16) | spacingCodePoint;
201            sNonstandardDeadCombinations.put(combination, result);
202        }
203
204        public static final int NOT_A_CHAR = 0;
205        public static final int BITS_TO_SHIFT_DEAD_CODE_POINT_FOR_NON_STANDARD_COMBINATION = 16;
206        // Get a non-standard combination
207        public static char getNonstandardCombination(final int deadCodePoint,
208                final int spacingCodePoint) {
209            final int combination = spacingCodePoint |
210                    (deadCodePoint << BITS_TO_SHIFT_DEAD_CODE_POINT_FOR_NON_STANDARD_COMBINATION);
211            return (char)sNonstandardDeadCombinations.get(combination, NOT_A_CHAR);
212        }
213    }
214
    // Code points of the dead keys received so far that have not been output yet.
    // Empty when no dead key is being tracked.
    // TODO: make this a list of events instead
    final StringBuilder mDeadSequence = new StringBuilder();
217
218    @Nonnull
219    private static Event createEventChainFromSequence(final @Nonnull CharSequence text,
220            @Nonnull final Event originalEvent) {
221        int index = text.length();
222        if (index <= 0) {
223            return originalEvent;
224        }
225        Event lastEvent = null;
226        do {
227            final int codePoint = Character.codePointBefore(text, index);
228            lastEvent = Event.createHardwareKeypressEvent(codePoint,
229                    originalEvent.mKeyCode, lastEvent, false /* isKeyRepeat */);
230            index -= Character.charCount(codePoint);
231        } while (index > 0);
232        return lastEvent;
233    }
234
235    @Override
236    @Nonnull
237    public Event processEvent(final ArrayList<Event> previousEvents, final Event event) {
238        if (TextUtils.isEmpty(mDeadSequence)) {
239            // No dead char is currently being tracked: this is the most common case.
240            if (event.isDead()) {
241                // The event was a dead key. Start tracking it.
242                mDeadSequence.appendCodePoint(event.mCodePoint);
243                return Event.createConsumedEvent(event);
244            }
245            // Regular keystroke when not keeping track of a dead key. Simply said, there are
246            // no dead keys at all in the current input, so this combiner has nothing to do and
247            // simply returns the event as is. The majority of events will go through this path.
248            return event;
249        }
250        if (Character.isWhitespace(event.mCodePoint)
251                || event.mCodePoint == mDeadSequence.codePointBefore(mDeadSequence.length())) {
252            // When whitespace or twice the same dead key, we should output the dead sequence as is.
253            final Event resultEvent = createEventChainFromSequence(mDeadSequence.toString(),
254                    event);
255            mDeadSequence.setLength(0);
256            return resultEvent;
257        }
258        if (event.isFunctionalKeyEvent()) {
259            if (Constants.CODE_DELETE == event.mKeyCode) {
260                // Remove the last code point
261                final int trimIndex = mDeadSequence.length() - Character.charCount(
262                        mDeadSequence.codePointBefore(mDeadSequence.length()));
263                mDeadSequence.setLength(trimIndex);
264                return Event.createConsumedEvent(event);
265            }
266            return event;
267        }
268        if (event.isDead()) {
269            mDeadSequence.appendCodePoint(event.mCodePoint);
270            return Event.createConsumedEvent(event);
271        }
272        // Combine normally.
273        final StringBuilder sb = new StringBuilder();
274        sb.appendCodePoint(event.mCodePoint);
275        int codePointIndex = 0;
276        while (codePointIndex < mDeadSequence.length()) {
277            final int deadCodePoint = mDeadSequence.codePointAt(codePointIndex);
278            final char replacementSpacingChar =
279                    Data.getNonstandardCombination(deadCodePoint, event.mCodePoint);
280            if (Data.NOT_A_CHAR != replacementSpacingChar) {
281                sb.setCharAt(0, replacementSpacingChar);
282            } else {
283                final int combining = Data.sAccentToCombining.get(deadCodePoint);
284                sb.appendCodePoint(0 == combining ? deadCodePoint : combining);
285            }
286            codePointIndex += Character.isSupplementaryCodePoint(deadCodePoint) ? 2 : 1;
287        }
288        final String normalizedString = Normalizer.normalize(sb, Normalizer.Form.NFC);
289        final Event resultEvent = createEventChainFromSequence(normalizedString, event);
290        mDeadSequence.setLength(0);
291        return resultEvent;
292    }
293
    /**
     * Forgets any dead keys that are pending, without outputting them.
     */
    @Override
    public void reset() {
        mDeadSequence.setLength(0);
    }
298
299    @Override
300    public CharSequence getCombiningStateFeedback() {
301        return mDeadSequence;
302    }
303}
304