1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.research;
18
19import android.os.SystemClock;
20import android.text.TextUtils;
21import android.util.JsonWriter;
22import android.util.Log;
23
24import com.android.inputmethod.latin.SuggestedWords;
25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
26import com.android.inputmethod.latin.define.ProductionFlag;
27
28import java.io.IOException;
29import java.util.ArrayList;
30import java.util.Arrays;
31import java.util.List;
32import java.util.regex.Pattern;
33
/**
 * A group of log statements related to each other.
 *
 * A LogUnit is a collection of LogStatements, each of which is generated at a particular point
 * in the code.  (The values and timestamp recorded for each statement are not stored in the
 * LogStatement itself; they are kept in parallel lists in the instance variables here.)  A
 * single LogUnit's statements can correspond to all the calls made while in the same composing
 * region, or all the calls between committing the last composing region and the first character
 * of the next composing region.
 *
 * Individual statements in a log may be marked as potentially private.  If so, then they are only
 * published to a ResearchLog if the ResearchLogger determines that publishing the entire LogUnit
 * will not violate the user's privacy.  Checks for this may include whether other LogUnits have
 * been published recently, or whether the LogUnit contains numbers, etc.
 */
48public class LogUnit {
49    private static final String TAG = LogUnit.class.getSimpleName();
50    private static final boolean DEBUG = false
51            && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
52
53    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
54    private static final String[] EMPTY_STRING_ARRAY = new String[0];
55
56    private final ArrayList<LogStatement> mLogStatementList;
57    private final ArrayList<Object[]> mValuesList;
58    // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
59    // mTimeList.
60    private final ArrayList<Long> mTimeList;
61    // Words that this LogUnit generates.  Should be null if the data in the LogUnit does not
62    // generate a genuine word (i.e. separators alone do not count as a word).  Should never be
63    // empty.  Note that if the user types spaces explicitly, then normally mWords should contain
64    // only a single word; it will only contain space-separate multiple words if the user does not
65    // enter a space, and the system enters one automatically.
66    private String mWords;
67    private String[] mWordArray = EMPTY_STRING_ARRAY;
68    private boolean mMayContainDigit;
69    private boolean mIsPartOfMegaword;
70    private boolean mContainsCorrection;
71
72    // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
73    // correction.
74    private int mCorrectionType;
75    // LogUnits start in this state.  If a word is entered without being corrected, it will have
76    // this CorrectiontType.
77    public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
78    // The LogUnit was corrected manually by the user in an unspecified way.
79    public static final int CORRECTIONTYPE_CORRECTION = 1;
80    // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
81    // the first word typed here.  (Note: this is a heuristic value, it may be incorrect, for
82    // example, if the user repositions the cursor).
83    public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
84    // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
85    // of the first word typed here.  (Again, a heuristic).  It is probably a typo correction.
86    public static final int CORRECTIONTYPE_TYPO = 3;
87    // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
88    // state and statistics.  This should include how many times it has been corrected, whether
89    // other LogUnit edits were done between edits to this LogUnit, etc.  Also track when a LogUnit
90    // previously contained a word, but was corrected to empty (because it was deleted, and there is
91    // no known replacement).
92
93    private SuggestedWords mSuggestedWords;
94
95    public LogUnit() {
96        mLogStatementList = new ArrayList<LogStatement>();
97        mValuesList = new ArrayList<Object[]>();
98        mTimeList = new ArrayList<Long>();
99        mIsPartOfMegaword = false;
100        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
101        mSuggestedWords = null;
102    }
103
104    private LogUnit(final ArrayList<LogStatement> logStatementList,
105            final ArrayList<Object[]> valuesList,
106            final ArrayList<Long> timeList,
107            final boolean isPartOfMegaword) {
108        mLogStatementList = logStatementList;
109        mValuesList = valuesList;
110        mTimeList = timeList;
111        mIsPartOfMegaword = isPartOfMegaword;
112        mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
113        mSuggestedWords = null;
114    }
115
116    private static final Object[] NULL_VALUES = new Object[0];
117    /**
118     * Adds a new log statement.  The time parameter in successive calls to this method must be
119     * monotonically increasing, or splitByTime() will not work.
120     */
121    public void addLogStatement(final LogStatement logStatement, final long time,
122            Object... values) {
123        if (values == null) {
124            values = NULL_VALUES;
125        }
126        mLogStatementList.add(logStatement);
127        mValuesList.add(values);
128        mTimeList.add(time);
129    }
130
131    /**
132     * Publish the contents of this LogUnit to {@code researchLog}.
133     *
134     * For each publishable {@code LogStatement}, invoke {@link LogStatement#outputToLocked}.
135     *
136     * @param researchLog where to publish the contents of this {@code LogUnit}
137     * @param canIncludePrivateData whether the private data in this {@code LogUnit} should be
138     * included
139     *
140     * @throws IOException if publication to the log file is not possible
141     */
142    public synchronized void publishTo(final ResearchLog researchLog,
143            final boolean canIncludePrivateData) throws IOException {
144        // Write out any logStatement that passes the privacy filter.
145        final int size = mLogStatementList.size();
146        if (size != 0) {
147            // Note that jsonWriter is only set to a non-null value if the logUnit start text is
148            // output and at least one logStatement is output.
149            JsonWriter jsonWriter = null;
150            for (int i = 0; i < size; i++) {
151                final LogStatement logStatement = mLogStatementList.get(i);
152                if (!canIncludePrivateData && logStatement.isPotentiallyPrivate()) {
153                    continue;
154                }
155                if (mIsPartOfMegaword && logStatement.isPotentiallyRevealing()) {
156                    continue;
157                }
158                // Only retrieve the jsonWriter if we need to.  If we don't get this far, then
159                // researchLog.getInitializedJsonWriterLocked() will not ever be called, and the
160                // file will not have been opened for writing.
161                if (jsonWriter == null) {
162                    jsonWriter = researchLog.getInitializedJsonWriterLocked();
163                    outputLogUnitStart(jsonWriter, canIncludePrivateData);
164                }
165                logStatement.outputToLocked(jsonWriter, mTimeList.get(i), mValuesList.get(i));
166            }
167            if (jsonWriter != null) {
168                // We must have called logUnitStart earlier, so emit a logUnitStop.
169                outputLogUnitStop(jsonWriter);
170            }
171        }
172    }
173
174    private static final String WORD_KEY = "_wo";
175    private static final String CORRECTION_TYPE_KEY = "_corType";
176    private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
177    private static final String LOG_UNIT_END_KEY = "logUnitEnd";
178
179    final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA =
180            new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
181                    false /* isPotentiallyRevealing */, WORD_KEY, CORRECTION_TYPE_KEY);
182    final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA =
183            new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */,
184                    false /* isPotentiallyRevealing */);
185    private void outputLogUnitStart(final JsonWriter jsonWriter,
186            final boolean canIncludePrivateData) {
187        final LogStatement logStatement;
188        if (canIncludePrivateData) {
189            LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
190                    SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
191        } else {
192            LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
193                    SystemClock.uptimeMillis());
194        }
195    }
196
197    final LogStatement LOGSTATEMENT_LOG_UNIT_END =
198            new LogStatement(LOG_UNIT_END_KEY, false /* isPotentiallyPrivate */,
199                    false /* isPotentiallyRevealing */);
200    private void outputLogUnitStop(final JsonWriter jsonWriter) {
201        LOGSTATEMENT_LOG_UNIT_END.outputToLocked(jsonWriter, SystemClock.uptimeMillis());
202    }
203
204    /**
205     * Mark the current logUnit as containing data to generate {@code newWords}.
206     *
207     * If {@code setWord()} was previously called for this LogUnit, then the method will try to
208     * determine what kind of correction it is, and update its internal state of the correctionType
209     * accordingly.
210     *
211     * @param newWords The words this LogUnit generates.  Caller should not pass null or the empty
212     * string.
213     */
214    public void setWords(final String newWords) {
215        if (hasOneOrMoreWords()) {
216            // The word was already set once, and it is now being changed.  See if the new word
217            // is close to the old word.  If so, then the change is probably a typo correction.
218            // If not, the user may have decided to enter a different word, so flag it.
219            if (mSuggestedWords != null) {
220                if (isInSuggestedWords(newWords, mSuggestedWords)) {
221                    mCorrectionType = CORRECTIONTYPE_TYPO;
222                } else {
223                    mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
224                }
225            } else {
226                // No suggested words, so it's not clear whether it's a typo or different word.
227                // Mark it as a generic correction.
228                mCorrectionType = CORRECTIONTYPE_CORRECTION;
229            }
230        } else {
231            mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
232        }
233        mWords = newWords;
234
235        // Update mWordArray
236        mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
237                : WHITESPACE_PATTERN.split(mWords);
238        if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
239            // Empty string at beginning of array.  Must have been whitespace at the start of the
240            // word.  Remove the empty string.
241            mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
242        }
243    }
244
245    public String getWordsAsString() {
246        return mWords;
247    }
248
249    /**
250     * Retuns the words generated by the data in this LogUnit.
251     *
252     * The first word may be an empty string, if the data in the LogUnit started by generating
253     * whitespace.
254     *
255     * @return the array of words. an empty list of there are no words associated with this LogUnit.
256     */
257    public String[] getWordsAsStringArray() {
258        return mWordArray;
259    }
260
261    public boolean hasOneOrMoreWords() {
262        return mWordArray.length >= 1;
263    }
264
265    public int getNumWords() {
266        return mWordArray.length;
267    }
268
269    // TODO: Refactor to eliminate getter/setters
270    public void setMayContainDigit() {
271        mMayContainDigit = true;
272    }
273
274    // TODO: Refactor to eliminate getter/setters
275    public boolean mayContainDigit() {
276        return mMayContainDigit;
277    }
278
279    // TODO: Refactor to eliminate getter/setters
280    public void setContainsCorrection() {
281        mContainsCorrection = true;
282    }
283
284    // TODO: Refactor to eliminate getter/setters
285    public boolean containsCorrection() {
286        return mContainsCorrection;
287    }
288
289    // TODO: Refactor to eliminate getter/setters
290    public void setCorrectionType(final int correctionType) {
291        mCorrectionType = correctionType;
292    }
293
294    // TODO: Refactor to eliminate getter/setters
295    public int getCorrectionType() {
296        return mCorrectionType;
297    }
298
299    public boolean isEmpty() {
300        return mLogStatementList.isEmpty();
301    }
302
303    /**
304     * Split this logUnit, with all events before maxTime staying in the current logUnit, and all
305     * events after maxTime going into a new LogUnit that is returned.
306     */
307    public LogUnit splitByTime(final long maxTime) {
308        // Assume that mTimeList is in sorted order.
309        final int length = mTimeList.size();
310        // TODO: find time by binary search, e.g. using Collections#binarySearch()
311        for (int index = 0; index < length; index++) {
312            if (mTimeList.get(index) > maxTime) {
313                final List<LogStatement> laterLogStatements =
314                        mLogStatementList.subList(index, length);
315                final List<Object[]> laterValues = mValuesList.subList(index, length);
316                final List<Long> laterTimes = mTimeList.subList(index, length);
317
318                // Create the LogUnit containing the later logStatements and associated data.
319                final LogUnit newLogUnit = new LogUnit(
320                        new ArrayList<LogStatement>(laterLogStatements),
321                        new ArrayList<Object[]>(laterValues),
322                        new ArrayList<Long>(laterTimes),
323                        true /* isPartOfMegaword */);
324                newLogUnit.mWords = null;
325                newLogUnit.mMayContainDigit = mMayContainDigit;
326                newLogUnit.mContainsCorrection = mContainsCorrection;
327
328                // Purge the logStatements and associated data from this LogUnit.
329                laterLogStatements.clear();
330                laterValues.clear();
331                laterTimes.clear();
332                mIsPartOfMegaword = true;
333
334                return newLogUnit;
335            }
336        }
337        return new LogUnit();
338    }
339
340    public void append(final LogUnit logUnit) {
341        mLogStatementList.addAll(logUnit.mLogStatementList);
342        mValuesList.addAll(logUnit.mValuesList);
343        mTimeList.addAll(logUnit.mTimeList);
344        mWords = null;
345        if (logUnit.mWords != null) {
346            setWords(logUnit.mWords);
347        }
348        mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
349        mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
350        mIsPartOfMegaword = false;
351    }
352
353    public SuggestedWords getSuggestions() {
354        return mSuggestedWords;
355    }
356
357    /**
358     * Initialize the suggestions.
359     *
360     * Once set to a non-null value, the suggestions may not be changed again.  This is to keep
361     * track of the list of words that are close to the user's initial effort to type the word.
362     * Only words that are close to the initial effort are considered typo corrections.
363     */
364    public void initializeSuggestions(final SuggestedWords suggestedWords) {
365        if (mSuggestedWords == null) {
366            mSuggestedWords = suggestedWords;
367        }
368    }
369
370    private static boolean isInSuggestedWords(final String queryWord,
371            final SuggestedWords suggestedWords) {
372        if (TextUtils.isEmpty(queryWord)) {
373            return false;
374        }
375        final int size = suggestedWords.size();
376        for (int i = 0; i < size; i++) {
377            final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i);
378            if (queryWord.equals(wordInfo.mWord)) {
379                return true;
380            }
381        }
382        return false;
383    }
384
385    /**
386     * Remove data associated with selecting the Research button.
387     *
388     * A LogUnit will capture all user interactions with the IME, including the "meta-interactions"
389     * of using the Research button to control the logging (e.g. by starting and stopping recording
390     * of a test case).  Because meta-interactions should not be part of the normal log, calling
391     * this method will set a field in the LogStatements of the motion events to indiciate that
392     * they should be disregarded.
393     *
394     * This implementation assumes that the data recorded by the meta-interaction takes the
395     * form of all events following the first MotionEvent.ACTION_DOWN before the first long-press
396     * before the last onCodeEvent containing a code matching {@code LogStatement.VALUE_RESEARCH}.
397     *
398     * @returns true if data was removed
399     */
400    public boolean removeResearchButtonInvocation() {
401        // This method is designed to be idempotent.
402
403        // First, find last invocation of "research" key
404        final int indexOfLastResearchKey = findLastIndexContainingKeyValue(
405                LogStatement.TYPE_POINTER_TRACKER_CALL_LISTENER_ON_CODE_INPUT,
406                LogStatement.KEY_CODE, LogStatement.VALUE_RESEARCH);
407        if (indexOfLastResearchKey < 0) {
408            // Could not find invocation of "research" key.  Leave log as is.
409            if (DEBUG) {
410                Log.d(TAG, "Could not find research key");
411            }
412            return false;
413        }
414
415        // Look for the long press that started the invocation of the research key code input.
416        final int indexOfLastLongPressBeforeResearchKey =
417                findLastIndexBefore(LogStatement.TYPE_MAIN_KEYBOARD_VIEW_ON_LONG_PRESS,
418                        indexOfLastResearchKey);
419
420        // Look for DOWN event preceding the long press
421        final int indexOfLastDownEventBeforeLongPress =
422                findLastIndexContainingKeyValueBefore(LogStatement.TYPE_MOTION_EVENT,
423                        LogStatement.ACTION, LogStatement.VALUE_DOWN,
424                        indexOfLastLongPressBeforeResearchKey);
425
426        // Flag all LatinKeyboardViewProcessMotionEvents from the DOWN event to the research key as
427        // logging-related
428        final int startingIndex = indexOfLastDownEventBeforeLongPress == -1 ? 0
429                : indexOfLastDownEventBeforeLongPress;
430        for (int index = startingIndex; index < indexOfLastResearchKey; index++) {
431            final LogStatement logStatement = mLogStatementList.get(index);
432            final String type = logStatement.getType();
433            final Object[] values = mValuesList.get(index);
434            if (type.equals(LogStatement.TYPE_MOTION_EVENT)) {
435                logStatement.setValue(LogStatement.KEY_IS_LOGGING_RELATED, values, true);
436            }
437        }
438        return true;
439    }
440
441    /**
442     * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}.
443     *
444     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
445     * @param startingIndex the index to start the backward search from.  Must be less than the
446     * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
447     * in which case -1 is returned.
448     *
449     * @return The index of the last LogStatement, -1 if none exists.
450     */
451    private int findLastIndexBefore(final String queryType, final int startingIndex) {
452        return findLastIndexContainingKeyValueBefore(queryType, null, null, startingIndex);
453    }
454
455    /**
456     * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}
457     * containing the given key-value pair.
458     *
459     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
460     * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement
461     * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding
462     * value
463     *
464     * @return The index of the last LogStatement, -1 if none exists.
465     */
466    private int findLastIndexContainingKeyValue(final String queryType, final String queryKey,
467            final Object queryValue) {
468        return findLastIndexContainingKeyValueBefore(queryType, queryKey, queryValue,
469                mLogStatementList.size() - 1);
470    }
471
472    /**
473     * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}
474     * containing the given key-value pair.
475     *
476     * @param queryType a String that must be {@code String.equals()} to the LogStatement type
477     * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement
478     * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding
479     * value
480     * @param startingIndex the index to start the backward search from.  Must be less than the
481     * length of mLogStatementList, or an IndexOutOfBoundsException is thrown.  Can be negative,
482     * in which case -1 is returned.
483     *
484     * @return The index of the last LogStatement, -1 if none exists.
485     */
486    private int findLastIndexContainingKeyValueBefore(final String queryType, final String queryKey,
487            final Object queryValue, final int startingIndex) {
488        if (startingIndex < 0) {
489            return -1;
490        }
491        for (int index = startingIndex; index >= 0; index--) {
492            final LogStatement logStatement = mLogStatementList.get(index);
493            final String type = logStatement.getType();
494            if (type.equals(queryType) && (queryKey == null
495                    || logStatement.containsKeyValuePair(queryKey, queryValue,
496                            mValuesList.get(index)))) {
497                return index;
498            }
499        }
500        return -1;
501    }
502}
503