HistogramPredictor.java revision 1253e9fb0b5570ab8adaed222655a5b052aa072e
1f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/*
2f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Copyright (C) 2011 The Android Open Source Project
3f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
4f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Licensed under the Apache License, Version 2.0 (the "License");
5f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * you may not use this file except in compliance with the License.
6f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * You may obtain a copy of the License at
7f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
8f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *      http://www.apache.org/licenses/LICENSE-2.0
9f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
10f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Unless required by applicable law or agreed to in writing, software
11f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * distributed under the License is distributed on an "AS IS" BASIS,
12f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * See the License for the specific language governing permissions and
14f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * limitations under the License.
15f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */
16f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
17f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
18f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpackage android.bordeaux.learning;
19f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
20f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Log;
21f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Pair;
22f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
23f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayInputStream;
24f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayOutputStream;
25f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.IOException;
26f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectInputStream;
27f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectOutputStream;
28f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.Serializable;
29f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.ArrayList;
30f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Collections;
31f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Comparator;
32f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashMap;
33f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashSet;
34f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Iterator;
35f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.List;
36f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map;
37f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map.Entry;
38f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
39f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/**
40f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * A histogram based predictor which records co-occurrences of applations with a speficic feature,
41f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * for example, location, * time of day, etc. The histogram is kept in a two level hash table.
42f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * The first level key is the feature value and the second level key is the app id.
43f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */
44c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// TODOS:
45c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 1. Use forgetting factor to downweight istances propotional to the time
46c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 2. Different features could have different weights on prediction scores.
47c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 3. Make prediction (on each feature) only when the histogram has collected
48c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin//    sufficient counts.
49f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpublic class HistogramPredictor {
50f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    final static String TAG = "HistogramPredictor";
51f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
52f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    private HashMap<String, HistogramCounter> mPredictor =
53f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            new HashMap<String, HistogramCounter>();
54f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    private HashMap<String, Integer> mClassCounts = new HashMap<String, Integer>();
561253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    private int mTotalClassCount = 0;
571253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
58f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    private static final double FEATURE_INACTIVE_LIKELIHOOD = 0.00000001;
5978a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin    private static final double LOG_INACTIVE = Math.log(FEATURE_INACTIVE_LIKELIHOOD);
60f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
61f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
62f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * This class keeps the histogram counts for each feature and provide the
63f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * joint probabilities of <feature, class>.
64f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
65f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    private class HistogramCounter {
66f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        private HashMap<String, HashMap<String, Integer> > mCounter =
67f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                new HashMap<String, HashMap<String, Integer> >();
68f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        private int mTotalCount;
69f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
70f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HistogramCounter() {
71f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            resetCounter();
72f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
73f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
74f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void setCounter(HashMap<String, HashMap<String, Integer> > counter) {
75f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            resetCounter();
76f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mCounter.putAll(counter);
77f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
78f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            // get total count
79f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            for (Map.Entry<String, HashMap<String, Integer> > entry : counter.entrySet()) {
80f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                for (Integer value : entry.getValue().values()) {
81f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    mTotalCount += value.intValue();
82f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                }
83f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
84f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
85f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
86f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void resetCounter() {
87f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mCounter.clear();
88f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mTotalCount = 0;
89f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
90f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
91f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void addSample(String className, String featureValue) {
92f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HashMap<String, Integer> classCounts;
93f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
94f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (!mCounter.containsKey(featureValue)) {
95f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                classCounts = new HashMap<String, Integer>();
96f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                mCounter.put(featureValue, classCounts);
971253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            } else {
981253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                classCounts = mCounter.get(featureValue);
99f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
100f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            int count = (classCounts.containsKey(className)) ?
101f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    classCounts.get(className) + 1 : 1;
102f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            classCounts.put(className, count);
103f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mTotalCount++;
104f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
105f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
106f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HashMap<String, Double> getClassScores(String featureValue) {
107f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HashMap<String, Double> classScores = new HashMap<String, Double>();
108f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
109f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            double logTotalCount = Math.log((double) mTotalCount);
110f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (mCounter.containsKey(featureValue)) {
111f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                for(Map.Entry<String, Integer> entry :
112f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                        mCounter.get(featureValue).entrySet()) {
113f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    double score =
114f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                            Math.log((double) entry.getValue()) - logTotalCount;
115f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    classScores.put(entry.getKey(), score);
116f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                }
117f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
118f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return classScores;
119f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
120f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
121f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HashMap<String, HashMap<String, Integer> > getCounter() {
122f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return mCounter;
123f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
124f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
125f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
12678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin    private double getDefaultLikelihood(Map<String, String> features) {
12778a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        int featureCount = 0;
12878a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin
12978a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        for(String featureName : features.keySet()) {
13078a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin            if (mPredictor.containsKey(featureName)) {
13178a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin                featureCount++;
13278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin            }
13378a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        }
13478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        return LOG_INACTIVE * featureCount;
13578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin    }
13678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin
137f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
138f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * Given a map of feature name -value pairs returns the mostly likely apps to
139f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * be launched with corresponding likelihoods.
140f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
141f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public List<Map.Entry<String, Double> > findTopClasses(Map<String, String> features, int topK) {
142f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // Most sophisticated function in this class
143f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, Double> appScores = new HashMap<String, Double>();
14478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        double defaultLikelihood = getDefaultLikelihood(features);
145f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
1461253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        HashMap<String, Integer> appearCounts = new HashMap<String, Integer>();
1471253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
148f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // compute all app scores
149f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
150f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            String featureName = entry.getKey();
151f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HistogramCounter counter = entry.getValue();
152f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
153f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (features.containsKey(featureName)) {
154f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                String featureValue = features.get(featureName);
155f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                HashMap<String, Double> scoreMap = counter.getClassScores(featureValue);
156f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
157f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                for (Map.Entry<String, Double> item : scoreMap.entrySet()) {
158f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    String appName = item.getKey();
159f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    double appScore = item.getValue();
160f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    double score = (appScores.containsKey(appName)) ?
161f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                        appScores.get(appName) : defaultLikelihood;
16278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin                    score += appScore - LOG_INACTIVE;
163f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    appScores.put(appName, score);
1641253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
1651253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    int count = (appearCounts.containsKey(appName)) ?
1661253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                        appearCounts.get(appName) + 1 : 1;
1671253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    appearCounts.put(appName, count);
1681253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                }
1691253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            }
1701253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
1711253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
1721253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        // TODO: this check should be unnecessary
1731253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        if (mClassCounts.size() != 0 && mTotalClassCount != 0) {
1741253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            for (Map.Entry<String, Double> entry : appScores.entrySet()) {
1751253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                String appName = entry.getKey();
1761253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                double appScore = entry.getValue();
1771253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                if (!appearCounts.containsKey(appName)) {
1781253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    throw new RuntimeException("appearance count error!");
179f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                }
1801253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                int appearCount = appearCounts.get(appName);
1811253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
1821253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                if (!mClassCounts.containsKey(appName)) {
1831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    throw new RuntimeException("class count error!");
1841253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                }
1851253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                double appPrior =
1861253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    Math.log(mClassCounts.get(appName)) - Math.log(mTotalClassCount);
1871253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                appScores.put(appName, appScore - appPrior * (appearCount - 1));
188f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
189f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
190f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
191f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // sort app scores
192f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        List<Map.Entry<String, Double> > appList =
193f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin               new ArrayList<Map.Entry<String, Double> >(appScores.size());
194f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        appList.addAll(appScores.entrySet());
195f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        Collections.sort(appList, new  Comparator<Map.Entry<String, Double> >() {
196f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            public int compare(Map.Entry<String, Double> o1,
197f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                               Map.Entry<String, Double> o2) {
198f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                return o2.getValue().compareTo(o1.getValue());
199f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
200f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        });
201f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
2021253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        Log.v(TAG, "findTopApps appList: " + appList);
203f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        return appList;
204f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
205f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
206f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
207f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * Add a new observation of given sample id and features to the histograms
208f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
209f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public void addSample(String sampleId, Map<String, String> features) {
210f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
211f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            String featureName = entry.getKey();
212f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HistogramCounter counter = entry.getValue();
213f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
214f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (features.containsKey(featureName)) {
215f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                String featureValue = features.get(featureName);
216f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                counter.addSample(sampleId, featureValue);
217f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
218f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
2191253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
2201253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        int sampleCount = (mClassCounts.containsKey(sampleId)) ?
2211253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            mClassCounts.get(sampleId) + 1 : 1;
2221253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.put(sampleId, sampleCount);
223f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
224f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
225f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
226f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * reset predictor to a empty model
227f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
228f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public void resetPredictor() {
229f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // TODO: not sure this step would reduce memory waste
230f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (HistogramCounter counter : mPredictor.values()) {
231f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            counter.resetCounter();
232f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
233f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        mPredictor.clear();
2341253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
2351253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.clear();
2361253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mTotalClassCount = 0;
237f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
238f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
239f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
240f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * specify a feature to used for prediction
241f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
242f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public void useFeature(String featureName) {
243f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        if (!mPredictor.containsKey(featureName)) {
244f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mPredictor.put(featureName, new HistogramCounter());
245f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
246f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
247f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
248f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
249f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * convert the prediction model into a byte array
250f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
251f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public byte[] getModel() {
252f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // TODO: convert model to a more memory efficient data structure.
253f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, HashMap<String, HashMap<String, Integer > > > model =
254f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                new HashMap<String, HashMap<String, HashMap<String, Integer > > >();
255f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for(Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
256f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            model.put(entry.getKey(), entry.getValue().getCounter());
257f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
258f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
259f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        try {
260f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
261f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ObjectOutputStream objStream = new ObjectOutputStream(byteStream);
262f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            objStream.writeObject(model);
263f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            byte[] bytes = byteStream.toByteArray();
264f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            //Log.i(TAG, "getModel: " + bytes);
265f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return bytes;
266f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (IOException e) {
267f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Can't get model");
268f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
269f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
270f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
271f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
272f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * set the prediction model from a model data in the format of byte array
273f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
274f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public boolean setModel(final byte[] modelData) {
275f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, HashMap<String, HashMap<String, Integer > > > model;
276f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
277f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        try {
278f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ByteArrayInputStream input = new ByteArrayInputStream(modelData);
279f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ObjectInputStream objStream = new ObjectInputStream(input);
280f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            model = (HashMap<String, HashMap<String, HashMap<String, Integer > > >)
281f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    objStream.readObject();
282f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (IOException e) {
283f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Can't load model");
284f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (ClassNotFoundException e) {
285f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Learning class not found");
286f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
287f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
288f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        resetPredictor();
289f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (Map.Entry<String, HashMap<String, HashMap<String, Integer> > > entry :
290f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                model.entrySet()) {
291f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            useFeature(entry.getKey());
292f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mPredictor.get(entry.getKey()).setCounter(entry.getValue());
293f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
2941253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
2951253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        // TODO: this is a temporary fix for now
2961253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        loadClassCounter();
2971253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
298f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        return true;
299f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
3001253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3011253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    private void loadClassCounter() {
3021253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        String TIME_OF_WEEK = "Time of Week";
3031253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3041253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        if (!mPredictor.containsKey(TIME_OF_WEEK)) {
3051253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            throw new RuntimeException("Precition model error: missing Time of Week!");
3061253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
3071253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3081253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        HashMap<String, HashMap<String, Integer> > counter =
3091253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            mPredictor.get(TIME_OF_WEEK).getCounter();
3101253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3111253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mTotalClassCount = 0;
3121253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.clear();
3131253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        for (HashMap<String, Integer> map : counter.values()) {
3141253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            for (Map.Entry<String, Integer> entry : map.entrySet()) {
3151253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                int classCount = entry.getValue();
3161253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                String className = entry.getKey();
3171253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                mTotalClassCount += classCount;
3181253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3191253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                if (mClassCounts.containsKey(className)) {
3201253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    classCount += mClassCounts.get(className);
3211253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                }
3221253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                mClassCounts.put(className, classCount);
3231253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            }
3241253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
3251253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3261253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        Log.e(TAG, "class counts: " + mClassCounts + ", total count: " +
3271253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin              mTotalClassCount);
3281253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    }
329f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin}
330