HistogramPredictor.java revision 1253e9fb0b5570ab8adaed222655a5b052aa072e
1f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/* 2f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Copyright (C) 2011 The Android Open Source Project 3f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 4f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Licensed under the Apache License, Version 2.0 (the "License"); 5f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * you may not use this file except in compliance with the License. 6f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * You may obtain a copy of the License at 7f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 8f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * http://www.apache.org/licenses/LICENSE-2.0 9f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 10f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Unless required by applicable law or agreed to in writing, software 11f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * distributed under the License is distributed on an "AS IS" BASIS, 12f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * See the License for the specific language governing permissions and 14f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * limitations under the License. 15f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 16f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 17f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 18f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpackage android.bordeaux.learning; 19f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 20f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Log; 21f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Pair; 22f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 23f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayInputStream; 24f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayOutputStream; 25f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.IOException; 26f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectInputStream; 27f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectOutputStream; 28f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.Serializable; 29f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.ArrayList; 30f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Collections; 31f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Comparator; 32f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashMap; 33f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashSet; 34f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Iterator; 35f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.List; 36f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map; 37f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map.Entry; 38f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 39f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/** 40f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * A histogram based predictor which records co-occurrences of applations with a speficic feature, 41f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * for example, location, * time of day, etc. The histogram is kept in a two level hash table. 42f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * The first level key is the feature value and the second level key is the app id. 43f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 44c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// TODOS: 45c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 1. Use forgetting factor to downweight istances propotional to the time 46c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 2. Different features could have different weights on prediction scores. 47c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 3. Make prediction (on each feature) only when the histogram has collected 48c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// sufficient counts. 49f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpublic class HistogramPredictor { 50f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin final static String TAG = "HistogramPredictor"; 51f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 52f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private HashMap<String, HistogramCounter> mPredictor = 53f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HistogramCounter>(); 54f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin private HashMap<String, Integer> mClassCounts = new HashMap<String, Integer>(); 561253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin private int mTotalClassCount = 0; 571253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 58f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private static final double FEATURE_INACTIVE_LIKELIHOOD = 0.00000001; 5978a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin private static final double LOG_INACTIVE = Math.log(FEATURE_INACTIVE_LIKELIHOOD); 60f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 61f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 62f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * This class keeps the histogram counts for each feature and provide the 63f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * joint probabilities of <feature, class>. 64f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 65f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private class HistogramCounter { 66f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private HashMap<String, HashMap<String, Integer> > mCounter = 67f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HashMap<String, Integer> >(); 68f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private int mTotalCount; 69f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 70f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HistogramCounter() { 71f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin resetCounter(); 72f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 73f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 74f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void setCounter(HashMap<String, HashMap<String, Integer> > counter) { 75f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin resetCounter(); 76f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.putAll(counter); 77f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 78f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // get total count 79f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HashMap<String, Integer> > entry : counter.entrySet()) { 80f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Integer value : entry.getValue().values()) { 81f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mTotalCount += value.intValue(); 82f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 83f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 84f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 85f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 86f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void resetCounter() { 87f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.clear(); 88f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mTotalCount = 0; 89f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 90f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 91f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void addSample(String className, String featureValue) { 92f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Integer> classCounts; 93f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 94f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (!mCounter.containsKey(featureValue)) { 95f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts = new HashMap<String, Integer>(); 96f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.put(featureValue, classCounts); 971253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } else { 981253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin classCounts = mCounter.get(featureValue); 99f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 100f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin int count = (classCounts.containsKey(className)) ? 101f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts.get(className) + 1 : 1; 102f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts.put(className, count); 103f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mTotalCount++; 104f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 105f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 106f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HashMap<String, Double> getClassScores(String featureValue) { 107f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> classScores = new HashMap<String, Double>(); 108f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 109f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin double logTotalCount = Math.log((double) mTotalCount); 110f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (mCounter.containsKey(featureValue)) { 111f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for(Map.Entry<String, Integer> entry : 112f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.get(featureValue).entrySet()) { 113f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin double score = 114f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin Math.log((double) entry.getValue()) - logTotalCount; 115f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classScores.put(entry.getKey(), score); 116f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 117f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 118f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return classScores; 119f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 120f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 121f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HashMap<String, HashMap<String, Integer> > getCounter() { 122f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return mCounter; 123f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 124f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 125f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 12678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin private double getDefaultLikelihood(Map<String, String> features) { 12778a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin int featureCount = 0; 12878a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin 12978a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin for(String featureName : features.keySet()) { 13078a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin if (mPredictor.containsKey(featureName)) { 13178a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin featureCount++; 13278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 13378a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 13478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin return LOG_INACTIVE * featureCount; 13578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 13678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin 137f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 138f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Given a map of feature name -value pairs returns the mostly likely apps to 139f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * be launched with corresponding likelihoods. 140f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 141f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public List<Map.Entry<String, Double> > findTopClasses(Map<String, String> features, int topK) { 142f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // Most sophisticated function in this class 143f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> appScores = new HashMap<String, Double>(); 14478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin double defaultLikelihood = getDefaultLikelihood(features); 145f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 1461253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin HashMap<String, Integer> appearCounts = new HashMap<String, Integer>(); 1471253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 148f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // compute all app scores 149f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) { 150f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureName = entry.getKey(); 151f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HistogramCounter counter = entry.getValue(); 152f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 153f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (features.containsKey(featureName)) { 154f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureValue = features.get(featureName); 155f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> scoreMap = counter.getClassScores(featureValue); 156f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 157f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, Double> item : scoreMap.entrySet()) { 158f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String appName = item.getKey(); 159f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin double appScore = item.getValue(); 160f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin double score = (appScores.containsKey(appName)) ? 161f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin appScores.get(appName) : defaultLikelihood; 16278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin score += appScore - LOG_INACTIVE; 163f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin appScores.put(appName, score); 1641253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 1651253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int count = (appearCounts.containsKey(appName)) ? 1661253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin appearCounts.get(appName) + 1 : 1; 1671253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin appearCounts.put(appName, count); 1681253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 1691253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 1701253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 1711253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 1721253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin // TODO: this check should be unnecessary 1731253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (mClassCounts.size() != 0 && mTotalClassCount != 0) { 1741253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin for (Map.Entry<String, Double> entry : appScores.entrySet()) { 1751253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin String appName = entry.getKey(); 1761253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin double appScore = entry.getValue(); 1771253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (!appearCounts.containsKey(appName)) { 1781253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin throw new RuntimeException("appearance count error!"); 179f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 1801253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int appearCount = appearCounts.get(appName); 1811253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 1821253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (!mClassCounts.containsKey(appName)) { 1831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin throw new RuntimeException("class count error!"); 1841253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 1851253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin double appPrior = 1861253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin Math.log(mClassCounts.get(appName)) - Math.log(mTotalClassCount); 1871253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin appScores.put(appName, appScore - appPrior * (appearCount - 1)); 188f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 189f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 190f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 191f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // sort app scores 192f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin List<Map.Entry<String, Double> > appList = 193f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new ArrayList<Map.Entry<String, Double> >(appScores.size()); 194f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin appList.addAll(appScores.entrySet()); 195f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin Collections.sort(appList, new Comparator<Map.Entry<String, Double> >() { 196f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public int compare(Map.Entry<String, Double> o1, 197f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin Map.Entry<String, Double> o2) { 198f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return o2.getValue().compareTo(o1.getValue()); 199f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 200f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin }); 201f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 2021253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin Log.v(TAG, "findTopApps appList: " + appList); 203f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return appList; 204f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 205f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 206f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 207f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Add a new observation of given sample id and features to the histograms 208f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 209f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void addSample(String sampleId, Map<String, String> features) { 210f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) { 211f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureName = entry.getKey(); 212f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HistogramCounter counter = entry.getValue(); 213f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 214f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (features.containsKey(featureName)) { 215f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureValue = features.get(featureName); 216f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin counter.addSample(sampleId, featureValue); 217f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 218f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 2191253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 2201253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int sampleCount = (mClassCounts.containsKey(sampleId)) ? 2211253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.get(sampleId) + 1 : 1; 2221253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.put(sampleId, sampleCount); 223f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 224f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 225f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 226f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * reset predictor to a empty model 227f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 228f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void resetPredictor() { 229f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // TODO: not sure this step would reduce memory waste 230f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (HistogramCounter counter : mPredictor.values()) { 231f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin counter.resetCounter(); 232f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 233f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mPredictor.clear(); 2341253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 2351253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.clear(); 2361253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mTotalClassCount = 0; 237f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 238f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 239f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 240f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * specify a feature to used for prediction 241f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 242f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void useFeature(String featureName) { 243f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (!mPredictor.containsKey(featureName)) { 244f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mPredictor.put(featureName, new HistogramCounter()); 245f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 246f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 247f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 248f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 249f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * convert the prediction model into a byte array 250f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 251f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public byte[] getModel() { 252f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // TODO: convert model to a more memory efficient data structure. 253f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, HashMap<String, HashMap<String, Integer > > > model = 254f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HashMap<String, HashMap<String, Integer > > >(); 255f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for(Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) { 256f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model.put(entry.getKey(), entry.getValue().getCounter()); 257f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 258f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 259f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin try { 260f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); 261f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ObjectOutputStream objStream = new ObjectOutputStream(byteStream); 262f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin objStream.writeObject(model); 263f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin byte[] bytes = byteStream.toByteArray(); 264f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin //Log.i(TAG, "getModel: " + bytes); 265f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return bytes; 266f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (IOException e) { 267f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Can't get model"); 268f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 269f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 270f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 271f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 272f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * set the prediction model from a model data in the format of byte array 273f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 274f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public boolean setModel(final byte[] modelData) { 275f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, HashMap<String, HashMap<String, Integer > > > model; 276f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 277f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin try { 278f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ByteArrayInputStream input = new ByteArrayInputStream(modelData); 279f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ObjectInputStream objStream = new ObjectInputStream(input); 280f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model = (HashMap<String, HashMap<String, HashMap<String, Integer > > >) 281f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin objStream.readObject(); 282f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (IOException e) { 283f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Can't load model"); 284f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (ClassNotFoundException e) { 285f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Learning class not found"); 286f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 287f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 288f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin resetPredictor(); 289f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HashMap<String, HashMap<String, Integer> > > entry : 290f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model.entrySet()) { 291f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin useFeature(entry.getKey()); 292f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mPredictor.get(entry.getKey()).setCounter(entry.getValue()); 293f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 2941253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 2951253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin // TODO: this is a temporary fix for now 2961253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin loadClassCounter(); 2971253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 298f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return true; 299f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 3001253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3011253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin private void loadClassCounter() { 3021253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin String TIME_OF_WEEK = "Time of Week"; 3031253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3041253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (!mPredictor.containsKey(TIME_OF_WEEK)) { 3051253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin throw new RuntimeException("Precition model error: missing Time of Week!"); 3061253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3071253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3081253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin HashMap<String, HashMap<String, Integer> > counter = 3091253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mPredictor.get(TIME_OF_WEEK).getCounter(); 3101253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3111253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mTotalClassCount = 0; 3121253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.clear(); 3131253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin for (HashMap<String, Integer> map : counter.values()) { 3141253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin for (Map.Entry<String, Integer> entry : map.entrySet()) { 3151253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int classCount = entry.getValue(); 3161253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin String className = entry.getKey(); 3171253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mTotalClassCount += classCount; 3181253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3191253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (mClassCounts.containsKey(className)) { 3201253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin classCount += mClassCounts.get(className); 3211253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3221253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.put(className, classCount); 3231253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3241253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3251253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3261253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin Log.e(TAG, "class counts: " + mClassCounts + ", total count: " + 3271253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mTotalClassCount); 3281253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 329f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin} 330