1f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/* 2f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Copyright (C) 2011 The Android Open Source Project 3f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 4f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Licensed under the Apache License, Version 2.0 (the "License"); 5f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * you may not use this file except in compliance with the License. 6f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * You may obtain a copy of the License at 7f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 8f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * http://www.apache.org/licenses/LICENSE-2.0 9f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * 10f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Unless required by applicable law or agreed to in writing, software 11f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * distributed under the License is distributed on an "AS IS" BASIS, 12f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * See the License for the specific language governing permissions and 14f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * limitations under the License. 15f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 16f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 17f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpackage android.bordeaux.learning; 18f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 19f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Log; 20f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 21f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayInputStream; 22f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayOutputStream; 23f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.IOException; 24f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectInputStream; 25f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectOutputStream; 26f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.Serializable; 27f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.ArrayList; 28f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Collections; 29f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Comparator; 30f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashMap; 31f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashSet; 32f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Iterator; 33f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.List; 34f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map; 35f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map.Entry; 3647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Linimport java.util.concurrent.ConcurrentHashMap; 37f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/** 3847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * A histogram based predictor which records co-occurrences of applations with a speficic 3947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * feature, for example, location, * time of day, etc. The histogram is kept in a two level 4047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * hash table. The first level key is the feature value and the second level key is the app 4147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * id. 42f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 43c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// TODOS: 44c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 1. Use forgetting factor to downweight istances propotional to the time 45c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 2. Different features could have different weights on prediction scores. 4647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin// 3. Add function to remove sampleid (i.e. remove apps that are uninstalled). 4747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 4847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 49f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpublic class HistogramPredictor { 50f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin final static String TAG = "HistogramPredictor"; 51f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 52f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private HashMap<String, HistogramCounter> mPredictor = 53f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HistogramCounter>(); 54f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin private HashMap<String, Integer> mClassCounts = new HashMap<String, Integer>(); 5647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private HashSet<String> mBlacklist = new HashSet<String>(); 5747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 5847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private static final int MINIMAL_FEATURE_VALUE_COUNTS = 5; 5947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private static final int MINIMAL_APP_APPEARANCE_COUNTS = 5; 6047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 6147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // This parameter ranges from 0 to 1 which determines the effect of app prior. 6247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // When it is set to 0, app prior means completely neglected. When it is set to 1 6347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // the predictor is a standard naive bayes model. 6447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private static final int PRIOR_K_VALUE = 1; 6547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 6647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private static final String[] APP_BLACKLIST = { 6747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.contacts", 6847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.chrome", 6947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.providers.downloads.ui", 7047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.settings", 7147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.vending", 7247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.android.mms", 7347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.google.android.gm", 7447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.google.android.gallery3d", 7547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin "com.google.android.apps.googlevoice", 7647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin }; 7747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 7847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin public HistogramPredictor(String[] blackList) { 7947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin for (String appName : blackList) { 8047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mBlacklist.add(appName); 8147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 8247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 83f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 84f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 85f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * This class keeps the histogram counts for each feature and provide the 86f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * joint probabilities of <feature, class>. 87f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 88f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private class HistogramCounter { 89f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin private HashMap<String, HashMap<String, Integer> > mCounter = 90f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HashMap<String, Integer> >(); 91f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 92f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HistogramCounter() { 9347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mCounter.clear(); 94f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 95f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 96f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void setCounter(HashMap<String, HashMap<String, Integer> > counter) { 97f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin resetCounter(); 98f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.putAll(counter); 99f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 100f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 101f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void resetCounter() { 102f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.clear(); 103f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 104f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 105f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void addSample(String className, String featureValue) { 106f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Integer> classCounts; 107f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 108f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (!mCounter.containsKey(featureValue)) { 109f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts = new HashMap<String, Integer>(); 110f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.put(featureValue, classCounts); 1111253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } else { 1121253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin classCounts = mCounter.get(featureValue); 113f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 114f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin int count = (classCounts.containsKey(className)) ? 115f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts.get(className) + 1 : 1; 116f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin classCounts.put(className, count); 117f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 118f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 119f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HashMap<String, Double> getClassScores(String featureValue) { 120f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> classScores = new HashMap<String, Double>(); 121f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 122f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (mCounter.containsKey(featureValue)) { 12347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin int totalCount = 0; 124f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for(Map.Entry<String, Integer> entry : 125f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mCounter.get(featureValue).entrySet()) { 12647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin String app = entry.getKey(); 12747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin int count = entry.getValue(); 12847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 12947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // For apps with counts less than or equal to one, we treated 13047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // those as having count one. Hence their score, i.e. log(count) 13147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // would be zero. classScroes stores only apps with non-zero scores. 13247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // Note that totalCount also neglect app with single occurrence. 13347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (count > 1) { 13447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin double score = Math.log((double) count); 13547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin classScores.put(app, score); 13647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin totalCount += count; 13747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 13847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 13947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (totalCount < MINIMAL_FEATURE_VALUE_COUNTS) { 14047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin classScores.clear(); 141f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 142f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 143f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return classScores; 144f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 145f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 14647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin public byte[] getModel() { 14747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin try { 14847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); 14947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin ObjectOutputStream objStream = new ObjectOutputStream(byteStream); 15047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin synchronized(mCounter) { 15147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin objStream.writeObject(mCounter); 15247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 15347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin byte[] bytes = byteStream.toByteArray(); 15447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin return bytes; 15547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } catch (IOException e) { 15647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin throw new RuntimeException("Can't get model"); 15747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 15847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 15947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 16047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin public boolean setModel(final byte[] modelData) { 16147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mCounter.clear(); 16247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin HashMap<String, HashMap<String, Integer> > model; 16347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 16447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin try { 16547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin ByteArrayInputStream input = new ByteArrayInputStream(modelData); 16647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin ObjectInputStream objStream = new ObjectInputStream(input); 16747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin model = (HashMap<String, HashMap<String, Integer> >) objStream.readObject(); 16847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } catch (IOException e) { 16947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin throw new RuntimeException("Can't load model"); 17047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } catch (ClassNotFoundException e) { 17147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin throw new RuntimeException("Learning class not found"); 17247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 17347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 17447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin synchronized(mCounter) { 17547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mCounter.putAll(model); 17647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 17747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 17847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin return true; 17947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 18047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 18147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 182f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public HashMap<String, HashMap<String, Integer> > getCounter() { 183f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return mCounter; 184f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 18578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin 18647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin public String toString() { 18747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin String result = ""; 18847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin for (Map.Entry<String, HashMap<String, Integer> > entry : 18947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mCounter.entrySet()) { 19047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin result += "{ " + entry.getKey() + " : " + 19147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin entry.getValue().toString() + " }"; 19278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 19347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin return result; 19478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 19578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin } 19678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin 197f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 19847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * Given a map of feature name -value pairs returns topK mostly likely apps to 19947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * be launched with corresponding likelihoods. If topK is set zero, it will return 20047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * the whole list. 201f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 202f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public List<Map.Entry<String, Double> > findTopClasses(Map<String, String> features, int topK) { 203f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // Most sophisticated function in this class 204f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> appScores = new HashMap<String, Double>(); 20547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin int validFeatureCount = 0; 2061253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 207f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // compute all app scores 208f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) { 209f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureName = entry.getKey(); 210f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HistogramCounter counter = entry.getValue(); 211f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 212f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin if (features.containsKey(featureName)) { 213f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureValue = features.get(featureName); 214f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, Double> scoreMap = counter.getClassScores(featureValue); 215f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 21647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (scoreMap.isEmpty()) { 21747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin continue; 21847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 21947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin validFeatureCount++; 22047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 221f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, Double> item : scoreMap.entrySet()) { 222f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String appName = item.getKey(); 223f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin double appScore = item.getValue(); 22447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (appScores.containsKey(appName)) { 22547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin appScore += appScores.get(appName); 22647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 22747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin appScores.put(appName, appScore); 2281253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 2291253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 2301253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 2311253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 23247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin HashMap<String, Double> appCandidates = new HashMap<String, Double>(); 23347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin for (Map.Entry<String, Double> entry : appScores.entrySet()) { 23447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin String appName = entry.getKey(); 23547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (mBlacklist.contains(appName)) { 23647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin Log.i(TAG, appName + " is in blacklist"); 23747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin continue; 23847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 23947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (!mClassCounts.containsKey(appName)) { 24047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin throw new RuntimeException("class count error!"); 241f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 24247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin int appCount = mClassCounts.get(appName); 24347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (appCount < MINIMAL_APP_APPEARANCE_COUNTS) { 24447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin Log.i(TAG, appName + " doesn't have enough counts"); 24547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin continue; 24647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 24747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin 24847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin double appScore = entry.getValue(); 24947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin double appPrior = Math.log((double) appCount); 25047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin appCandidates.put(appName, 25147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin appScore - appPrior * (validFeatureCount - PRIOR_K_VALUE)); 252f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 253f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 254f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // sort app scores 255f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin List<Map.Entry<String, Double> > appList = 25647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin new ArrayList<Map.Entry<String, Double> >(appCandidates.size()); 25747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin appList.addAll(appCandidates.entrySet()); 258f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin Collections.sort(appList, new Comparator<Map.Entry<String, Double> >() { 259f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public int compare(Map.Entry<String, Double> o1, 260f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin Map.Entry<String, Double> o2) { 261f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return o2.getValue().compareTo(o1.getValue()); 262f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 263f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin }); 264f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 26547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (topK == 0) { 26647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin topK = appList.size(); 26747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 26847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin return appList.subList(0, Math.min(topK, appList.size())); 269f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 270f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 271f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 272f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Add a new observation of given sample id and features to the histograms 273f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 274f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void addSample(String sampleId, Map<String, String> features) { 27547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin for (Map.Entry<String, String> entry : features.entrySet()) { 276f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin String featureName = entry.getKey(); 27747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin String featureValue = entry.getValue(); 278f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 27947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin useFeature(featureName); 28047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin HistogramCounter counter = mPredictor.get(featureName); 28147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin counter.addSample(sampleId, featureValue); 282f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 2831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 2841253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int sampleCount = (mClassCounts.containsKey(sampleId)) ? 2851253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.get(sampleId) + 1 : 1; 2861253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.put(sampleId, sampleCount); 287f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 288f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 289f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 290f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * reset predictor to a empty model 291f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 292f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public void resetPredictor() { 293f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // TODO: not sure this step would reduce memory waste 294f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (HistogramCounter counter : mPredictor.values()) { 295f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin counter.resetCounter(); 296f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 297f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mPredictor.clear(); 2981253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.clear(); 299f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 300f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 301f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 302f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * convert the prediction model into a byte array 303f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 304f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public byte[] getModel() { 305f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin // TODO: convert model to a more memory efficient data structure. 306f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, HashMap<String, HashMap<String, Integer > > > model = 307f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin new HashMap<String, HashMap<String, HashMap<String, Integer > > >(); 308f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for(Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) { 309f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model.put(entry.getKey(), entry.getValue().getCounter()); 310f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 311f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 312f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin try { 313f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); 314f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ObjectOutputStream objStream = new ObjectOutputStream(byteStream); 315f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin objStream.writeObject(model); 316f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin byte[] bytes = byteStream.toByteArray(); 317f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return bytes; 318f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (IOException e) { 319f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Can't get model"); 320f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 321f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 322f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 323f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin /* 324f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * set the prediction model from a model data in the format of byte array 325f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */ 326f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin public boolean setModel(final byte[] modelData) { 327f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin HashMap<String, HashMap<String, HashMap<String, Integer > > > model; 328f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 329f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin try { 330f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ByteArrayInputStream input = new ByteArrayInputStream(modelData); 331f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin ObjectInputStream objStream = new ObjectInputStream(input); 332f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model = (HashMap<String, HashMap<String, HashMap<String, Integer > > >) 333f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin objStream.readObject(); 334f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (IOException e) { 335f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Can't load model"); 336f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } catch (ClassNotFoundException e) { 337f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin throw new RuntimeException("Learning class not found"); 338f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 339f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin 340f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin resetPredictor(); 341f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin for (Map.Entry<String, HashMap<String, HashMap<String, Integer> > > entry : 342f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin model.entrySet()) { 343f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin useFeature(entry.getKey()); 344f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin mPredictor.get(entry.getKey()).setCounter(entry.getValue()); 345f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 3461253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3471253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin // TODO: this is a temporary fix for now 3481253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin loadClassCounter(); 3491253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 350f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin return true; 351f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin } 3521253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3531253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin private void loadClassCounter() { 3541253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin String TIME_OF_WEEK = "Time of Week"; 3551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3561253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (!mPredictor.containsKey(TIME_OF_WEEK)) { 3571253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin throw new RuntimeException("Precition model error: missing Time of Week!"); 3581253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3591253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3601253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin HashMap<String, HashMap<String, Integer> > counter = 3611253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mPredictor.get(TIME_OF_WEEK).getCounter(); 3621253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3631253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.clear(); 3641253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin for (HashMap<String, Integer> map : counter.values()) { 3651253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin for (Map.Entry<String, Integer> entry : map.entrySet()) { 3661253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin int classCount = entry.getValue(); 3671253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin String className = entry.getKey(); 36847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin // mTotalClassCount += classCount; 3691253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 3701253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin if (mClassCounts.containsKey(className)) { 3711253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin classCount += mClassCounts.get(className); 3721253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3731253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin mClassCounts.put(className, classCount); 3741253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 3751253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 37647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin Log.i(TAG, "class counts: " + mClassCounts); 37747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 3781253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin 37947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin private void useFeature(String featureName) { 38047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin if (!mPredictor.containsKey(featureName)) { 38147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin mPredictor.put(featureName, new HistogramCounter()); 38247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin } 3831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin } 384f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin} 385