1f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/*
2f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Copyright (C) 2011 The Android Open Source Project
3f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
4f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Licensed under the Apache License, Version 2.0 (the "License");
5f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * you may not use this file except in compliance with the License.
6f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * You may obtain a copy of the License at
7f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
8f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *      http://www.apache.org/licenses/LICENSE-2.0
9f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin *
10f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * Unless required by applicable law or agreed to in writing, software
11f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * distributed under the License is distributed on an "AS IS" BASIS,
12f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * See the License for the specific language governing permissions and
14f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin * limitations under the License.
15f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */
16f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
17f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpackage android.bordeaux.learning;
18f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
19f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport android.util.Log;
20f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
21f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayInputStream;
22f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ByteArrayOutputStream;
23f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.IOException;
24f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectInputStream;
25f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.ObjectOutputStream;
26f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.io.Serializable;
27f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.ArrayList;
28f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Collections;
29f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Comparator;
30f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashMap;
31f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.HashSet;
32f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Iterator;
33f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.List;
34f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map;
35f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linimport java.util.Map.Entry;
3647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Linimport java.util.concurrent.ConcurrentHashMap;
37f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin/**
3847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * A histogram based predictor which records co-occurrences of applations with a speficic
3947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * feature, for example, location, * time of day, etc. The histogram is kept in a two level
4047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * hash table. The first level key is the feature value and the second level key is the app
4147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin * id.
42f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin */
43c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// TODOS:
44c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 1. Use forgetting factor to downweight istances propotional to the time
45c7c9cf164cc58d03156a53be35e58c3b75871a23Ruei-sung Lin// 2. Different features could have different weights on prediction scores.
4647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin// 3. Add function to remove sampleid (i.e. remove apps that are uninstalled).
4747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
4847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
49f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Linpublic class HistogramPredictor {
50f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    final static String TAG = "HistogramPredictor";
51f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
52f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    private HashMap<String, HistogramCounter> mPredictor =
53f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            new HashMap<String, HistogramCounter>();
54f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    private HashMap<String, Integer> mClassCounts = new HashMap<String, Integer>();
5647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private HashSet<String> mBlacklist = new HashSet<String>();
5747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
5847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private static final int MINIMAL_FEATURE_VALUE_COUNTS = 5;
5947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private static final int MINIMAL_APP_APPEARANCE_COUNTS = 5;
6047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
6147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    // This parameter ranges from 0 to 1 which determines the effect of app prior.
6247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    // When it is set to 0, app prior means completely neglected. When it is set to 1
6347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    // the predictor is a standard naive bayes model.
6447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private static final int PRIOR_K_VALUE = 1;
6547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
6647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private static final String[] APP_BLACKLIST = {
6747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.contacts",
6847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.chrome",
6947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.providers.downloads.ui",
7047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.settings",
7147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.vending",
7247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.android.mms",
7347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.google.android.gm",
7447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.google.android.gallery3d",
7547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        "com.google.android.apps.googlevoice",
7647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    };
7747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
7847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    public HistogramPredictor(String[] blackList) {
7947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        for (String appName : blackList) {
8047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            mBlacklist.add(appName);
8147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        }
8247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    }
83f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
84f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
85f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * This class keeps the histogram counts for each feature and provide the
86f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * joint probabilities of <feature, class>.
87f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
88f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    private class HistogramCounter {
89f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        private HashMap<String, HashMap<String, Integer> > mCounter =
90f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                new HashMap<String, HashMap<String, Integer> >();
91f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
92f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HistogramCounter() {
9347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            mCounter.clear();
94f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
95f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
96f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void setCounter(HashMap<String, HashMap<String, Integer> > counter) {
97f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            resetCounter();
98f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mCounter.putAll(counter);
99f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
100f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
101f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void resetCounter() {
102f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mCounter.clear();
103f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
104f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
105f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public void addSample(String className, String featureValue) {
106f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HashMap<String, Integer> classCounts;
107f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
108f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (!mCounter.containsKey(featureValue)) {
109f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                classCounts = new HashMap<String, Integer>();
110f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                mCounter.put(featureValue, classCounts);
1111253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            } else {
1121253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                classCounts = mCounter.get(featureValue);
113f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
114f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            int count = (classCounts.containsKey(className)) ?
115f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    classCounts.get(className) + 1 : 1;
116f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            classCounts.put(className, count);
117f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
118f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
119f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HashMap<String, Double> getClassScores(String featureValue) {
120f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HashMap<String, Double> classScores = new HashMap<String, Double>();
121f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
122f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (mCounter.containsKey(featureValue)) {
12347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                int totalCount = 0;
124f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                for(Map.Entry<String, Integer> entry :
125f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                        mCounter.get(featureValue).entrySet()) {
12647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    String app = entry.getKey();
12747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    int count = entry.getValue();
12847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
12947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    // For apps with counts less than or equal to one, we treated
13047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    // those as having count one. Hence their score, i.e. log(count)
13147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    // would be zero. classScroes stores only apps with non-zero scores.
13247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    // Note that totalCount also neglect app with single occurrence.
13347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    if (count > 1) {
13447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                        double score = Math.log((double) count);
13547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                        classScores.put(app, score);
13647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                        totalCount += count;
13747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    }
13847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                }
13947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                if (totalCount < MINIMAL_FEATURE_VALUE_COUNTS) {
14047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    classScores.clear();
141f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                }
142f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
143f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return classScores;
144f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
145f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
14647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        public byte[] getModel() {
14747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            try {
14847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
14947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                ObjectOutputStream objStream = new ObjectOutputStream(byteStream);
15047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                synchronized(mCounter) {
15147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    objStream.writeObject(mCounter);
15247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                }
15347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                byte[] bytes = byteStream.toByteArray();
15447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                return bytes;
15547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            } catch (IOException e) {
15647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                throw new RuntimeException("Can't get model");
15747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            }
15847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        }
15947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
16047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        public boolean setModel(final byte[] modelData) {
16147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            mCounter.clear();
16247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            HashMap<String, HashMap<String, Integer> > model;
16347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
16447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            try {
16547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                ByteArrayInputStream input = new ByteArrayInputStream(modelData);
16647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                ObjectInputStream objStream = new ObjectInputStream(input);
16747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                model = (HashMap<String, HashMap<String, Integer> >) objStream.readObject();
16847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            } catch (IOException e) {
16947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                throw new RuntimeException("Can't load model");
17047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            } catch (ClassNotFoundException e) {
17147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                throw new RuntimeException("Learning class not found");
17247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            }
17347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
17447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            synchronized(mCounter) {
17547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                mCounter.putAll(model);
17647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            }
17747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
17847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            return true;
17947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        }
18047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
18147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
182f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        public HashMap<String, HashMap<String, Integer> > getCounter() {
183f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return mCounter;
184f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
18578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin
18647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        public String toString() {
18747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            String result = "";
18847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            for (Map.Entry<String, HashMap<String, Integer> > entry :
18947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                     mCounter.entrySet()) {
19047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                result += "{ " + entry.getKey() + " : " +
19147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    entry.getValue().toString() + " }";
19278a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin            }
19347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            return result;
19478a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin        }
19578a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin    }
19678a66d98346a69f65e9d38bb0c96a5418c007197Ruei-sung Lin
197f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
19847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin     * Given a map of feature name -value pairs returns topK mostly likely apps to
19947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin     * be launched with corresponding likelihoods. If topK is set zero, it will return
20047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin     * the whole list.
201f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
202f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public List<Map.Entry<String, Double> > findTopClasses(Map<String, String> features, int topK) {
203f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // Most sophisticated function in this class
204f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, Double> appScores = new HashMap<String, Double>();
20547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        int validFeatureCount = 0;
2061253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
207f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // compute all app scores
208f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
209f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            String featureName = entry.getKey();
210f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            HistogramCounter counter = entry.getValue();
211f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
212f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            if (features.containsKey(featureName)) {
213f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                String featureValue = features.get(featureName);
214f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                HashMap<String, Double> scoreMap = counter.getClassScores(featureValue);
215f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
21647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                if (scoreMap.isEmpty()) {
21747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                  continue;
21847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                }
21947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                validFeatureCount++;
22047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
221f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                for (Map.Entry<String, Double> item : scoreMap.entrySet()) {
222f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    String appName = item.getKey();
223f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    double appScore = item.getValue();
22447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    if (appScores.containsKey(appName)) {
22547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                        appScore += appScores.get(appName);
22647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    }
22747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                    appScores.put(appName, appScore);
2281253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                }
2291253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            }
2301253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
2311253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
23247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        HashMap<String, Double> appCandidates = new HashMap<String, Double>();
23347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        for (Map.Entry<String, Double> entry : appScores.entrySet()) {
23447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            String appName = entry.getKey();
23547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            if (mBlacklist.contains(appName)) {
23647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                Log.i(TAG, appName + " is in blacklist");
23747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                continue;
23847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            }
23947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            if (!mClassCounts.containsKey(appName)) {
24047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                throw new RuntimeException("class count error!");
241f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
24247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            int appCount = mClassCounts.get(appName);
24347c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            if (appCount < MINIMAL_APP_APPEARANCE_COUNTS) {
24447c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                Log.i(TAG, appName + " doesn't have enough counts");
24547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                continue;
24647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            }
24747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin
24847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            double appScore = entry.getValue();
24947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            double appPrior = Math.log((double) appCount);
25047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            appCandidates.put(appName,
25147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                              appScore - appPrior * (validFeatureCount - PRIOR_K_VALUE));
252f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
253f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
254f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // sort app scores
255f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        List<Map.Entry<String, Double> > appList =
25647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin               new ArrayList<Map.Entry<String, Double> >(appCandidates.size());
25747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        appList.addAll(appCandidates.entrySet());
258f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        Collections.sort(appList, new  Comparator<Map.Entry<String, Double> >() {
259f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            public int compare(Map.Entry<String, Double> o1,
260f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                               Map.Entry<String, Double> o2) {
261f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                return o2.getValue().compareTo(o1.getValue());
262f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            }
263f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        });
264f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
26547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        if (topK == 0) {
26647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            topK = appList.size();
26747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        }
26847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        return appList.subList(0, Math.min(topK, appList.size()));
269f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
270f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
271f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
272f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * Add a new observation of given sample id and features to the histograms
273f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
274f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public void addSample(String sampleId, Map<String, String> features) {
27547c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        for (Map.Entry<String, String> entry : features.entrySet()) {
276f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            String featureName = entry.getKey();
27747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            String featureValue = entry.getValue();
278f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
27947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            useFeature(featureName);
28047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            HistogramCounter counter = mPredictor.get(featureName);
28147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            counter.addSample(sampleId, featureValue);
282f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
2831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
2841253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        int sampleCount = (mClassCounts.containsKey(sampleId)) ?
2851253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            mClassCounts.get(sampleId) + 1 : 1;
2861253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.put(sampleId, sampleCount);
287f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
288f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
289f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
290f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * reset predictor to a empty model
291f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
292f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public void resetPredictor() {
293f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // TODO: not sure this step would reduce memory waste
294f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (HistogramCounter counter : mPredictor.values()) {
295f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            counter.resetCounter();
296f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
297f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        mPredictor.clear();
2981253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.clear();
299f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
300f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
301f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
302f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * convert the prediction model into a byte array
303f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
304f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public byte[] getModel() {
305f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        // TODO: convert model to a more memory efficient data structure.
306f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, HashMap<String, HashMap<String, Integer > > > model =
307f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                new HashMap<String, HashMap<String, HashMap<String, Integer > > >();
308f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for(Map.Entry<String, HistogramCounter> entry : mPredictor.entrySet()) {
309f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            model.put(entry.getKey(), entry.getValue().getCounter());
310f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
311f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
312f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        try {
313f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
314f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ObjectOutputStream objStream = new ObjectOutputStream(byteStream);
315f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            objStream.writeObject(model);
316f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            byte[] bytes = byteStream.toByteArray();
317f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            return bytes;
318f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (IOException e) {
319f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Can't get model");
320f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
321f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
322f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
323f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    /*
324f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     * set the prediction model from a model data in the format of byte array
325f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin     */
326f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    public boolean setModel(final byte[] modelData) {
327f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        HashMap<String, HashMap<String, HashMap<String, Integer > > > model;
328f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
329f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        try {
330f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ByteArrayInputStream input = new ByteArrayInputStream(modelData);
331f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            ObjectInputStream objStream = new ObjectInputStream(input);
332f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            model = (HashMap<String, HashMap<String, HashMap<String, Integer > > >)
333f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                    objStream.readObject();
334f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (IOException e) {
335f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Can't load model");
336f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        } catch (ClassNotFoundException e) {
337f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            throw new RuntimeException("Learning class not found");
338f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
339f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin
340f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        resetPredictor();
341f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        for (Map.Entry<String, HashMap<String, HashMap<String, Integer> > > entry :
342f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin                model.entrySet()) {
343f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            useFeature(entry.getKey());
344f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin            mPredictor.get(entry.getKey()).setCounter(entry.getValue());
345f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        }
3461253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3471253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        // TODO: this is a temporary fix for now
3481253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        loadClassCounter();
3491253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
350f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin        return true;
351f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin    }
3521253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3531253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    private void loadClassCounter() {
3541253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        String TIME_OF_WEEK = "Time of Week";
3551253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3561253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        if (!mPredictor.containsKey(TIME_OF_WEEK)) {
3571253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            throw new RuntimeException("Precition model error: missing Time of Week!");
3581253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
3591253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3601253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        HashMap<String, HashMap<String, Integer> > counter =
3611253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            mPredictor.get(TIME_OF_WEEK).getCounter();
3621253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3631253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        mClassCounts.clear();
3641253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        for (HashMap<String, Integer> map : counter.values()) {
3651253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            for (Map.Entry<String, Integer> entry : map.entrySet()) {
3661253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                int classCount = entry.getValue();
3671253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                String className = entry.getKey();
36847c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin                // mTotalClassCount += classCount;
3691253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
3701253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                if (mClassCounts.containsKey(className)) {
3711253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                    classCount += mClassCounts.get(className);
3721253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                }
3731253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin                mClassCounts.put(className, classCount);
3741253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin            }
3751253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin        }
37647c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        Log.i(TAG, "class counts: " + mClassCounts);
37747c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    }
3781253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin
37947c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin    private void useFeature(String featureName) {
38047c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        if (!mPredictor.containsKey(featureName)) {
38147c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin            mPredictor.put(featureName, new HistogramCounter());
38247c0dc05cde9e9d9cc57e1393429006bf8b23b32Ruei-sung Lin        }
3831253e9fb0b5570ab8adaed222655a5b052aa072eRuei-sung Lin    }
384f0f78449e8ab7d63894964c54b6ef390ca9ce044Ruei-sung Lin}
385