1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
// Stochastic Linear Ranking algorithms.
// This class implements a set of incremental algorithms for ranking tasks.
// They support both L1 and L2 regularization.
20
21
22#ifndef LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_
23#define LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_
24
25#include <cmath>
26#include <hash_map>
27#include <string>
28
29#include <sys/types.h>
30#include "cutils/log.h"
31#include "common_defs.h"
32#include "learning_rate_controller-inl.h"
33#include "sparse_weight_vector.h"
34
35namespace learning_stochastic_linear {
36
37// NOTE: This Stochastic Linear Ranker supports only the following update types:
38// SL: Stochastic Linear
39// CS: Constraint Satisfaction
40template<class Key = std::string, class Hash = std::hash_map<std::string, double> >
41class StochasticLinearRanker {
42 public:
43  // initialize lambda_ and constraint to a meaningful default. Will give
44  // equal weight to the error and regularizer.
45  StochasticLinearRanker() {
46    iteration_num_ = 0;
47    lambda_ = 1.0;
48    learning_rate_controller_.SetLambda(lambda_);
49    mini_batch_size_ = 1;
50    learning_rate_controller_.SetMiniBatchSize(mini_batch_size_);
51    adaptation_mode_ = INV_LINEAR;
52    learning_rate_controller_.SetAdaptationMode(adaptation_mode_);
53    update_type_ = SL;
54    regularization_type_ = L2;
55    kernel_type_ = LINEAR;
56    kernel_param_ = 1.0;
57    kernel_gain_ = 1.0;
58    kernel_bias_ = 0.0;
59    rank_loss_type_ = PAIRWISE;
60    acceptence_probability_ = 0.1;
61    mini_batch_counter_ = 0;
62    gradient_l0_norm_ = -1;
63    norm_constraint_ = 1.0;
64  }
65
66  ~StochasticLinearRanker() {}
67  // Getters and setters
68  double GetIterationNumber() const {
69    return iteration_num_;
70  }
71  double GetNormContraint() const {
72    return norm_constraint_;
73  }
74  RegularizationType GetRegularizationType() const {
75    return regularization_type_;
76  }
77  double GetLambda() const {
78    return lambda_;
79  }
80  uint64 GetMiniBatchSize() const {
81    return mini_batch_size_;
82  }
83  int32 GetGradientL0Norm() const {
84    return gradient_l0_norm_;
85  }
86  UpdateType GetUpdateType() const {
87    return update_type_;
88  }
89  AdaptationMode GetAdaptationMode() const {
90    return adaptation_mode_;
91  }
92  KernelType GetKernelType() const {
93    return kernel_type_;
94  }
95  // This function returns the basic kernel parameter. In case of
96  // polynomial kernel, it implies the degree of the polynomial.  In case of
97  // RBF kernel, it implies the sigma parameter. In case of linear kernel,
98  // it is not used.
99  double GetKernelParam() const {
100    return kernel_param_;
101  }
102  double GetKernelGain() const {
103    return kernel_gain_;;
104  }
105  double GetKernelBias() const {
106    return kernel_bias_;
107  }
108  RankLossType GetRankLossType() const {
109    return rank_loss_type_;
110  }
111  double GetAcceptanceProbability() const {
112    return acceptence_probability_;
113  }
114  void SetIterationNumber(uint64 num) {
115    iteration_num_=num;
116  }
117  void SetNormConstraint(const double norm) {
118    norm_constraint_ = norm;
119  }
120  void SetRegularizationType(const RegularizationType r) {
121    regularization_type_ = r;
122  }
123  void SetLambda(double l) {
124    lambda_ = l;
125    learning_rate_controller_.SetLambda(l);
126  }
127  void SetMiniBatchSize(const uint64 msize) {
128    mini_batch_size_ = msize;
129    learning_rate_controller_.SetMiniBatchSize(msize);
130  }
131  void SetAdaptationMode(AdaptationMode m) {
132    adaptation_mode_ = m;
133    learning_rate_controller_.SetAdaptationMode(m);
134  }
135  void SetKernelType(KernelType k ) {
136    kernel_type_ = k;
137  }
138  // This function sets the basic kernel parameter. In case of
139  // polynomial kernel, it implies the degree of the polynomial. In case of
140  // RBF kernel, it implies the sigma parameter. In case of linear kernel,
141  // it is not used.
142  void SetKernelParam(double param) {
143    kernel_param_ = param;
144  }
145  // This function sets the kernel gain. NOTE: in most use cases, gain should
146  // be set to 1.0.
147  void SetKernelGain(double gain) {
148    kernel_gain_ = gain;
149  }
150  // This function sets the kernel bias. NOTE: in most use cases, bias should
151  // be set to 0.0.
152  void SetKernelBias(double bias) {
153    kernel_bias_ = bias;
154  }
155  void SetUpdateType(UpdateType u) {
156    update_type_ = u;
157  }
158  void SetRankLossType(RankLossType r) {
159    rank_loss_type_ = r;
160  }
161  void SetAcceptanceProbability(double p) {
162    acceptence_probability_ = p;
163  }
164  void SetGradientL0Norm(const int32 gradient_l0_norm) {
165    gradient_l0_norm_ = gradient_l0_norm;
166  }
167  // Load an existing model
168  void LoadWeights(const SparseWeightVector<Key, Hash> &model) {
169    weight_.LoadWeightVector(model);
170  }
171  // Save current model
172  void SaveWeights(SparseWeightVector<Key, Hash> *model) {
173    model->LoadWeightVector(weight_);
174  }
175  // Scoring
176  double ScoreSample(const SparseWeightVector<Key, Hash> &sample) {
177    const double dot = weight_.DotProduct(sample);
178    double w_square;
179    double s_square;
180    switch (kernel_type_) {
181      case LINEAR:
182        return dot;
183      case POLY:
184        return pow(kernel_gain_ * dot + kernel_bias_, kernel_param_);
185      case RBF:
186        w_square = weight_.L2Norm();
187        s_square = sample.L2Norm();
188        return exp(-1 * kernel_param_ * (w_square + s_square - 2 * dot));
189      default:
190      ALOGE("unsupported kernel: %d", kernel_type_);
191    }
192    return -1;
193  }
194  // Learning Functions
195  // Return values:
196  // 1 :full update went through
197  // 2 :partial update went through (for SL only)
198  // 0 :no update necessary.
199  // -1:error.
200  int UpdateClassifier(const SparseWeightVector<Key, Hash> &positive,
201                       const SparseWeightVector<Key, Hash> &negative);
202
203 private:
204  SparseWeightVector<Key, Hash> weight_;
205  double norm_constraint_;
206  double lambda_;
207  RegularizationType regularization_type_;
208  AdaptationMode adaptation_mode_;
209  UpdateType update_type_;
210  RankLossType rank_loss_type_;
211  KernelType kernel_type_;
212  // Kernel gain and bias are typically multiplicative and additive factors to
213  // the dot product while calculating the kernel function. Kernel param is
214  // kernel-specific. In case of polynomial kernel, it is the degree of the
215  // polynomial.
216  double kernel_param_;
217  double kernel_gain_;
218  double kernel_bias_;
219  double acceptence_probability_;
220  SparseWeightVector<Key, Hash> current_negative_;
221  LearningRateController learning_rate_controller_;
222  uint64 iteration_num_;
223  // We average out gradient updates for mini_batch_size_ samples
224  // before performing an iteration of the algorithm.
225  uint64 mini_batch_counter_;
226  uint64 mini_batch_size_;
227  // Specifies the number of non-zero entries allowed in a gradient.
228  // Default is -1 which means we take the gradient as given by data without
229  // adding any new constraints. positive number is treated as an L0 constraint
230  int32 gradient_l0_norm_;
231  // Sub-Gradient Updates
232  // Pure Sub-Gradient update without any reprojection
233  // Note that a form of L2 regularization is built into this
234  void UpdateSubGradient(const SparseWeightVector<Key, Hash> &positive,
235                         const SparseWeightVector<Key, Hash> &negative,
236                         const double learning_rate,
237                         const double positive_score,
238                         const double negative_score,
239                         const int32 gradient_l0_norm);
240
241};
242}  // namespace learning_stochastic_linear
243#endif  // LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_
244