// stochastic_linear_ranker.h, revision a08525ea290ff4edc766eda1ec80388be866a79e
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Stochastic Linear Ranking algorithms.
// This class implements a set of incremental algorithms for ranking tasks.
// They support both L1 and L2 regularization.


#ifndef LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_
#define LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_

#include <sys/types.h>

#include <cmath>
#include <string>
#include <unordered_map>

#include "cutils/log.h"
#include "common_defs.h"
#include "learning_rate_controller-inl.h"
#include "sparse_weight_vector.h"
namespace learning_stochastic_linear {

// NOTE: This Stochastic Linear Ranker supports only the following update types:
// SL: Stochastic Linear
// CS: Constraint Satisfaction
template<class Key = std::string, class Hash = std::unordered_map<std::string, double> >
class StochasticLinearRanker {
 public:
  // Initializes lambda_ and the norm constraint to meaningful defaults that
  // give equal weight to the error and the regularizer.
  StochasticLinearRanker() {
    iteration_num_ = 0;
    lambda_ = 1.0;
    learning_rate_controller_.SetLambda(lambda_);
    mini_batch_size_ = 1;
    learning_rate_controller_.SetMiniBatchSize(mini_batch_size_);
    adaptation_mode_ = INV_LINEAR;
    learning_rate_controller_.SetAdaptationMode(adaptation_mode_);
    update_type_ = SL;
    regularization_type_ = L2;
    kernel_type_ = LINEAR;
    kernel_param_ = 1.0;
    kernel_gain_ = 1.0;
    kernel_bias_ = 0.0;
    rank_loss_type_ = PAIRWISE;
    acceptance_probability_ = 0.1;
    mini_batch_counter_ = 0;
    gradient_l0_norm_ = -1;
    norm_constraint_ = 1.0;
  }
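
  // Example usage (a sketch, assuming the default template arguments and the
  // enum values declared in common_defs.h):
  //
  //   StochasticLinearRanker<> ranker;
  //   ranker.SetLambda(0.1);             // weight the regularizer less
  //   ranker.SetRegularizationType(L1);  // switch from the default L2
  //   ranker.SetMiniBatchSize(16);       // average gradients over 16 pairs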

  ~StochasticLinearRanker() {}
  // Getters and setters
  uint64 GetIterationNumber() const {
    return iteration_num_;
  }
  double GetNormConstraint() const {
    return norm_constraint_;
  }
  RegularizationType GetRegularizationType() const {
    return regularization_type_;
  }
  double GetLambda() const {
    return lambda_;
  }
  uint64 GetMiniBatchSize() const {
    return mini_batch_size_;
  }
  int32 GetGradientL0Norm() const {
    return gradient_l0_norm_;
  }
  UpdateType GetUpdateType() const {
    return update_type_;
  }
  AdaptationMode GetAdaptationMode() const {
    return adaptation_mode_;
  }
  KernelType GetKernelType() const {
    return kernel_type_;
  }
  // This function returns the basic kernel parameter. For the polynomial
  // kernel, it is the degree of the polynomial. For the RBF kernel, it is
  // the sigma parameter. For the linear kernel, it is not used.
  double GetKernelParam() const {
    return kernel_param_;
  }
  double GetKernelGain() const {
    return kernel_gain_;
  }
  double GetKernelBias() const {
    return kernel_bias_;
  }
  RankLossType GetRankLossType() const {
    return rank_loss_type_;
  }
  double GetAcceptanceProbability() const {
    return acceptance_probability_;
  }
  void SetIterationNumber(uint64 num) {
    iteration_num_ = num;
  }
  void SetNormConstraint(const double norm) {
    norm_constraint_ = norm;
  }
  void SetRegularizationType(const RegularizationType r) {
    regularization_type_ = r;
  }
  void SetLambda(double l) {
    lambda_ = l;
    learning_rate_controller_.SetLambda(l);
  }
  void SetMiniBatchSize(const uint64 msize) {
    mini_batch_size_ = msize;
    learning_rate_controller_.SetMiniBatchSize(msize);
  }
  void SetAdaptationMode(AdaptationMode m) {
    adaptation_mode_ = m;
    learning_rate_controller_.SetAdaptationMode(m);
  }
  void SetKernelType(KernelType k) {
    kernel_type_ = k;
  }
  // This function sets the basic kernel parameter. For the polynomial
  // kernel, it is the degree of the polynomial. For the RBF kernel, it is
  // the sigma parameter. For the linear kernel, it is not used.
  void SetKernelParam(double param) {
    kernel_param_ = param;
  }
  // This function sets the kernel gain. NOTE: in most use cases, gain should
  // be set to 1.0.
  void SetKernelGain(double gain) {
    kernel_gain_ = gain;
  }
  // This function sets the kernel bias. NOTE: in most use cases, bias should
  // be set to 0.0.
  void SetKernelBias(double bias) {
    kernel_bias_ = bias;
  }
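
  // For instance, to score with an RBF kernel of sigma 0.5 (a sketch; RBF is
  // one of the KernelType values handled in ScoreSample below):
  //
  //   ranker.SetKernelType(RBF);
  //   ranker.SetKernelParam(0.5);  // sigma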
  void SetUpdateType(UpdateType u) {
    update_type_ = u;
  }
  void SetRankLossType(RankLossType r) {
    rank_loss_type_ = r;
  }
  void SetAcceptanceProbability(double p) {
    acceptance_probability_ = p;
  }
  void SetGradientL0Norm(const int32 gradient_l0_norm) {
    gradient_l0_norm_ = gradient_l0_norm;
  }
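
  // For example (a sketch; presumably the largest-magnitude gradient entries
  // are the ones kept -- see the gradient_l0_norm_ comment below):
  //
  //   ranker.SetGradientL0Norm(100);  // allow at most 100 non-zero entries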
  // Load an existing model.
  void LoadWeights(const SparseWeightVector<Key, Hash> &model) {
    weight_.LoadWeightVector(model);
  }
  // Save the current model.
  void SaveWeights(SparseWeightVector<Key, Hash> *model) {
    model->LoadWeightVector(weight_);
  }
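
  // Example (a sketch): round-tripping the learned weights through an
  // external SparseWeightVector, instantiated with the ranker's default
  // Key/Hash parameters.
  //
  //   SparseWeightVector<std::string,
  //                      std::unordered_map<std::string, double> > snapshot;
  //   ranker.SaveWeights(&snapshot);  // copy the weights out
  //   ranker.LoadWeights(snapshot);   // restore them later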
  // Scoring
  double ScoreSample(const SparseWeightVector<Key, Hash> &sample) {
    const double dot = weight_.DotProduct(sample);
    double w_square;
    double s_square;
    switch (kernel_type_) {
      case LINEAR:
        return dot;
      case POLY:
        return std::pow(kernel_gain_ * dot + kernel_bias_, kernel_param_);
      case RBF:
        w_square = weight_.L2Norm();
        s_square = sample.L2Norm();
        return std::exp(-kernel_param_ * (w_square + s_square - 2 * dot));
      default:
        ALOGE("unsupported kernel: %d", kernel_type_);
    }
    return -1;
  }
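
  // Note on the RBF case above: assuming SparseWeightVector::L2Norm() returns
  // the *squared* L2 norm (as the names w_square and s_square suggest), the
  // returned value is exp(-kernel_param_ * ||w - x||^2), since
  // ||w - x||^2 = ||w||^2 + ||x||^2 - 2 (w . x).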
  // Learning Functions
  // Return values:
  //  1: full update went through.
  //  2: partial update went through (for SL only).
  //  0: no update necessary.
  // -1: error.
  int UpdateClassifier(const SparseWeightVector<Key, Hash> &positive,
                       const SparseWeightVector<Key, Hash> &negative);
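
  // Example training loop (a sketch; `pairs` is a hypothetical container of
  // (positive, negative) feature-vector pairs):
  //
  //   for (const auto &p : pairs) {
  //     if (ranker.UpdateClassifier(p.first, p.second) < 0) {
  //       ALOGE("update failed");
  //       break;
  //     }
  //   }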

 private:
  SparseWeightVector<Key, Hash> weight_;
  double norm_constraint_;
  double lambda_;
  RegularizationType regularization_type_;
  AdaptationMode adaptation_mode_;
  UpdateType update_type_;
  RankLossType rank_loss_type_;
  KernelType kernel_type_;
  // Kernel gain and bias are multiplicative and additive factors applied to
  // the dot product when computing the kernel function. The kernel param is
  // kernel-specific; for the polynomial kernel, it is the degree of the
  // polynomial.
  double kernel_param_;
  double kernel_gain_;
  double kernel_bias_;
  double acceptance_probability_;
  SparseWeightVector<Key, Hash> current_negative_;
  LearningRateController learning_rate_controller_;
  uint64 iteration_num_;
  // We average gradient updates over mini_batch_size_ samples before
  // performing an iteration of the algorithm.
  uint64 mini_batch_counter_;
  uint64 mini_batch_size_;
  // Specifies the number of non-zero entries allowed in a gradient.
  // The default is -1, which means we take the gradient as given by the data
  // without adding any new constraints; a positive number is treated as an
  // L0 constraint.
  int32 gradient_l0_norm_;
  // Sub-gradient updates.
  // Pure sub-gradient update without any reprojection.
  // Note that a form of L2 regularization is built into this.
  void UpdateSubGradient(const SparseWeightVector<Key, Hash> &positive,
                         const SparseWeightVector<Key, Hash> &negative,
                         const double learning_rate,
                         const double positive_score,
                         const double negative_score,
                         const int32 gradient_l0_norm);
};
}  // namespace learning_stochastic_linear
#endif  // LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_