stochastic_linear_ranker.h revision 6b4eebc73439cbc3ddfb547444a341d1f9be7996
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// Stochastic Linear Ranking algorithms. 18// This class will implement a set of incremental algorithms for ranking tasks 19// They support both L1 and L2 regularizations. 20 21 22#ifndef LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 23#define LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 24 25#include <cmath> 26#include <hash_map> 27#include <string> 28 29#include <sys/types.h> 30#include "cutils/log.h" 31#include "common_defs.h" 32#include "learning_rate_controller-inl.h" 33#include "sparse_weight_vector.h" 34 35namespace learning_stochastic_linear { 36 37// NOTE: This Stochastic Linear Ranker supports only the following update types: 38// SL: Stochastic Linear 39// CS: Constraint Satisfaction 40template<class Key = std::string, class Hash = std::hash_map<std::string, double> > 41class StochasticLinearRanker { 42 public: 43 // initialize lambda_ and constraint to a meaningful default. Will give 44 // equal weight to the error and regularizer. 45 StochasticLinearRanker() { 46 iteration_num_ = 0; 47 lambda_ = 1.0; 48 learning_rate_controller_.SetLambda(lambda_); 49 mini_batch_size_ = 1; 50 learning_rate_controller_.SetMiniBatchSize(mini_batch_size_); 51 learning_rate_controller_.SetAdaptationMode(INV_LINEAR); 52 update_type_ = SL; 53 kernel_type_ = LINEAR; 54 kernel_param_ = 1.0; 55 kernel_gain_ = 1.0; 56 kernel_bias_ = 0.0; 57 rank_loss_type_ = PAIRWISE; 58 acceptence_probability_ = 0.1; 59 mini_batch_counter_ = 0; 60 gradient_l0_norm_ = -1; 61 norm_constraint_ = 1.0; 62 } 63 64 ~StochasticLinearRanker() {} 65 // Getters and setters 66 double GetIterationNumber() const { 67 return iteration_num_; 68 } 69 double GetNormContraint() const { 70 return norm_constraint_; 71 } 72 RegularizationType GetRegularizationType() const { 73 return regularization_type_; 74 } 75 double GetLambda() const { 76 return lambda_; 77 } 78 uint64 GetMiniBatchSize() const { 79 return mini_batch_size_; 80 } 81 int32 GetGradientL0Norm() const { 82 return gradient_l0_norm_; 83 } 84 UpdateType GetUpdateType() const { 85 return update_type_; 86 } 87 AdaptationMode GetAdaptationMode() const { 88 return adaptation_mode_; 89 } 90 // This function additionally returns the basic kernel parameter. In case of 91 // polynomial kernel, it implies the degree of the polynomial. In case of 92 // RBF kernel, it implies the sigma parameter. In case of linear kernel, 93 // it is not used. 94 // It also returns the kernel gain and bias. 95 KernelType GetKernelType(double *param, double *gain, double *bias) const { 96 *param = kernel_param_; 97 *gain = kernel_gain_; 98 *bias = kernel_bias_; 99 return kernel_type_; 100 } 101 RankLossType GetRankLossType() const { 102 return rank_loss_type_; 103 } 104 double GetAcceptanceProbability() const { 105 return acceptence_probability_; 106 } 107 void SetIterationNumber(uint64 num) { 108 iteration_num_=num; 109 } 110 void SetNormConstraint(const double norm) { 111 norm_constraint_ = norm; 112 } 113 void SetRegularizationType(const RegularizationType r) { 114 regularization_type_ = r; 115 } 116 void SetLambda(double l) { 117 lambda_ = l; 118 learning_rate_controller_.SetLambda(l); 119 } 120 void SetMiniBatchSize(const uint64 msize) { 121 mini_batch_size_ = msize; 122 learning_rate_controller_.SetMiniBatchSize(msize); 123 } 124 void SetAdaptationMode(AdaptationMode m) { 125 adaptation_mode_ = m; 126 learning_rate_controller_.SetAdaptationMode(m); 127 } 128 // This function additionally sets the basic kernel parameter. In case of 129 // polynomial kernel, it implies the degree of the polynomial. In case of 130 // RBF kernel, it implies the sigma parameter. In case of linear kernel, 131 // it is not used. 132 // It also sets the kernel gain and bias. NOTE: in most use cases, gain should 133 // be set to 1.0 and bias to 0.0. 134 void SetKernelType(KernelType k, double param, double gain, double bias) { 135 kernel_type_ = k; 136 kernel_param_ = param; 137 kernel_gain_ = gain; 138 kernel_bias_ = bias; 139 } 140 void SetUpdateType(UpdateType u) { 141 update_type_ = u; 142 } 143 void SetRankLossType(RankLossType r) { 144 rank_loss_type_ = r; 145 } 146 void SetAcceptanceProbability(double p) { 147 acceptence_probability_ = p; 148 } 149 void SetGradientL0Norm(const int32 gradient_l0_norm) { 150 gradient_l0_norm_ = gradient_l0_norm; 151 } 152 // Load an existing model 153 void LoadWeights(const SparseWeightVector<Key, Hash> &model) { 154 weight_.LoadWeightVector(model); 155 } 156 // Save current model 157 void SaveWeights(SparseWeightVector<Key, Hash> *model) { 158 model->LoadWeightVector(weight_); 159 } 160 // Scoring 161 double ScoreSample(const SparseWeightVector<Key, Hash> &sample) { 162 const double dot = weight_.DotProduct(sample); 163 double w_square; 164 double s_square; 165 switch (kernel_type_) { 166 case LINEAR: 167 return dot; 168 case POLY: 169 return pow(kernel_gain_ * dot + kernel_bias_, kernel_param_); 170 case RBF: 171 w_square = weight_.L2Norm(); 172 s_square = sample.L2Norm(); 173 return exp(-1 * kernel_param_ * (w_square + s_square - 2 * dot)); 174 default: 175 ALOGE("unsupported kernel: %d", kernel_type_); 176 } 177 return -1; 178 } 179 // Learning Functions 180 // Return values: 181 // 1 :full update went through 182 // 2 :partial update went through (for SL only) 183 // 0 :no update necessary. 184 // -1:error. 185 int UpdateClassifier(const SparseWeightVector<Key, Hash> &positive, 186 const SparseWeightVector<Key, Hash> &negative); 187 188 private: 189 SparseWeightVector<Key, Hash> weight_; 190 double norm_constraint_; 191 double lambda_; 192 RegularizationType regularization_type_; 193 AdaptationMode adaptation_mode_; 194 UpdateType update_type_; 195 RankLossType rank_loss_type_; 196 KernelType kernel_type_; 197 // Kernel gain and bias are typically multiplicative and additive factors to 198 // the dot product while calculating the kernel function. Kernel param is 199 // kernel-specific. In case of polynomial kernel, it is the degree of the 200 // polynomial. 201 double kernel_param_; 202 double kernel_gain_; 203 double kernel_bias_; 204 double acceptence_probability_; 205 SparseWeightVector<Key, Hash> current_negative_; 206 LearningRateController learning_rate_controller_; 207 uint64 iteration_num_; 208 // We average out gradient updates for mini_batch_size_ samples 209 // before performing an iteration of the algorithm. 210 uint64 mini_batch_counter_; 211 uint64 mini_batch_size_; 212 // Specifies the number of non-zero entries allowed in a gradient. 213 // Default is -1 which means we take the gradient as given by data without 214 // adding any new constraints. positive number is treated as an L0 constraint 215 int32 gradient_l0_norm_; 216 // Sub-Gradient Updates 217 // Pure Sub-Gradient update without any reprojection 218 // Note that a form of L2 regularization is built into this 219 void UpdateSubGradient(const SparseWeightVector<Key, Hash> &positive, 220 const SparseWeightVector<Key, Hash> &negative, 221 const double learning_rate, 222 const double positive_score, 223 const double negative_score, 224 const int32 gradient_l0_norm); 225 226}; 227} // namespace learning_stochastic_linear 228#endif // LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 229