jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
linear_classifier.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "linear_classifier.hpp"
18 
19 #include <assert.h>
20 #include <float.h>
21 
22 #include <algorithm>
23 #include <map>
24 #include <queue>
25 #include <string>
26 #include <vector>
27 #include "jubatus/util/concurrent/lock.h"
28 #include "jubatus/util/lang/bind.h"
29 
30 #include "../common/exception.hpp"
31 #include "classifier_util.hpp"
32 
33 using std::string;
34 using std::vector;
37 
38 namespace jubatus {
39 namespace core {
40 namespace classifier {
41 
43  : storage_(storage), mixable_storage_(storage_) {
44 }
45 
47 }
48 
49 namespace {
50 // This function is a workaround for libc++.
51 // libc++'s std::function<void (<any types>)> does not accept
52 // functions which returns other than void.
53 void delete_label_wrapper(linear_classifier* cb, const std::string& label) {
54  cb->unlearn_label(label);
55 }
56 }
57 
59  jubatus::util::lang::shared_ptr<unlearner::unlearner_base>
60  label_unlearner) {
61  label_unlearner->set_callback(
62  jubatus::util::lang::bind(
63  delete_label_wrapper, this, jubatus::util::lang::_1));
64  mixable_storage_.set_label_unlearner(label_unlearner);
66 }
67 
69  const common::sfv_t& sfv,
70  classify_result& scores) const {
71  scores.clear();
72 
74  storage_->inp(sfv, ret);
75  for (map_feature_val1_t::const_iterator it = ret.begin(); it != ret.end();
76  ++it) {
77  scores.push_back(classify_result_elem(it->first, it->second));
78  }
79 }
80 
81 string linear_classifier::classify(const common::sfv_t& fv) const {
82  classify_result result;
83  classify_with_scores(fv, result);
84  float max_score = -FLT_MAX;
85  string max_class;
86  for (vector<classify_result_elem>::const_iterator it = result.begin();
87  it != result.end(); ++it) {
88  if (it == result.begin() || it->score > max_score) {
89  max_score = it->score;
90  max_class = it->label;
91  }
92  }
93  return max_class;
94 }
95 
97  storage_->clear();
98  if (unlearner_) {
99  unlearner_->clear();
100  }
101 }
102 
103 vector<string> linear_classifier::get_labels() const {
104  return storage_->get_labels();
105 }
106 
107 bool linear_classifier::set_label(const string& label) {
108  check_touchable(label);
109 
110  bool result = storage_->set_label(label);
111  if (unlearner_ && result) {
112  result = unlearner_->touch(label);
113  }
114 
115  return result;
116 }
117 
118 void linear_classifier::get_status(std::map<string, string>& status) const {
119  storage_->get_status(status);
120  status["storage"] = storage_->type();
121 }
122 
124  const common::sfv_t& sfv,
125  float step_width,
126  const string& pos_label,
127  const string& neg_label) {
128  storage_->bulk_update(sfv, step_width, pos_label, neg_label);
129 }
130 
132  const common::sfv_t& fv,
133  const string& label,
134  classify_result& scores) const {
135  classify_with_scores(fv, scores);
136  float max_score = -FLT_MAX;
137  string max_class;
138  for (vector<classify_result_elem>::const_iterator it = scores.begin();
139  it != scores.end(); ++it) {
140  if (it->label == label) {
141  continue;
142  }
143  if (it->score > max_score || it == scores.begin()) {
144  max_score = it->score;
145  max_class = it->label;
146  }
147  }
148  return max_class;
149 }
150 
152  const common::sfv_t& fv,
153  const string& label,
154  string& incorrect_label) const {
155  classify_result scores;
156  incorrect_label = get_largest_incorrect_label(fv, label, scores);
157  float correct_score = 0.f;
158  float incorrect_score = 0.f;
159  for (vector<classify_result_elem>::const_iterator it = scores.begin();
160  it != scores.end(); ++it) {
161  if (it->label == label) {
162  correct_score = it->score;
163  } else if (it->label == incorrect_label) {
164  incorrect_score = it->score;
165  }
166  }
167  return incorrect_score - correct_score;
168 }
169 
171  const common::sfv_t& sfv,
172  const string& label,
173  string& incorrect_label,
174  float& var) const {
175  float margin = calc_margin(sfv, label, incorrect_label);
176  var = 0.f;
177 
178  util::concurrent::scoped_lock lk(storage_->get_lock());
179  for (size_t i = 0; i < sfv.size(); ++i) {
180  const string& feature = sfv[i].first;
181  const float val = sfv[i].second;
182  feature_val2_t weight_covars;
183  storage_->get2_nolock(feature, weight_covars);
184  float label_covar = 1.f;
185  float incorrect_label_covar = 1.f;
186  for (size_t j = 0; j < weight_covars.size(); ++j) {
187  if (weight_covars[j].first == label) {
188  label_covar = weight_covars[j].second.v2;
189  } else if (weight_covars[j].first == incorrect_label) {
190  incorrect_label_covar = weight_covars[j].second.v2;
191  }
192  }
193  var += (label_covar + incorrect_label_covar) * val * val;
194  }
195  return margin;
196 }
197 
199  float ret = 0.f;
200  for (size_t i = 0; i < fv.size(); ++i) {
201  ret += fv[i].second * fv[i].second;
202  }
203  return ret;
204 }
205 
207  storage_->pack(pk);
208 }
209 void linear_classifier::unpack(msgpack::object o) {
210  storage_->unpack(o);
211 }
212 
214  return &mixable_storage_;
215 }
216 
217 void linear_classifier::touch(const std::string& label) {
218  check_touchable(label);
219 
220  if (unlearner_) {
221  unlearner_->touch(label);
222  }
223 }
224 
// Guard run before a label is touched: the branch below is taken only when
// an unlearner is configured and its can_touch(label) returns false.
// Otherwise this function does nothing.
// NOTE(review): listing line 227 is absent from this text. The string
// expression on line 228 closes two parentheses that must be opened on that
// missing line (presumably a throw of an exception carrying this message;
// confirm the exact exception type against the real source).
225 void linear_classifier::check_touchable(const std::string& label) {
226  if (unlearner_ && !unlearner_->can_touch(label)) {
228  "no more space available to add new label: " + label));
229  }
230 }
231 
232 bool linear_classifier::delete_label(const std::string& label) {
233  // Remove the label from the model.
234  bool result = storage_->delete_label(label);
235 
236  if (unlearner_ && result) {
237  // Notify unlearner that the label was removed.
238  result = unlearner_->remove(label);
239  }
240 
241  return result;
242 }
243 
247 bool linear_classifier::unlearn_label(const std::string& label) {
248  // this method must be called via touch() function.
249  // touch() must be done with holding lock
250  // so this function must not get lock
251  return storage_->delete_label_nolock(label);
252 }
253 
254 } // namespace classifier
255 } // namespace core
256 } // namespace jubatus
jubatus::util::lang::shared_ptr< jubatus::core::storage::storage_base > storage_ptr
std::vector< classify_result_elem > classify_result
static float squared_norm(const common::sfv_t &sfv)
bool unlearn_label(const std::string &label)
jubatus::util::data::unordered_map< std::string, val1_t > map_feature_val1_t
void set_label_unlearner(jubatus::util::lang::shared_ptr< unlearner::unlearner_base > label_unlearner)
void classify_with_scores(const common::sfv_t &fv, classify_result &scores) const
void set_label_unlearner(jubatus::util::lang::shared_ptr< unlearner::unlearner_base > label_unlearner)
jubatus::util::lang::shared_ptr< unlearner::unlearner_base > label_unlearner() const
float calc_margin_and_variance(const common::sfv_t &sfv, const std::string &label, std::string &incorrect_label, float &variance) const
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
void check_touchable(const std::string &label)
jubatus::util::lang::shared_ptr< unlearner::unlearner_base > unlearner_
void pack(framework::packer &pk) const
std::string classify(const common::sfv_t &fv) const
std::vector< std::string > get_labels() const
framework::linear_function_mixer mixable_storage_
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
virtual void get_status(std::map< std::string, std::string > &status) const
std::string get_largest_incorrect_label(const common::sfv_t &sfv, const std::string &label, classify_result &scores) const
std::vector< std::pair< std::string, val2_t > > feature_val2_t
float calc_margin(const common::sfv_t &sfv, const std::string &label, std::string &incorrect_label) const
void update_weight(const common::sfv_t &sfv, float step_weigth, const std::string &pos_label, const std::string &neg_class)