jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
local_storage_mixture.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011-2014 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
18 #include <cmath>
19 #include <map>
20 #include <string>
21 #include <vector>
22 #include "jubatus/util/data/intern.h"
23 
24 using std::string;
25 
26 namespace jubatus {
27 namespace core {
28 namespace storage {
29 
30 namespace {
31 
// Accumulates b into a in place: each of the three components
// (v1, v2, v3) of b is added to the matching component of a.
32 void increase(val3_t& a, const val3_t& b) {
33  a.v1 += b.v1;
34  a.v2 += b.v2;
35  a.v3 += b.v3;
36 }
37 
// Removes the class id delete_id from every row of tbl.
// For each entry of tbl, the key delete_id is erased from the entry's inner
// map; an entry whose inner map becomes empty is erased from tbl as well.
38 void delete_label_from_weight(uint64_t delete_id, id_features3_t& tbl) {
// The loop header has no increment: the iterator is advanced inside the body
// so that erasing the current entry does not step past the next one.
39  for (id_features3_t::iterator it = tbl.begin(); it != tbl.end(); ) {
40  it->second.erase(delete_id);
41  if (it->second.empty()) {
42  it = tbl.erase(it);  // continue from the iterator returned by erase
43  } else {
44  ++it;
45  }
46  }
47 }
48 
49 } // namespace
50 
52 }
53 
55 }
56 
58  const string& feature,
59  id_feature_val3_t& ret) const {
60  ret.clear();
61  id_features3_t::const_iterator it = tbl_.find(feature);
62 
63  bool found = false;
64  if (it != tbl_.end()) {
65  ret = it->second;
66  found = true;
67  }
68 
69  id_features3_t::const_iterator it_diff = tbl_diff_.find(feature);
70  if (it_diff != tbl_diff_.end()) {
71  found = true;
72  for (id_feature_val3_t::const_iterator it2 = it_diff->second.begin();
73  it2 != it_diff->second.end(); ++it2) {
74  val3_t& val3 = ret[it2->first]; // may create
75  increase(val3, it2->second);
76  }
77  }
78  return found;
79 }
80 
82  const std::string& feature,
83  feature_val1_t& ret) const {
84  util::concurrent::scoped_lock lk(mutex_);
85  get_nolock(feature, ret);
86 }
88  const std::string& feature,
89  feature_val1_t& ret) const {
90  ret.clear();
92  get_internal(feature, m3);
93  for (id_feature_val3_t::const_iterator it = m3.begin(); it != m3.end();
94  ++it) {
95  ret.push_back(make_pair(class2id_.get_key(it->first), it->second.v1));
96  }
97 }
98 
100  const std::string& feature,
101  feature_val2_t& ret) const {
102  util::concurrent::scoped_lock lk(mutex_);
103  get2_nolock(feature, ret);
104 }
106  const std::string& feature,
107  feature_val2_t& ret) const {
108  ret.clear();
110  get_internal(feature, m3);
111  for (id_feature_val3_t::const_iterator it = m3.begin(); it != m3.end();
112  ++it) {
113  ret.push_back(
114  make_pair(class2id_.get_key(it->first),
115  val2_t(it->second.v1, it->second.v2)));
116  }
117 }
118 
120  const std::string& feature,
121  feature_val3_t& ret) const {
122  util::concurrent::scoped_lock lk(mutex_);
123  get3_nolock(feature, ret);
124 }
126  const std::string& feature,
127  feature_val3_t& ret) const {
128  ret.clear();
130  get_internal(feature, m3);
131  for (id_feature_val3_t::const_iterator it = m3.begin(); it != m3.end();
132  ++it) {
133  ret.push_back(make_pair(class2id_.get_key(it->first), it->second));
134  }
135 }
136 
138  map_feature_val1_t& ret) const {
139  ret.clear();
140 
141 // Use a uint64_t-keyed map instead of a string-keyed map, because hashing strings is slow
142  jubatus::util::data::unordered_map<uint64_t, float> ret_id;
143  for (common::sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it) {
144  const string& feature = it->first;
145  const float val = it->second;
147  get_internal(feature, m);
148  for (id_feature_val3_t::const_iterator it3 = m.begin(); it3 != m.end();
149  ++it3) {
150  ret_id[it3->first] += it3->second.v1 * val;
151  }
152  }
153 
154  std::vector<std::string> labels = class2id_.get_all_id2key();
155  for (size_t i = 0; i < labels.size(); ++i) {
156  const std::string& label = labels[i];
157  uint64_t id = class2id_.get_id_const(label);
158  if (id == common::key_manager::NOTFOUND || ret_id.count(id) == 0) {
159  ret[label] = 0.0;
160  } else {
161  ret[label] = ret_id[id];
162  }
163  }
164 }
165 
167  const string& feature,
168  const string& klass,
169  const val1_t& w) {
170  util::concurrent::scoped_lock lk(mutex_);
171  set_nolock(feature, klass, w);
172 }
174  const string& feature,
175  const string& klass,
176  const val1_t& w) {
177  uint64_t class_id = class2id_.get_id(klass);
178  float w_in_table = tbl_[feature][class_id].v1;
179  tbl_diff_[feature][class_id].v1 = w - w_in_table;
180 }
181 
183  const string& feature,
184  const string& klass,
185  const val2_t& w) {
186  util::concurrent::scoped_lock lk(mutex_);
187  set2_nolock(feature, klass, w);
188 }
190  const string& feature,
191  const string& klass,
192  const val2_t& w) {
193  uint64_t class_id = class2id_.get_id(klass);
194  float w1_in_table = tbl_[feature][class_id].v1;
195  float w2_in_table = tbl_[feature][class_id].v2;
196 
197  val3_t& triple = tbl_diff_[feature][class_id];
198  triple.v1 = w.v1 - w1_in_table;
199  triple.v2 = w.v2 - w2_in_table;
200 }
201 
203  const string& feature,
204  const string& klass,
205  const val3_t& w) {
206  util::concurrent::scoped_lock lk(mutex_);
207  set3_nolock(feature, klass, w);
208 }
210  const string& feature,
211  const string& klass,
212  const val3_t& w) {
213  uint64_t class_id = class2id_.get_id(klass);
214  val3_t v = tbl_[feature][class_id];
215  tbl_diff_[feature][class_id] = w - v;
216 }
217 
219  std::map<std::string, std::string>& status) const {
220  status["num_features"] =
221  jubatus::util::lang::lexical_cast<std::string>(tbl_.size());
222  status["num_classes"] = jubatus::util::lang::lexical_cast<std::string>(
223  class2id_.size());
224  status["diff_size"] =
225  jubatus::util::lang::lexical_cast<std::string>(tbl_diff_.size());
226 }
227 
229  const string& feature,
230  const string& inc_class,
231  const string& dec_class,
232  const val1_t& v) {
233  id_feature_val3_t& feature_row = tbl_diff_[feature];
234  feature_row[class2id_.get_id(inc_class)].v1 += v;
235  feature_row[class2id_.get_id(dec_class)].v1 -= v;
236 }
237 
239  const common::sfv_t& sfv,
240  float step_width,
241  const string& inc_class,
242  const string& dec_class) {
243  uint64_t inc_id = class2id_.get_id(inc_class);
244  typedef common::sfv_t::const_iterator iter_t;
245  if (dec_class != "") {
246  uint64_t dec_id = class2id_.get_id(dec_class);
247  for (iter_t it = sfv.begin(); it != sfv.end(); ++it) {
248  float val = it->second * step_width;
249  id_feature_val3_t& feature_row = tbl_diff_[it->first];
250  feature_row[inc_id].v1 += val;
251  feature_row[dec_id].v1 -= val;
252  }
253  } else {
254  for (iter_t it = sfv.begin(); it != sfv.end(); ++it) {
255  float val = it->second * step_width;
256  id_feature_val3_t& feature_row = tbl_diff_[it->first];
257  feature_row[inc_id].v1 += val;
258  }
259  }
260 }
261 
263  ret.diff.clear();
264  for (jubatus::util::data::unordered_map<string, id_feature_val3_t>::
265  const_iterator it = tbl_diff_.begin(); it != tbl_diff_.end(); ++it) {
266  id_feature_val3_t::const_iterator it2 = it->second.begin();
267  feature_val3_t fv3;
268  for (; it2 != it->second.end(); ++it2) {
269  fv3.push_back(make_pair(class2id_.get_key(it2->first), it2->second));
270  }
271  ret.diff.push_back(make_pair(it->first, fv3));
272  }
274 }
275 
277  const diff_t& average) {
278  if (average.expect_version == model_version_) {
279  for (features3_t::const_iterator it = average.diff.begin();
280  it != average.diff.end();
281  ++it) {
282  const feature_val3_t& avg = it->second;
283  id_feature_val3_t& orig = tbl_[it->first];
284  for (feature_val3_t::const_iterator it2 = avg.begin(); it2 != avg.end();
285  ++it2) {
286  val3_t& triple = orig[class2id_.get_id(it2->first)]; // may create
287  increase(triple, it2->second);
288  }
289  }
291  tbl_diff_.clear();
292  return true;
293  } else {
294  return false;
295  }
296 }
297 
// Registers label with class2id_ by requesting its id; the id returned by
// get_id is discarded.
// NOTE(review): unlike delete_label, this function does not take mutex_
// itself -- confirm that callers synchronize access.
298 void local_storage_mixture::register_label(const std::string& label) {
299  // get_id method creates an entry when the label doesn't exist
300  class2id_.get_id(label);
301 }
302 
// Locking wrapper around delete_label_nolock: holds a scoped_lock on mutex_
// for the duration of the call and returns delete_label_nolock's result.
303 bool local_storage_mixture::delete_label(const std::string& label) {
304  util::concurrent::scoped_lock lk(mutex_);
305  return delete_label_nolock(label);
306 }
307 
// Deletes label without acquiring mutex_ in this function.
// Returns false, changing nothing, when class2id_ has no id for label.
// Otherwise removes that id from every row of tbl_ and tbl_diff_, deletes
// the label key from class2id_, and returns true.
308 bool local_storage_mixture::delete_label_nolock(const std::string& label) {
309  uint64_t delete_id = class2id_.get_id_const(label);  // const lookup of the label's id
310  if (delete_id == common::key_manager::NOTFOUND) {
311  return false;
312  }
313  delete_label_from_weight(delete_id, tbl_);
314  delete_label_from_weight(delete_id, tbl_diff_);
315  class2id_.delete_key(label);
316  return true;
317 }
318 
320  // Clear and minimize
321  id_features3_t().swap(tbl_);
323  id_features3_t().swap(tbl_diff_);
324 }
325 
// Returns the vector of strings produced by class2id_.get_all_id2key().
326 std::vector<std::string> local_storage_mixture::get_labels() const {
327  return class2id_.get_all_id2key();
328 }
329 
// Forwards label to class2id_.set_key and returns its boolean result.
// NOTE(review): mutex_ is not taken here -- confirm that callers synchronize
// access.
330 bool local_storage_mixture::set_label(const std::string& label) {
331  return class2id_.set_key(label);
332 }
333 
335  packer.pack(*this);
336 }
337 
// Loads this object's state from the msgpack object o by calling
// o.convert(this).
338 void local_storage_mixture::unpack(msgpack::object o) {
339  o.convert(this);
340 }
341 
// Returns the fixed type name string "local_storage_mixture".
342 std::string local_storage_mixture::type() const {
343  return "local_storage_mixture";
344 }
345 
346 } // namespace storage
347 } // namespace core
348 } // namespace jubatus
uint64_t get_id_const(const std::string &key) const
Definition: key_manager.cpp:67
void set3_nolock(const std::string &feature, const std::string &klass, const val3_t &w)
void update(const std::string &feature, const std::string &inc_class, const std::string &dec_class, const val1_t &v)
void get2_nolock(const std::string &feature, feature_val2_t &ret) const
void set(const std::string &feature, const std::string &klass, const val1_t &w)
bool get_internal(const std::string &feature, id_feature_val3_t &ret) const
void inp(const common::sfv_t &sfv, map_feature_val1_t &ret) const
inner product
void get3(const std::string &feature, feature_val3_t &ret) const
jubatus::util::data::unordered_map< std::string, val1_t > map_feature_val1_t
void get3_nolock(const std::string &feature, feature_val3_t &ret) const
void get(const std::string &feature, feature_val1_t &ret) const
bool set_key(const std::string &key)
Definition: key_manager.cpp:57
jubatus::util::data::unordered_map< std::string, id_feature_val3_t > id_features3_t
void set3(const std::string &feature, const std::string &klass, const val3_t &w)
std::vector< std::pair< std::string, val1_t > > feature_val1_t
std::vector< std::string > get_labels() const
const std::string & get_key(const uint64_t id) const
Definition: key_manager.cpp:78
void get_nolock(const std::string &feature, feature_val1_t &ret) const
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
std::vector< T > v(size)
void pack(framework::packer &packer) const
void get_status(std::map< std::string, std::string > &status) const
void get2(const std::string &feature, feature_val2_t &ret) const
void set_nolock(const std::string &feature, const std::string &klass, const val1_t &w)
uint64_t get_id(const std::string &key)
Definition: key_manager.cpp:48
std::vector< std::pair< std::string, val3_t > > feature_val3_t
void set2(const std::string &feature, const std::string &klass, const val2_t &w)
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
void delete_key(const std::string &name)
jubatus::util::data::unordered_map< uint64_t, val3_t > id_feature_val3_t
std::vector< std::pair< std::string, val2_t > > feature_val2_t
void set2_nolock(const std::string &feature, const std::string &klass, const val2_t &w)
std::vector< std::string > get_all_id2key() const
Definition: key_manager.cpp:87
void bulk_update(const common::sfv_t &sfv, float step_width, const std::string &inc_class, const std::string &dec_class)