jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
local_storage.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011-2014 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "local_storage.hpp"
18 #include <cmath>
19 #include <map>
20 #include <string>
21 #include <vector>
22 #include "jubatus/util/data/intern.h"
23 #include "jubatus/util/concurrent/lock.h"
24 
25 using std::string;
26 using std::vector;
27 using jubatus::util::concurrent::scoped_lock;
28 
29 
30 namespace jubatus {
31 namespace core {
32 namespace storage {
33 
35 }
36 
38 }
39 
40 void local_storage::get(const string& feature, feature_val1_t& ret) const {
41  scoped_lock lk(mutex_);
42  get_nolock(feature, ret);
43 }
44 
45 void local_storage::get_nolock(const string& feature,
46  feature_val1_t& ret) const {
47  ret.clear();
48  id_features3_t::const_iterator cit = tbl_.find(feature);
49  if (cit == tbl_.end()) {
50  return;
51  }
52  const id_feature_val3_t& m = cit->second;
53  for (id_feature_val3_t::const_iterator it = m.begin(); it != m.end(); ++it) {
54  ret.push_back(make_pair(class2id_.get_key(it->first), it->second.v1));
55  }
56 }
57 
58 void local_storage::get2(const string& feature, feature_val2_t& ret) const {
59  scoped_lock lk(mutex_);
60  get2_nolock(feature, ret);
61 }
62 
63 void local_storage::get2_nolock(const string& feature,
64  feature_val2_t& ret) const {
65  ret.clear();
66  id_features3_t::const_iterator cit = tbl_.find(feature);
67  if (cit == tbl_.end()) {
68  return;
69  }
70  const id_feature_val3_t& m = cit->second;
71  for (id_feature_val3_t::const_iterator it = m.begin(); it != m.end(); ++it) {
72  ret.push_back(make_pair(class2id_.get_key(it->first),
73  val2_t(it->second.v1, it->second.v2)));
74  }
75 }
76 
77 void local_storage::get3(const string& feature, feature_val3_t& ret) const {
78  scoped_lock lk(mutex_);
79  get3_nolock(feature, ret);
80 }
81 
82 void local_storage::get3_nolock(const string& feature,
83  feature_val3_t& ret) const {
84  ret.clear();
85  id_features3_t::const_iterator cit = tbl_.find(feature);
86  if (cit == tbl_.end()) {
87  return;
88  }
89  const id_feature_val3_t& m = cit->second;
90  for (id_feature_val3_t::const_iterator it = m.begin(); it != m.end(); ++it) {
91  ret.push_back(make_pair(class2id_.get_key(it->first), it->second));
92  }
93 }
94 
96  const {
97  ret.clear();
98 
99  scoped_lock lk(mutex_);
100  // Use uin64_t map instead of string map as hash function for string is slow
101  jubatus::util::data::unordered_map<uint64_t, float> ret_id;
102  for (common::sfv_t::const_iterator it = sfv.begin(); it != sfv.end(); ++it) {
103  const string& feature = it->first;
104  const float val = it->second;
105  id_features3_t::const_iterator it2 = tbl_.find(feature);
106  if (it2 == tbl_.end()) {
107  continue;
108  }
109  const id_feature_val3_t& m = it2->second;
110  for (id_feature_val3_t::const_iterator it3 = m.begin(); it3 != m.end();
111  ++it3) {
112  ret_id[it3->first] += it3->second.v1 * val;
113  }
114  }
115 
116  std::vector<std::string> labels = class2id_.get_all_id2key();
117  for (size_t i = 0; i < labels.size(); ++i) {
118  const std::string& label = labels[i];
119  uint64_t id = class2id_.get_id_const(label);
120  if (id == common::key_manager::NOTFOUND || ret_id.count(id) == 0) {
121  ret[label] = 0.0;
122  } else {
123  ret[label] = ret_id[id];
124  }
125  }
126 }
127 
129  const string& feature,
130  const string& klass,
131  const val1_t& w) {
132  scoped_lock lk(mutex_);
133  set_nolock(feature, klass, w);
134  tbl_[feature][class2id_.get_id(klass)].v1 = w;
135 }
137  const string& feature,
138  const string& klass,
139  const val1_t& w) {
140  tbl_[feature][class2id_.get_id(klass)].v1 = w;
141 }
142 
144  const string& feature,
145  const string& klass,
146  const val2_t& w) {
147  scoped_lock lk(mutex_);
148  set2_nolock(feature, klass, w);
149 }
150 
152  const string& feature,
153  const string& klass,
154  const val2_t& w) {
155  val3_t& val3 = tbl_[feature][class2id_.get_id(klass)];
156  val3.v1 = w.v1;
157  val3.v2 = w.v2;
158 }
159 
161  const string& feature,
162  const string& klass,
163  const val3_t& w) {
164  scoped_lock lk(mutex_);
165  set3_nolock(feature, klass, w);
166 }
167 
169  const string& feature,
170  const string& klass,
171  const val3_t& w) {
172  tbl_[feature][class2id_.get_id(klass)] = w;
173 }
174 
175 void local_storage::get_status(std::map<string, std::string>& status) const {
176  scoped_lock lk(mutex_);
177  status["num_features"] =
178  jubatus::util::lang::lexical_cast<std::string>(tbl_.size());
179  status["num_classes"] =
180  jubatus::util::lang::lexical_cast<std::string>(class2id_.size());
181 }
182 
184  float sum = 0.f;
185  for (id_feature_val3_t::const_iterator it = f.begin(); it != f.end(); ++it) {
186  sum += std::fabs(it->second.v1);
187  }
188  return sum;
189 }
190 
192  const common::sfv_t& sfv,
193  float step_width,
194  const string& inc_class,
195  const string& dec_class) {
196  scoped_lock lk(mutex_);
197  uint64_t inc_id = class2id_.get_id(inc_class);
198  typedef common::sfv_t::const_iterator iter_t;
199  if (dec_class != "") {
200  uint64_t dec_id = class2id_.get_id(dec_class);
201  for (iter_t it = sfv.begin(); it != sfv.end(); ++it) {
202  float val = it->second * step_width;
203  id_feature_val3_t& feature_row = tbl_[it->first];
204  feature_row[inc_id].v1 += val;
205  feature_row[dec_id].v1 -= val;
206  }
207  } else {
208  for (iter_t it = sfv.begin(); it != sfv.end(); ++it) {
209  float val = it->second * step_width;
210  id_feature_val3_t& feature_row = tbl_[it->first];
211  feature_row[inc_id].v1 += val;
212  }
213  }
214 }
215 
217  const string& feature,
218  const string& inc_class,
219  const string& dec_class,
220  const val1_t& v) {
221  scoped_lock lk(mutex_);
222  id_feature_val3_t& feature_row = tbl_[feature];
223  feature_row[class2id_.get_id(inc_class)].v1 += v;
224  feature_row[class2id_.get_id(dec_class)].v1 -= v;
225 }
226 
227 util::concurrent::mutex& local_storage::get_lock() const {
228  return mutex_;
229 }
230 
231 void local_storage::register_label(const std::string& label) {
232  scoped_lock lk(mutex_);
233  // get_id method creates an entry when the label doesn't exist
234  class2id_.get_id(label);
235 }
236 
237 vector<string> local_storage::get_labels() const {
238  scoped_lock lk(mutex_);
239  return class2id_.get_all_id2key();
240 }
241 
242 bool local_storage::set_label(const std::string& label) {
243  scoped_lock lk(mutex_);
244  return class2id_.set_key(label);
245 }
246 
247 bool local_storage::delete_label(const std::string& label) {
248  scoped_lock lk(mutex_);
249  return delete_label_nolock(label);
250 }
251 
252 bool local_storage::delete_label_nolock(const std::string& label) {
253  uint64_t delete_id = class2id_.get_id_const(label);
254  if (delete_id == common::key_manager::NOTFOUND) {
255  return false;
256  }
257  for (id_features3_t::iterator it = tbl_.begin();
258  it != tbl_.end();
259  ) {
260  const bool deleted = it->second.erase(delete_id);
261  if (deleted && it->second.empty()) {
262  it = tbl_.erase(it);
263  } else {
264  ++it;
265  }
266  }
267  class2id_.delete_key(label);
268  return true;
269 }
270 
272  scoped_lock lk(mutex_);
273  // Clear and minimize
274  id_features3_t().swap(tbl_);
276 }
277 
279  scoped_lock lk(mutex_);
280  packer.pack(*this);
281 }
282 
283 void local_storage::unpack(msgpack::object o) {
284  scoped_lock lk(mutex_);
285  o.convert(this);
286 }
287 
288 std::string local_storage::type() const {
289  return "local_storage";
290 }
291 
292 } // namespace storage
293 } // namespace core
294 } // namespace jubatus
uint64_t get_id_const(const std::string &key) const
Definition: key_manager.cpp:67
void set3(const std::string &feature, const std::string &klass, const val3_t &w)
void set2(const std::string &feature, const std::string &klass, const val2_t &w)
bool set_label(const std::string &label)
void set2_nolock(const std::string &feature, const std::string &klass, const val2_t &w)
void get_status(std::map< std::string, std::string > &status) const
void update(const std::string &feature, const std::string &inc_class, const std::string &dec_class, const val1_t &v)
unordered_map< string, uint64_t >::const_iterator cit
Definition: key_manager.cpp:33
jubatus::util::data::unordered_map< std::string, val1_t > map_feature_val1_t
void get_nolock(const std::string &feature, feature_val1_t &ret) const
bool set_key(const std::string &key)
Definition: key_manager.cpp:57
jubatus::util::data::unordered_map< std::string, id_feature_val3_t > id_features3_t
void inp(const common::sfv_t &sfv, map_feature_val1_t &ret) const
void register_label(const std::string &label)
void get2_nolock(const std::string &feature, feature_val2_t &ret) const
void pack(framework::packer &packer) const
void bulk_update(const common::sfv_t &sfv, float step_width, const std::string &inc_class, const std::string &dec_class)
std::vector< std::pair< std::string, val1_t > > feature_val1_t
void get(const std::string &feature, feature_val1_t &ret) const
void set(const std::string &feature, const std::string &klass, const val1_t &w)
void get3(const std::string &feature, feature_val3_t &ret) const
const std::string & get_key(const uint64_t id) const
Definition: key_manager.cpp:78
void set3_nolock(const std::string &feature, const std::string &klass, const val3_t &w)
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
bool delete_label_nolock(const std::string &label)
std::vector< T > v(size)
double sum(const common::sfv_t &p)
Definition: util.cpp:47
uint64_t get_id(const std::string &key)
Definition: key_manager.cpp:48
std::vector< std::pair< std::string, val3_t > > feature_val3_t
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
void delete_key(const std::string &name)
bool delete_label(const std::string &label)
jubatus::util::data::unordered_map< uint64_t, val3_t > id_feature_val3_t
void set_nolock(const std::string &feature, const std::string &klass, const val1_t &w)
std::vector< std::string > get_labels() const
std::vector< std::pair< std::string, val2_t > > feature_val2_t
std::vector< std::string > get_all_id2key() const
Definition: key_manager.cpp:87
void get3_nolock(const std::string &feature, feature_val3_t &ret) const
util::concurrent::mutex & get_lock() const
void get2(const std::string &feature, feature_val2_t &ret) const
float feature_fabssum(const id_feature_val3_t &f)