jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
bit_index_storage.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "bit_index_storage.hpp"
18 #include <functional>
19 #include <iterator>
20 #include <sstream>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include "fixed_size_heap.hpp"
25 
26 using std::greater;
27 using std::istringstream;
28 using std::ostringstream;
29 using std::make_pair;
30 using std::pair;
31 using std::string;
32 using std::vector;
33 
34 namespace jubatus {
35 namespace core {
36 namespace storage {
37 
39 }
40 
42 }
43 
44 void bit_index_storage::set_row(const string& row, const bit_vector& bv) {
45  bitvals_diff_[row] = bv;
46 }
47 
48 void bit_index_storage::get_row(const string& row, bit_vector& bv) const {
49  {
50  bit_table_t::const_iterator it = bitvals_diff_.find(row);
51  if (it != bitvals_diff_.end()) {
52  bv = it->second;
53  return;
54  }
55  }
56  {
57  bit_table_t::const_iterator it = bitvals_.find(row);
58  if (it != bitvals_.end()) {
59  bv = it->second;
60  return;
61  }
62  }
63  bv = bit_vector();
64 }
65 
66 void bit_index_storage::remove_row(const string& row) {
67  if (bitvals_.find(row) == bitvals_.end()) {
68  // The row is not in the master table; we can
69  // immedeately remove it from the diff table.
70  bitvals_diff_.erase(row);
71  } else {
72  // Keep the row in the diff table until next MIX to
73  // propagate the removal of this row to other nodes.
74  bitvals_diff_[row] = bit_vector();
75  }
76 }
77 
79  bit_table_t().swap(bitvals_);
80  bit_table_t().swap(bitvals_diff_);
81 }
82 
83 void bit_index_storage::get_all_row_ids(std::vector<std::string>& ids) const {
84  ids.clear();
85  for (bit_table_t::const_iterator it = bitvals_.begin(); it != bitvals_.end();
86  ++it) {
87  ids.push_back(it->first);
88  }
89  for (bit_table_t::const_iterator it = bitvals_diff_.begin();
90  it != bitvals_diff_.end(); ++it) {
91  if (bitvals_.find(it->first) == bitvals_.end()) {
92  ids.push_back(it->first);
93  }
94  }
95 }
96 
98  diff = bitvals_diff_;
99 }
100 
102  const bit_table_t& mixed_diff) {
103  for (bit_table_t::const_iterator it = mixed_diff.begin();
104  it != mixed_diff.end(); ++it) {
105  if (it->second.bit_num() == 0) {
106  bitvals_.erase(it->first);
107  } else {
108  bitvals_[it->first] = it->second;
109  }
110  }
111  bitvals_diff_.clear();
112  return true;
113 }
114 
115 void bit_index_storage::mix(const bit_table_t& lhs, bit_table_t& rhs) const {
116  for (bit_table_t::const_iterator it = lhs.begin(); it != lhs.end(); ++it) {
117  rhs[it->first] = it->second;
118  }
119 }
120 
122  greater<pair<uint64_t, string> > > heap_type;
123 
124 static void similar_row_one(
125  const bit_vector& x,
126  const pair<string, bit_vector>& y,
127  heap_type& heap) {
128  uint64_t match_num = x.calc_hamming_similarity(y.second);
129  heap.push(make_pair(match_num, y.first));
130 }
131 
133  const bit_vector& bv,
134  vector<pair<string, float> >& ids,
135  uint64_t ret_num) const {
136  ids.clear();
137  uint64_t bit_num = bv.bit_num();
138  if (bit_num == 0) {
139  return;
140  }
141 
142  heap_type heap(ret_num);
143 
144  for (bit_table_t::const_iterator it = bitvals_diff_.begin();
145  it != bitvals_diff_.end(); ++it) {
146  similar_row_one(bv, *it, heap);
147  }
148  for (bit_table_t::const_iterator it = bitvals_.begin(); it != bitvals_.end();
149  ++it) {
150  if (bitvals_diff_.find(it->first) != bitvals_diff_.end()) {
151  continue;
152  }
153  similar_row_one(bv, *it, heap);
154  }
155 
156  vector<pair<uint64_t, string> > scores;
157  heap.get_sorted(scores);
158  for (size_t i = 0; i < scores.size() && i < ret_num; ++i) {
159  ids.push_back(make_pair(scores[i].second,
160  static_cast<float>(scores[i].first) / bit_num));
161  }
162 }
163 
165  packer.pack(*this);
166 }
167 
168 void bit_index_storage::unpack(msgpack::object o) {
169  o.convert(this);
170 }
171 
172 string bit_index_storage::name() const {
173  return string("bit_index_storage");
174 }
175 
176 } // namespace storage
177 } // namespace core
178 } // namespace jubatus
void similar_row(const bit_vector &bv, std::vector< std::pair< std::string, float > > &ids, uint64_t ret_num) const
void get_sorted(std::vector< T > &v) const
fixed_size_heap< pair< uint64_t, string >, greater< pair< uint64_t, string > > > heap_type
void set_row(const std::string &row, const bit_vector &bv)
void get_all_row_ids(std::vector< std::string > &ids) const
bool put_diff(const bit_table_t &mixed_diff)
void pack(framework::packer &packer) const
jubatus::util::data::unordered_map< std::string, bit_vector > bit_table_t
void get_row(const std::string &row, bit_vector &bv) const
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
bit_vector_base< uint64_t > bit_vector
uint64_t calc_hamming_similarity(const bit_vector_base &bv) const
Definition: bit_vector.hpp:273
void get_diff(bit_table_t &diff) const
static void similar_row_one(const bit_vector &x, const pair< string, bit_vector > &y, heap_type &heap)
void mix(const bit_table_t &lhs, bit_table_t &rhs) const