jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
Classes | Public Member Functions | Private Member Functions | Static Private Member Functions | Private Attributes | List of all members
jubatus::core::storage::inverted_index_storage Class Reference

#include <inverted_index_storage.hpp>

Collaboration diagram for jubatus::core::storage::inverted_index_storage:
Collaboration graph

Classes

struct  diff_type
 

Public Member Functions

void calc_scores (const common::sfv_t &sfv, std::vector< std::pair< std::string, float > > &scores, size_t ret_num) const
 
void clear ()
 
float get (const std::string &row, const std::string &column) const
 
void get_all_column_ids (std::vector< std::string > &ids) const
 
void get_diff (diff_type &diff_str) const
 
storage::version get_version () const
 
 inverted_index_storage ()
 
void mix (const diff_type &lhs_str, diff_type &rhs_str) const
 
 MSGPACK_DEFINE (inv_, inv_diff_, column2norm_, column2norm_diff_, column2id_)
 
std::string name () const
 
void pack (framework::packer &packer) const
 
bool put_diff (const diff_type &mixed_diff)
 
void remove (const std::string &row, const std::string &column)
 
void set (const std::string &row, const std::string &column, float val)
 
void unpack (msgpack::object o)
 
 ~inverted_index_storage ()
 

Private Member Functions

void add_inp_scores (const std::string &row, float val, std::vector< float > &scores) const
 
float calc_columnl2norm (uint64_t column_id) const
 
float get_from_tbl (const std::string &row, uint64_t column_id, const tbl_t &tbl, bool &exist) const
 

Static Private Member Functions

static float calc_l2norm (const common::sfv_t &sfv)
 

Private Attributes

common::key_manager column2id_
 
imap_float_t column2norm_
 
imap_float_t column2norm_diff_
 
tbl_t inv_
 
tbl_t inv_diff_
 

Detailed Description

Definition at line 37 of file inverted_index_storage.hpp.

Constructor & Destructor Documentation

jubatus::core::storage::inverted_index_storage::inverted_index_storage ( )

Definition at line 41 of file inverted_index_storage.cpp.

41  {
42 }
jubatus::core::storage::inverted_index_storage::~inverted_index_storage ( )

Definition at line 44 of file inverted_index_storage.cpp.

44  {
45 }

Member Function Documentation

void jubatus::core::storage::inverted_index_storage::add_inp_scores ( const std::string &  row,
float  val,
std::vector< float > &  scores 
) const
private

Definition at line 307 of file inverted_index_storage.cpp.

References jubatus::core::common::assoc_vector< K, V >::begin(), jubatus::core::common::assoc_vector< K, V >::end(), jubatus::core::common::assoc_vector< K, V >::find(), inv_, and inv_diff_.

Referenced by calc_scores().

310  {
311  tbl_t::const_iterator it_diff = inv_diff_.find(row);
312  if (it_diff != inv_diff_.end()) {
313  const row_t& row_v = it_diff->second;
314  for (row_t::const_iterator row_it = row_v.begin(); row_it != row_v.end();
315  ++row_it) {
316  scores[row_it->first] += row_it->second * val;
317  }
318  }
319 
320  tbl_t::const_iterator it = inv_.find(row);
321  if (it != inv_.end()) {
322  const row_t& row_v = it->second;
323  if (it_diff == inv_diff_.end()) {
324  for (row_t::const_iterator row_it = row_v.begin(); row_it != row_v.end();
325  ++row_it) {
326  scores[row_it->first] += row_it->second * val;
327  }
328  } else {
329  const row_t& row_diff_v = it_diff->second;
330  for (row_t::const_iterator row_it = row_v.begin(); row_it != row_v.end();
331  ++row_it) {
332  if (row_diff_v.find(row_it->first) == row_diff_v.end()) {
333  scores[row_it->first] += row_it->second * val;
334  }
335  }
336  }
337  }
338 }
data_type::const_iterator const_iterator
jubatus::core::common::assoc_vector< uint64_t, float > row_t

Here is the call graph for this function:

Here is the caller graph for this function:

float jubatus::core::storage::inverted_index_storage::calc_columnl2norm ( uint64_t  column_id) const
private

Definition at line 294 of file inverted_index_storage.cpp.

References column2norm_, and column2norm_diff_.

Referenced by calc_scores().

294  {
295  float ret = 0.f;
296  imap_float_t::const_iterator it_diff = column2norm_diff_.find(column_id);
297  if (it_diff != column2norm_diff_.end()) {
298  ret += it_diff->second;
299  }
300  imap_float_t::const_iterator it = column2norm_.find(column_id);
301  if (it != column2norm_.end()) {
302  ret += it->second;
303  }
304  return std::sqrt(ret);
305 }

Here is the caller graph for this function:

float jubatus::core::storage::inverted_index_storage::calc_l2norm ( const common::sfv_t sfv)
staticprivate

Definition at line 286 of file inverted_index_storage.cpp.

Referenced by calc_scores().

286  {
287  float ret = 0.f;
288  for (size_t i = 0; i < sfv.size(); ++i) {
289  ret += sfv[i].second * sfv[i].second;
290  }
291  return std::sqrt(ret);
292 }

Here is the caller graph for this function:

void jubatus::core::storage::inverted_index_storage::calc_scores ( const common::sfv_t sfv,
std::vector< std::pair< std::string, float > > &  scores,
size_t  ret_num 
) const

Definition at line 248 of file inverted_index_storage.cpp.

References add_inp_scores(), calc_columnl2norm(), calc_l2norm(), column2id_, jubatus::core::common::key_manager::get_key(), jubatus::core::common::key_manager::get_max_id(), jubatus::core::storage::fixed_size_heap< T, Comp >::get_sorted(), and jubatus::core::storage::fixed_size_heap< T, Comp >::push().

251  {
252  float query_norm = calc_l2norm(query);
253  if (query_norm == 0.f) {
254  return;
255  }
256 
257  std::vector<float> i_scores(column2id_.get_max_id() + 1, 0.0);
258  for (size_t i = 0; i < query.size(); ++i) {
259  const string& fid = query[i].first;
260  float val = query[i].second;
261  add_inp_scores(fid, val, i_scores);
262  }
263 
264  storage::fixed_size_heap<pair<float, uint64_t>,
265  std::greater<pair<float, uint64_t> > > heap(ret_num);
266  for (size_t i = 0; i < i_scores.size(); ++i) {
267  float score = i_scores[i];
268  if (score == 0.f)
269  continue;
270  float norm = calc_columnl2norm(i);
271  if (norm == 0.f)
272  continue;
273  float normed_score = score / norm / query_norm;
274  heap.push(make_pair(normed_score, i));
275  }
276  vector<pair<float, uint64_t> > sorted_scores;
277  heap.get_sorted(sorted_scores);
278 
279  for (size_t i = 0; i < sorted_scores.size() && i < ret_num; ++i) {
280  scores.push_back(
281  make_pair(column2id_.get_key(sorted_scores[i].second),
282  sorted_scores[i].first));
283  }
284 }
const std::string & get_key(const uint64_t id) const
Definition: key_manager.cpp:78
static float calc_l2norm(const common::sfv_t &sfv)
void add_inp_scores(const std::string &row, float val, std::vector< float > &scores) const

Here is the call graph for this function:

void jubatus::core::storage::inverted_index_storage::clear ( )

Definition at line 149 of file inverted_index_storage.cpp.

References column2id_, column2norm_, column2norm_diff_, inv_, inv_diff_, and jubatus::core::common::key_manager::swap().

149  {
150  tbl_t().swap(inv_);
151  tbl_t().swap(inv_diff_);
152  imap_float_t().swap(column2norm_);
154  common::key_manager().swap(column2id_);
155 }
jubatus::util::data::unordered_map< uint64_t, float > imap_float_t
jubatus::util::data::unordered_map< std::string, row_t > tbl_t

Here is the call graph for this function:

float jubatus::core::storage::inverted_index_storage::get ( const std::string &  row,
const std::string &  column 
) const

Definition at line 66 of file inverted_index_storage.cpp.

References column2id_, get_from_tbl(), jubatus::core::common::key_manager::get_id_const(), inv_, inv_diff_, and jubatus::core::common::key_manager::NOTFOUND.

68  {
69  uint64_t column_id = column2id_.get_id_const(column);
70  if (column_id == common::key_manager::NOTFOUND) {
71  return 0.f;
72  }
73  {
74  bool exist = false;
75  float ret = get_from_tbl(row, column_id, inv_diff_, exist);
76  if (exist) {
77  return ret;
78  }
79  }
80  {
81  bool exist = false;
82  float ret = get_from_tbl(row, column_id, inv_, exist);
83  if (exist) {
84  return ret;
85  }
86  }
87  return 0.0;
88 }
uint64_t get_id_const(const std::string &key) const
Definition: key_manager.cpp:67
float get_from_tbl(const std::string &row, uint64_t column_id, const tbl_t &tbl, bool &exist) const

Here is the call graph for this function:

void jubatus::core::storage::inverted_index_storage::get_all_column_ids ( std::vector< std::string > &  ids) const

Definition at line 157 of file inverted_index_storage.cpp.

References column2id_, column2norm_, column2norm_diff_, and jubatus::core::common::key_manager::get_key().

158  {
159  ids.clear();
160  for (imap_float_t::const_iterator it = column2norm_.begin();
161  it != column2norm_.end(); ++it) {
162  ids.push_back(column2id_.get_key(it->first));
163  }
164  for (imap_float_t::const_iterator it = column2norm_diff_.begin();
165  it != column2norm_diff_.end(); ++it) {
166  if (column2norm_.find(it->first) == column2norm_.end()) {
167  ids.push_back(column2id_.get_key(it->first));
168  }
169  }
170 }
const std::string & get_key(const uint64_t id) const
Definition: key_manager.cpp:78

Here is the call graph for this function:

void jubatus::core::storage::inverted_index_storage::get_diff ( diff_type diff_str) const

Definition at line 172 of file inverted_index_storage.cpp.

References column2id_, jubatus::core::storage::inverted_index_storage::diff_type::column2norm, column2norm_diff_, jubatus::core::common::key_manager::get_key(), jubatus::core::storage::inverted_index_storage::diff_type::inv, inv_diff_, and jubatus::core::storage::sparse_matrix_storage::set_row().

172  {
173  for (tbl_t::const_iterator it = inv_diff_.begin(); it != inv_diff_.end();
174  ++it) {
175  vector<pair<string, float> > columns;
176  for (row_t::const_iterator it2 = it->second.begin();
177  it2 != it->second.end(); ++it2) {
178  columns.push_back(make_pair(column2id_.get_key(it2->first), it2->second));
179  }
180  diff.inv.set_row(it->first, columns);
181  }
182 
183  for (imap_float_t::const_iterator it = column2norm_diff_.begin();
184  it != column2norm_diff_.end(); ++it) {
185  diff.column2norm[column2id_.get_key(it->first)] = it->second;
186  }
187 }
data_type::const_iterator const_iterator
const std::string & get_key(const uint64_t id) const
Definition: key_manager.cpp:78

Here is the call graph for this function:

float jubatus::core::storage::inverted_index_storage::get_from_tbl ( const std::string &  row,
uint64_t  column_id,
const tbl_t tbl,
bool &  exist 
) const
private

Definition at line 90 of file inverted_index_storage.cpp.

References jubatus::core::common::key_manager::NOTFOUND.

Referenced by get(), and remove().

94  {
95  exist = false;
96 
97  if (column_id == common::key_manager::NOTFOUND) {
98  return 0.f;
99  }
100  tbl_t::const_iterator it = tbl.find(row);
101  if (it == tbl.end()) {
102  return 0.f;
103  } else {
104  row_t::const_iterator it_row = it->second.find(column_id);
105  if (it_row == it->second.end()) {
106  return 0.f;
107  } else {
108  exist = true;
109  return it_row->second;
110  }
111  }
112 }
data_type::const_iterator const_iterator

Here is the caller graph for this function:

storage::version jubatus::core::storage::inverted_index_storage::get_version ( ) const
inline

Definition at line 64 of file inverted_index_storage.hpp.

64  {
65  return storage::version();
66  }
void jubatus::core::storage::inverted_index_storage::mix ( const diff_type lhs_str,
diff_type rhs_str 
) const

Definition at line 221 of file inverted_index_storage.cpp.

References jubatus::core::storage::inverted_index_storage::diff_type::column2norm, jubatus::core::storage::sparse_matrix_storage::get_all_row_ids(), jubatus::core::storage::sparse_matrix_storage::get_row(), jubatus::core::storage::inverted_index_storage::diff_type::inv, and jubatus::core::storage::sparse_matrix_storage::set_row().

221  {
222  // merge inv diffs
223  vector<string> ids;
224  lhs.inv.get_all_row_ids(ids);
225  for (size_t i = 0; i < ids.size(); ++i) {
226  const string& row = ids[i];
227 
228  vector<pair<string, float> > columns;
229  lhs.inv.get_row(row, columns);
230  rhs.inv.set_row(row, columns);
231  }
232 
233  // merge norm diffs
234  for (map_float_t::const_iterator it = lhs.column2norm.begin();
235  it != lhs.column2norm.end(); ++it) {
236  rhs.column2norm[it->first] += it->second;
237  }
238 }

Here is the call graph for this function:

jubatus::core::storage::inverted_index_storage::MSGPACK_DEFINE ( inv_  ,
inv_diff_  ,
column2norm_  ,
column2norm_diff_  ,
column2id_   
)
std::string jubatus::core::storage::inverted_index_storage::name ( ) const

Definition at line 340 of file inverted_index_storage.cpp.

340  {
341  return string("inverted_index_storage");
342 }
void jubatus::core::storage::inverted_index_storage::pack ( framework::packer packer) const

Definition at line 240 of file inverted_index_storage.cpp.

240  {
241  packer.pack(*this);
242 }
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
bool jubatus::core::storage::inverted_index_storage::put_diff ( const diff_type mixed_diff)

Definition at line 189 of file inverted_index_storage.cpp.

References column2id_, jubatus::core::storage::inverted_index_storage::diff_type::column2norm, column2norm_, column2norm_diff_, jubatus::core::common::assoc_vector< K, V >::erase(), jubatus::core::storage::sparse_matrix_storage::get_all_row_ids(), jubatus::core::common::key_manager::get_id(), jubatus::core::storage::sparse_matrix_storage::get_row(), jubatus::core::storage::inverted_index_storage::diff_type::inv, inv_, inv_diff_, and jubatus::core::common::jsonconfig::v().

190  {
191  vector<string> ids;
192  mixed_diff.inv.get_all_row_ids(ids);
193  for (size_t i = 0; i < ids.size(); ++i) {
194  const string& row = ids[i];
195  row_t& v = inv_[row];
196  vector<pair<string, float> > columns;
197  mixed_diff.inv.get_row(row, columns);
198  for (size_t j = 0; j < columns.size(); ++j) {
199  size_t id = column2id_.get_id(columns[j].first);
200  if (columns[j].second == 0.f) {
201  v.erase(id);
202  } else {
203  v[id] = columns[j].second;
204  }
205  }
206  }
207  inv_diff_.clear();
208 
209  for (map_float_t::const_iterator it = mixed_diff.column2norm.begin();
210  it != mixed_diff.column2norm.end(); ++it) {
211  uint64_t column_index = column2id_.get_id(it->first);
212  column2norm_[column_index] += it->second;
213  if (column2norm_[column_index] == 0.f) {
214  column2norm_.erase(column_index);
215  }
216  }
217  column2norm_diff_.clear();
218  return true;
219 }
std::vector< T > v(size)
uint64_t get_id(const std::string &key)
Definition: key_manager.cpp:48
jubatus::core::common::assoc_vector< uint64_t, float > row_t

Here is the call graph for this function:

void jubatus::core::storage::inverted_index_storage::remove ( const std::string &  row,
const std::string &  column 
)

Definition at line 114 of file inverted_index_storage.cpp.

References column2id_, get_from_tbl(), jubatus::core::common::key_manager::get_id_const(), inv_, inv_diff_, jubatus::core::common::key_manager::NOTFOUND, and set().

Referenced by jubatus::core::recommender::inverted_index::clear_row().

116  {
117  uint64_t column_id = column2id_.get_id_const(column);
118  if (column_id == common::key_manager::NOTFOUND) {
119  return;
120  }
121 
122  set(row, column, 0.f);
123 
124  // Test if the data exists in the master table.
125  bool exist = false;
126  get_from_tbl(row, column_id, inv_, exist);
127 
128  // If the data exists in the master table, we should
129  // keep it in the diff table until next MIX to propagate
130  // the removal of this data to other nodes.
131  // Otherwise we can immediately remove the row from
132  // the diff table.
133  if (!exist) {
134  tbl_t::iterator it = inv_diff_.find(row);
135  if (it != inv_diff_.end()) {
136  row_t::iterator it_row = it->second.find(column_id);
137  if (it_row != it->second.end()) {
138  it->second.erase(it_row);
139  if (it->second.empty()) {
140  // There are no columns that belongs to this row,
141  // so we can remove the row itself.
142  inv_diff_.erase(it);
143  }
144  }
145  }
146  }
147 }
uint64_t get_id_const(const std::string &key) const
Definition: key_manager.cpp:67
void set(const std::string &row, const std::string &column, float val)
float get_from_tbl(const std::string &row, uint64_t column_id, const tbl_t &tbl, bool &exist) const

Here is the call graph for this function:

Here is the caller graph for this function:

void jubatus::core::storage::inverted_index_storage::set ( const std::string &  row,
const std::string &  column,
float  val 
)

Definition at line 47 of file inverted_index_storage.cpp.

References column2id_, column2norm_diff_, jubatus::core::common::key_manager::get_id(), jubatus::core::common::key_manager::get_id_const(), inv_diff_, and jubatus::core::common::key_manager::NOTFOUND.

Referenced by remove(), and jubatus::core::recommender::inverted_index::update_row().

50  {
51  uint64_t column_id = column2id_.get_id_const(column);
52 
53  if (column_id == common::key_manager::NOTFOUND) {
54  column_id = column2id_.get_id(column);
55  } else {
56  float cur_val = get(row, column);
57  column2norm_diff_[column_id] -= cur_val * cur_val;
58  }
59  inv_diff_[row][column_id] = val;
60  column2norm_diff_[column_id] += val * val;
61  if (column2norm_diff_[column_id] == 0) {
62  column2norm_diff_.erase(column_id);
63  }
64 }
uint64_t get_id_const(const std::string &key) const
Definition: key_manager.cpp:67
uint64_t get_id(const std::string &key)
Definition: key_manager.cpp:48

Here is the call graph for this function:

Here is the caller graph for this function:

void jubatus::core::storage::inverted_index_storage::unpack ( msgpack::object  o)

Definition at line 244 of file inverted_index_storage.cpp.

244  {
245  o.convert(this);
246 }

Member Data Documentation

common::key_manager jubatus::core::storage::inverted_index_storage::column2id_
private
imap_float_t jubatus::core::storage::inverted_index_storage::column2norm_
private

Definition at line 91 of file inverted_index_storage.hpp.

Referenced by calc_columnl2norm(), clear(), get_all_column_ids(), and put_diff().

imap_float_t jubatus::core::storage::inverted_index_storage::column2norm_diff_
private
tbl_t jubatus::core::storage::inverted_index_storage::inv_
private

Definition at line 89 of file inverted_index_storage.hpp.

Referenced by add_inp_scores(), clear(), get(), put_diff(), and remove().

tbl_t jubatus::core::storage::inverted_index_storage::inv_diff_
private

Definition at line 90 of file inverted_index_storage.hpp.

Referenced by add_inp_scores(), clear(), get(), get_diff(), put_diff(), remove(), and set().


The documentation for this class was generated from the following files: