jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
lsh_index_storage.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2012 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_STORAGE_LSH_INDEX_STORAGE_HPP_
18 #define JUBATUS_CORE_STORAGE_LSH_INDEX_STORAGE_HPP_
19 
20 #include <iosfwd>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include <msgpack.hpp>
25 #include "jubatus/util/data/unordered_map.h"
26 #include "jubatus/util/data/unordered_set.h"
27 #include "lsh_vector.hpp"
28 #include "storage_type.hpp"
29 #include "../common/key_manager.hpp"
30 #include "../common/unordered_map.hpp"
31 #include "../framework/mixable_helper.hpp"
32 
33 namespace jubatus {
34 namespace core {
35 namespace storage {
36 
37 struct lsh_entry {  // One stored record: hash values, a bit_vector and a norm.
38  std::vector<uint64_t> lsh_hash;  // 64-bit hash values kept for this entry
39  bit_vector simhash_bv;  // bit_vector is not declared in this file (presumably lsh_vector.hpp)
40  float norm;  // norm value stored alongside the hashes
41 
42  MSGPACK_DEFINE(lsh_hash, simhash_bv, norm);  // msgpack (de)serialization of the three fields, in this order
43 };
44 
45 typedef jubatus::util::data::unordered_map<std::string, lsh_entry>
46  lsh_master_table_t;  // string-keyed table of lsh_entry values
47 
48 typedef jubatus::util::data::unordered_map<uint64_t, std::vector<uint64_t> >
49  lsh_table_t;  // uint64_t-keyed table whose values are lists of uint64_t
50 
52  public:
54  lsh_index_storage(size_t lsh_num, size_t table_num, uint32_t seed);
55  lsh_index_storage(size_t table_num, const std::vector<float>& shift);
56  virtual ~lsh_index_storage();
57 
58  // hash is a randomly-projected and scaled hash values without shifting
59  void set_row(
60  const std::string& row,
61  const std::vector<float>& hash,
62  float norm);
63  void remove_row(const std::string& row);
64  void clear();
65  void get_all_row_ids(std::vector<std::string>& ids) const;
66 
67  void similar_row(
68  const std::vector<float>& hash,
69  float norm,
70  uint64_t probe_num,
71  uint64_t ret_num,
72  std::vector<std::pair<std::string, float> >& ids) const;
73  void similar_row(
74  const std::string& id,
75  uint64_t ret_num,
76  std::vector<std::pair<std::string, float> >& ids) const;
77  std::string name() const;
78 
79  size_t table_num() const {
80  return table_num_;
81  }
82 
83  size_t all_lsh_num() const {
84  return shift_.size();
85  }
87  return storage::version();
88  }
89 
90  void pack(framework::packer& packer) const;
91  void unpack(msgpack::object o);
92 
93  void get_diff(lsh_master_table_t& diff) const;
94  bool put_diff(const lsh_master_table_t& mixed_diff);
95  void mix(const lsh_master_table_t& lhs, lsh_master_table_t& rhs) const;
96 
99 
100  private:
101  lsh_master_table_t::iterator remove_and_get_row(const std::string& row);
102  void put_empty_entry(uint64_t row_id, const lsh_entry& entry);
103 
104 
105  std::vector<float> make_entry(
106  const std::vector<float>& hash,
107  float norm,
108  lsh_entry& entry) const;
109  bool retrieve_hit_rows(
110  uint64_t hash,
111  size_t ret_num,
112  jubatus::util::data::unordered_set<uint64_t>& cands) const;
113 
115  const jubatus::util::data::unordered_set<uint64_t>& cands,
116  const bit_vector& query_simhash,
117  float query_norm,
118  uint64_t ret_num,
119  std::vector<std::pair<std::string, float> >& ids) const;
120  const lsh_entry* get_lsh_entry(const std::string& row) const;
121  void remove_model_row(const std::string& row);
122  void set_mixed_row(const std::string& row, const lsh_entry& entry);
123 
124  lsh_master_table_t master_table_;
125  lsh_master_table_t master_table_diff_;
126 
129 
130  std::vector<float> shift_;
131  uint64_t table_num_;
133 };
134 
137 
138 } // namespace storage
139 } // namespace core
140 } // namespace jubatus
141 
142 #endif // JUBATUS_CORE_STORAGE_LSH_INDEX_STORAGE_HPP_
void get_sorted_similar_rows(const jubatus::util::data::unordered_set< uint64_t > &cands, const bit_vector &query_simhash, float query_norm, uint64_t ret_num, std::vector< std::pair< std::string, float > > &ids) const
framework::linear_mixable_helper< lsh_index_storage, lsh_master_table_t > mixable_lsh_index_storage
Definition: euclid_lsh.hpp:43
bool put_diff(const lsh_master_table_t &mixed_diff)
bit_vector simhash_bv
void get_diff(lsh_master_table_t &diff) const
void get_all_row_ids(std::vector< std::string > &ids) const
MSGPACK_DEFINE(master_table_, master_table_diff_, lsh_table_, lsh_table_diff_, shift_, table_num_, key_manager_)
void pack(framework::packer &packer) const
bool retrieve_hit_rows(uint64_t hash, size_t ret_num, jubatus::util::data::unordered_set< uint64_t > &cands) const
MSGPACK_DEFINE(lsh_hash, simhash_bv, norm)
float norm
void mix(const lsh_master_table_t &lhs, lsh_master_table_t &rhs) const
jubatus::util::data::unordered_map< std::string, lsh_entry > lsh_master_table_t
Definition: euclid_lsh.hpp:39
std::vector< float > make_entry(const std::vector< float > &hash, float norm, lsh_entry &entry) const
const lsh_entry * get_lsh_entry(const std::string &row) const
void set_row(const std::string &row, const std::vector< float > &hash, float norm)
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
lsh_master_table_t::iterator remove_and_get_row(const std::string &row)
void set_mixed_row(const std::string &row, const lsh_entry &entry)
jubatus::util::data::unordered_map< uint64_t, std::vector< uint64_t > > lsh_table_t
void put_empty_entry(uint64_t row_id, const lsh_entry &entry)
std::vector< uint64_t > lsh_hash
void similar_row(const std::vector< float > &hash, float norm, uint64_t probe_num, uint64_t ret_num, std::vector< std::pair< std::string, float > > &ids) const
void remove_model_row(const std::string &row)