jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
euclid_lsh.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2012 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_RECOMMENDER_EUCLID_LSH_HPP_
18 #define JUBATUS_CORE_RECOMMENDER_EUCLID_LSH_HPP_
19 
20 #include <stdint.h>
21 #include <utility>
22 #include <string>
23 #include <vector>
24 #include "jubatus/util/data/unordered_map.h"
25 #include "jubatus/util/concurrent/lock.h"
26 #include "jubatus/util/concurrent/mutex.h"
27 #include "jubatus/util/lang/shared_ptr.h"
28 #include "jubatus/util/text/json.h"
29 #include "recommender_base.hpp"
30 
31 namespace jubatus {
32 namespace core {
33 namespace framework {
34 template <typename Model, typename Diff>
35 class linear_mixable_helper;
36 } // namespace framework
37 namespace storage {
38 class lsh_index_storage;
39 struct lsh_entry;
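40 typedef jubatus::util::data::unordered_map<std::string, lsh_entry>
41  lsh_master_table_t;
42 typedef framework::linear_mixable_helper<lsh_index_storage, lsh_master_table_t>
43  mixable_lsh_index_storage;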
44 } // namespace storage
45 namespace recommender {
46 
47 class euclid_lsh : public recommender_base {
48  public:
51 
52  static const uint64_t DEFAULT_HASH_NUM;
53  static const uint64_t DEFAULT_TABLE_NUM;
54  static const float DEFAULT_BIN_WIDTH;
55  static const uint32_t DEFAULT_NUM_PROBE;
56  static const uint32_t DEFAULT_SEED;
57  static const bool DEFAULT_RETAIN_PROJECTION;
58 
59  struct config {
60  config();
61 
62  int64_t hash_num;
63  int64_t table_num;
64  float bin_width;
65  int32_t probe_num;
66  int32_t seed;
67  bool retain_projection;
68 
69  template<typename Ar>
70  void serialize(Ar& ar) {
71  ar
72  & JUBA_MEMBER(hash_num)
73  & JUBA_MEMBER(table_num)
74  & JUBA_MEMBER(bin_width)
75  & JUBA_MEMBER(probe_num)
76  & JUBA_MEMBER(seed)
77  & JUBA_MEMBER(retain_projection);
78  }
79  };
80 
81  euclid_lsh();
82  explicit euclid_lsh(const config& config);
83  ~euclid_lsh();
84 
85  virtual void neighbor_row(
86  const common::sfv_t& query,
87  std::vector<std::pair<std::string, float> >& ids,
88  size_t ret_num) const;
89  virtual void neighbor_row(
90  const std::string& id,
91  std::vector<std::pair<std::string, float> >& ids,
92  size_t ret_num) const;
93 
94  virtual void similar_row(
95  const common::sfv_t& query,
96  std::vector<std::pair<std::string, float> >& ids,
97  size_t ret_num) const;
98  virtual void similar_row(
99  const std::string& id,
100  std::vector<std::pair<std::string, float> >& ids,
101  size_t ret_num) const;
102 
103  virtual void clear();
104  virtual void clear_row(const std::string& id);
105  virtual void update_row(const std::string& id, const sfv_diff_t& diff);
106  virtual void get_all_row_ids(std::vector<std::string>& ids) const;
107 
108  virtual std::string type() const;
109 
110  framework::mixable* get_mixable() const;
111 
112  void pack(framework::packer& packer) const;
113  void unpack(msgpack::object o);
114 
115  private:
116  std::vector<float> calculate_lsh(const common::sfv_t& query) const;
117  std::vector<float> get_projection(uint32_t seed) const;
118 
119  void initialize_model();
120 
121  jubatus::util::lang::shared_ptr<storage::mixable_lsh_index_storage>
122  mixable_storage_;
123  float bin_width_;
124  uint32_t num_probe_;
125 
126  mutable jubatus::util::data::unordered_map<uint32_t, std::vector<float> >
127  projection_cache_;
128  mutable jubatus::util::concurrent::mutex cache_lock_;
130 };
131 
132 } // namespace recommender
133 } // namespace core
134 } // namespace jubatus
135 
136 #endif // JUBATUS_CORE_RECOMMENDER_EUCLID_LSH_HPP_
storage::lsh_index_storage lsh_index_storage
Definition: euclid_lsh.cpp:91
framework::linear_mixable_helper< lsh_index_storage, lsh_master_table_t > mixable_lsh_index_storage
Definition: euclid_lsh.hpp:43
static const uint64_t DEFAULT_TABLE_NUM
Definition: euclid_lsh.hpp:53
virtual void similar_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
jubatus::util::data::unordered_map< uint32_t, std::vector< float > > projection_cache_
Definition: euclid_lsh.hpp:127
static const uint64_t DEFAULT_HASH_NUM
Definition: euclid_lsh.hpp:52
virtual void similar_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const =0
virtual void neighbor_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
virtual void get_all_row_ids(std::vector< std::string > &ids) const
Definition: euclid_lsh.cpp:210
jubatus::util::data::unordered_map< std::string, lsh_entry > lsh_master_table_t
Definition: euclid_lsh.hpp:39
core::common::sfv_t sfv_diff_t
virtual std::string type() const
Definition: euclid_lsh.cpp:214
void pack(framework::packer &packer) const
Definition: euclid_lsh.cpp:259
static const uint32_t DEFAULT_SEED
Definition: euclid_lsh.hpp:56
static const bool DEFAULT_RETAIN_PROJECTION
Definition: euclid_lsh.hpp:57
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
std::vector< float > get_projection(uint32_t seed) const
Definition: euclid_lsh.cpp:237
virtual void update_row(const std::string &id, const sfv_diff_t &diff)
Definition: euclid_lsh.cpp:199
virtual void neighbor_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const =0
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
static const uint32_t DEFAULT_NUM_PROBE
Definition: euclid_lsh.hpp:55
std::vector< float > calculate_lsh(const common::sfv_t &query) const
Definition: euclid_lsh.cpp:222
virtual void clear_row(const std::string &id)
Definition: euclid_lsh.cpp:194
jubatus::util::concurrent::mutex cache_lock_
Definition: euclid_lsh.hpp:128
framework::mixable * get_mixable() const
Definition: euclid_lsh.cpp:218
jubatus::util::lang::shared_ptr< storage::mixable_lsh_index_storage > mixable_storage_
Definition: euclid_lsh.hpp:122