jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
minhash.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_RECOMMENDER_MINHASH_HPP_
18 #define JUBATUS_CORE_RECOMMENDER_MINHASH_HPP_
19 
20 #include <string>
21 #include <utility>
22 #include <vector>
23 #include "jubatus/util/data/serialization.h"
24 #include "jubatus/util/lang/shared_ptr.h"
25 
26 #include "recommender_base.hpp"
27 #include "../storage/bit_index_storage.hpp"
28 
29 namespace jubatus {
30 namespace core {
31 namespace storage {
32 class bit_index_storage;
33 } // namespace storage
34 namespace recommender {
35 
36 class minhash : public recommender_base {
37  public:
38  struct config {
39  config()
40  : hash_num(64) {
41  }
42 
43  int64_t hash_num;
44 
45  template<typename Ar>
46  void serialize(Ar& ar) {
47  ar & JUBA_MEMBER(hash_num);
48  }
49  };
50 
51  minhash();
52  explicit minhash(const config& config);
53  ~minhash();
54 
55  void similar_row(
56  const common::sfv_t& query,
57  std::vector<std::pair<std::string, float> >& ids,
58  size_t ret_num) const;
59  void neighbor_row(
60  const common::sfv_t& query,
61  std::vector<std::pair<std::string, float> >& ids,
62  size_t ret_num) const;
63  void clear();
64  void clear_row(const std::string& id);
65  void update_row(const std::string& id, const sfv_diff_t& diff);
66  void get_all_row_ids(std::vector<std::string>& ids) const;
67  std::string type() const;
68 
69  framework::mixable* get_mixable() const;
70 
71  void pack(framework::packer& packer) const;
72  void unpack(msgpack::object o);
73 
74  private:
75  void calc_minhash_values(
76  const common::sfv_t& sfv,
77  core::storage::bit_vector& bv) const;
78 
79  static float calc_hash(uint64_t a, uint64_t b, float val);
80  static void hash_mix64(uint64_t& a, uint64_t& b, uint64_t& c);
81 
82  void initialize_model();
83 
84  static const uint64_t hash_prime;
85  uint64_t hash_num_;
86  jubatus::util::lang::shared_ptr<storage::mixable_bit_index_storage>
87  mixable_storage_;
88 };
89 
90 } // namespace recommender
91 } // namespace core
92 } // namespace jubatus
93 
94 #endif // JUBATUS_CORE_RECOMMENDER_MINHASH_HPP_
void calc_minhash_values(const common::sfv_t &sfv, core::storage::bit_vector &bv) const
Definition: minhash.cpp:93
void update_row(const std::string &id, const sfv_diff_t &diff)
Definition: minhash.cpp:117
void similar_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
Definition: minhash.cpp:59
void neighbor_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
Definition: minhash.cpp:73
void pack(framework::packer &packer) const
Definition: minhash.cpp:192
jubatus::util::lang::shared_ptr< storage::mixable_bit_index_storage > mixable_storage_
Definition: minhash.hpp:87
core::common::sfv_t sfv_diff_t
void clear_row(const std::string &id)
Definition: minhash.cpp:88
framework::mixable * get_mixable() const
Definition: minhash.cpp:183
static const uint64_t hash_prime
Definition: minhash.hpp:84
void get_all_row_ids(std::vector< std::string > &ids) const
Definition: minhash.cpp:127
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
static float calc_hash(uint64_t a, uint64_t b, float val)
Definition: minhash.cpp:171
void unpack(msgpack::object o)
Definition: minhash.cpp:198
static void hash_mix64(uint64_t &a, uint64_t &b, uint64_t &c)
Definition: minhash.cpp:132
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29