jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
lsh.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "lsh.hpp"
18 
19 #include <cmath>
20 #include <algorithm>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include "../common/exception.hpp"
25 #include "../common/hash.hpp"
26 #include "lsh_util.hpp"
27 #include "../storage/bit_index_storage.hpp"
28 
29 using std::pair;
30 using std::string;
31 using std::vector;
33 
34 namespace jubatus {
35 namespace core {
36 namespace recommender {
37 
38 static const uint64_t DEFAULT_HASH_NUM = 64; // should be in config
39 
41  : hash_num(DEFAULT_HASH_NUM) {
42 }
43 
44 lsh::lsh(uint64_t hash_num)
45  : hash_num_(hash_num) {
46  if (!(1 <= hash_num)) {
47  throw JUBATUS_EXCEPTION(
48  common::invalid_parameter("1 <= hash_num"));
49  }
51 }
52 
54  : hash_num_(config.hash_num) {
55 
56  if (!(1 <= config.hash_num)) {
57  throw JUBATUS_EXCEPTION(
58  common::invalid_parameter("1 <= hash_num"));
59  }
60 
62 }
63 
65  : hash_num_(DEFAULT_HASH_NUM) {
67 }
68 
70 }
71 
73  const common::sfv_t& query,
74  vector<pair<string, float> >& ids,
75  size_t ret_num) const {
76  ids.clear();
77  if (ret_num == 0) {
78  return;
79  }
80 
81  bit_vector query_bv;
82  calc_lsh_values(query, query_bv);
83  mixable_storage_->get_model()->similar_row(query_bv, ids, ret_num);
84 }
85 
87  const common::sfv_t& query,
88  vector<pair<string, float> >& ids,
89  size_t ret_num) const {
90  similar_row(query, ids, ret_num);
91  for (size_t i = 0; i < ids.size(); ++i) {
92  ids[i].second = 1 - ids[i].second;
93  }
94 }
95 
96 void lsh::clear() {
97  orig_.clear();
98  jubatus::util::data::unordered_map<std::string, std::vector<float> >()
100  mixable_storage_->get_model()->clear();
101 }
102 
103 void lsh::clear_row(const string& id) {
104  orig_.remove_row(id);
105  mixable_storage_->get_model()->remove_row(id);
106 }
107 
108 void lsh::calc_lsh_values(const common::sfv_t& sfv, bit_vector& bv) const {
109  const_cast<lsh*>(this)->generate_column_bases(sfv);
110 
111  vector<float> lsh_vals;
113  set_bit_vector(lsh_vals, bv);
114 }
115 
117  for (size_t i = 0; i < sfv.size(); ++i) {
118  generate_column_base(sfv[i].first);
119  }
120 }
121 
122 void lsh::generate_column_base(const string& column) {
123  if (column2baseval_.count(column) != 0) {
124  return;
125  }
126  const uint32_t seed = common::hash_util::calc_string_hash(column);
128 }
129 
130 void lsh::update_row(const string& id, const sfv_diff_t& diff) {
131  generate_column_bases(diff);
132  orig_.set_row(id, diff);
133  common::sfv_t row;
134  orig_.get_row(id, row);
135  bit_vector bv;
136  calc_lsh_values(row, bv);
137  mixable_storage_->get_model()->set_row(id, bv);
138 }
139 
140 void lsh::get_all_row_ids(std::vector<std::string>& ids) const {
141  mixable_storage_->get_model()->get_all_row_ids(ids);
142 }
143 
144 string lsh::type() const {
145  return string("lsh");
146 }
147 
149  return mixable_storage_.get();
150 }
151 
154  typedef storage::mixable_bit_index_storage storage_t;
155  model_ptr p(new storage::bit_index_storage);
156  mixable_storage_.reset(new storage_t(p));
157 }
158 
160  packer.pack_array(2);
161  orig_.pack(packer);
162  mixable_storage_->get_model()->pack(packer);
163 }
164 
165 void lsh::unpack(msgpack::object o) {
166  if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
167  throw msgpack::type_error();
168  }
169  orig_.unpack(o.via.array.ptr[0]);
170  mixable_storage_->get_model()->unpack(o.via.array.ptr[1]);
171 }
172 
173 } // namespace recommender
174 } // namespace core
175 } // namespace jubatus
void similar_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
Definition: lsh.cpp:72
void get_row(const std::string &row, std::vector< std::pair< std::string, float > > &columns) const
const uint64_t hash_num_
Definition: lsh.hpp:91
void pack(framework::packer &packer) const
Definition: lsh.cpp:159
void unpack(msgpack::object o)
Definition: lsh.cpp:165
void get_all_row_ids(std::vector< std::string > &ids) const
Definition: lsh.cpp:140
void set_bit_vector(const std::vector< float > &vec, bit_vector &bit_vec)
Definition: lsh_util.cpp:44
void update_row(const std::string &id, const sfv_diff_t &diff)
Definition: lsh.cpp:130
void neighbor_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
Definition: lsh.cpp:86
jubatus::util::lang::shared_ptr< Model > model_ptr
core::common::sfv_t sfv_diff_t
void generate_column_bases(const common::sfv_t &v)
Definition: lsh.cpp:116
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
std::string type() const
Definition: lsh.cpp:144
void pack(framework::packer &packer) const
framework::mixable * get_mixable() const
Definition: lsh.cpp:148
void clear_row(const std::string &id)
Definition: lsh.cpp:103
void generate_column_base(const std::string &column)
Definition: lsh.cpp:122
void swap(weighted_point &p1, weighted_point &p2)
Definition: types.hpp:47
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
bit_vector_base< uint64_t > bit_vector
void prod_invert_and_vector(const unordered_map< string, vector< float > > &matrix, const common::sfv_t &vec, size_t dim, vector< float > &ret)
Definition: lsh_util.cpp:55
jubatus::util::data::unordered_map< std::string, std::vector< float > > column2baseval_
Definition: lsh.hpp:86
static const uint64_t DEFAULT_HASH_NUM
Definition: lsh.cpp:38
void calc_lsh_values(const common::sfv_t &sfv, storage::bit_vector &bv) const
Definition: lsh.cpp:108
core::storage::sparse_matrix_storage orig_
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
jubatus::util::lang::shared_ptr< storage::mixable_bit_index_storage > mixable_storage_
Definition: lsh.hpp:89
void generate_random_vector(size_t dim, uint32_t seed, vector< float > &ret)
Definition: lsh_util.cpp:35
void set_row(const std::string &row, const std::vector< std::pair< std::string, float > > &columns)
static uint64_t calc_string_hash(const std::string &s)
Definition: hash.hpp:29
storage::mixable_lsh_index_storage::model_ptr model_ptr
Definition: euclid_lsh.cpp:90