jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
lsh_function.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include <vector>
18 #include "jubatus/util/math/random.h"
19 #include "../common/hash.hpp"
20 #include "../common/type.hpp"
21 #include "../storage/bit_vector.hpp"
22 #include "lsh_function.hpp"
23 
24 using std::vector;
26 
27 namespace jubatus {
28 namespace core {
29 namespace nearest_neighbor {
30 
31 vector<float> random_projection(const common::sfv_t& sfv, uint32_t hash_num) {
32  vector<float> proj(hash_num);
33  for (size_t i = 0; i < sfv.size(); ++i) {
34  const uint32_t seed = common::hash_util::calc_string_hash(sfv[i].first);
35  jubatus::util::math::random::mtrand rnd(seed);
36  for (uint32_t j = 0; j < hash_num; ++j) {
37  proj[j] += sfv[i].second * rnd.next_gaussian();
38  }
39  }
40  return proj;
41 }
42 
43 bit_vector binarize(const vector<float>& proj) {
44  bit_vector bv(proj.size());
45  for (size_t i = 0; i < proj.size(); ++i) {
46  if (proj[i] > 0) {
47  bv.set_bit(i);
48  }
49  }
50  return bv;
51 }
52 
53 bit_vector cosine_lsh(const common::sfv_t& sfv, uint32_t hash_num) {
54  return binarize(random_projection(sfv, hash_num));
55 }
56 
57 } // namespace nearest_neighbor
58 } // namespace core
59 } // namespace jubatus
bit_vector binarize(const vector< float > &proj)
bit_vector_base< uint64_t > bit_vector
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
bit_vector cosine_lsh(const common::sfv_t &sfv, uint32_t hash_num)
static uint64_t calc_string_hash(const std::string &s)
Definition: hash.hpp:29
vector< float > random_projection(const common::sfv_t &sfv, uint32_t hash_num)