jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
minhash.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2012 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_NEAREST_NEIGHBOR_MINHASH_HPP_
18 #define JUBATUS_CORE_NEAREST_NEIGHBOR_MINHASH_HPP_
19 
20 #include <stdint.h>
21 #include <map>
22 #include <string>
23 #include <vector>
24 #include "jubatus/util/data/serialization.h"
25 #include "jubatus/util/lang/shared_ptr.h"
27 #include "../common/type.hpp"
28 
29 namespace jubatus {
30 namespace core {
// Forward declarations of the storage types named by the declarations
// further down; only their names are introduced here, no definitions.
31 namespace storage {
32 class column_table;
// Bit-vector class template parameterized on its underlying word type.
33 template <typename bit_base> class bit_vector_base;
// bit_vector is the uint64_t instantiation of bit_vector_base.
34 typedef bit_vector_base<uint64_t> bit_vector;
35 }
36 namespace nearest_neighbor {
37 
39  public:
40  struct config {
42  : hash_num(64u) {
43  }
44 
45  int32_t hash_num;
46 
47  template <typename Ar>
48  void serialize(Ar& ar) {
49  ar & JUBA_MEMBER(hash_num);
50  }
51  };
52 
// Constructor taking a configuration, a shared pointer to a
// storage::column_table, and an id string. Declared only; the definition
// is not part of this header.
53  minhash(
54  const config& conf,
55  jubatus::util::lang::shared_ptr<storage::column_table> table,
56  const std::string& id);
// Constructor overload that additionally takes a column schema by
// non-const reference. Declared only; the definition is not part of this
// header.
// NOTE(review): the schema is passed as a mutable reference, so it is
// presumably modified by the constructor -- confirm in minhash.cpp.
57  minhash(
58  const config& conf,
59  jubatus::util::lang::shared_ptr<storage::column_table> table,
60  std::vector<storage::column_type>& schema,
61  const std::string& id);
62 
// Returns the fixed type name of this class, the string "minhash".
63  virtual std::string type() const {
64  return "minhash";
65  }
66 
67  private:
// Private virtual member that takes a common::sfv_t and returns a
// storage::bit_vector. Declared only; the definition is not part of this
// header.
68  virtual storage::bit_vector hash(const common::sfv_t& sfv) const;
69 };
70 
71 } // namespace nearest_neighbor
72 } // namespace core
73 } // namespace jubatus
74 
75 #endif // JUBATUS_CORE_NEAREST_NEIGHBOR_MINHASH_HPP_
minhash(const config &conf, jubatus::util::lang::shared_ptr< storage::column_table > table, const std::string &id)
bit_vector_base< uint64_t > bit_vector
virtual std::string type() const
Definition: minhash.hpp:63
virtual storage::bit_vector hash(const common::sfv_t &sfv) const
Definition: minhash.cpp:128
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29