24 #include "jubatus/util/lang/cast.h"
25 #include "../storage/fixed_size_heap.hpp"
26 #include "../storage/column_table.hpp"
34 using jubatus::util::lang::lexical_cast;
44 namespace nearest_neighbor {
49 for (
size_t i = 0; i < sfv.size(); ++i) {
50 sqnorm += sfv[i].second * sfv[i].second;
56 return std::sqrt(squared_l2norm(sfv));
63 jubatus::util::lang::shared_ptr<column_table> table,
64 const std::string&
id)
65 : nearest_neighbor_base(table, id) {
68 vector<column_type> schema;
70 get_table()->init(schema);
73 euclid_lsh::euclid_lsh(
75 jubatus::util::lang::shared_ptr<column_table> table,
76 vector<column_type>& schema,
77 const std::string&
id)
78 : nearest_neighbor_base(table, id) {
86 get_table()->add(
id,
owner(my_id_),
cosine_lsh(sfv, hash_num_), l2norm(sfv));
89 void euclid_lsh::neighbor_row(
91 vector<pair<string, float> >& ids,
92 uint64_t ret_num)
const {
93 neighbor_row_from_hash(
100 void euclid_lsh::neighbor_row(
101 const std::string& query_id,
102 vector<pair<string, float> >& ids,
103 uint64_t ret_num)
const {
104 const pair<bool, uint64_t> maybe_index =
105 get_const_table()->exact_match(query_id);
106 if (!maybe_index.first) {
111 const bit_vector bv = lsh_column()[maybe_index.second];
112 const float norm = norm_column()[maybe_index.second];
113 neighbor_row_from_hash(bv, norm, ids, ret_num);
116 void euclid_lsh::set_config(
const config& conf) {
125 void euclid_lsh::fill_schema(vector<column_type>& schema) {
126 first_column_id_ = schema.size();
127 schema.push_back(
column_type(column_type::bit_vector_type, hash_num_));
128 schema.push_back(
column_type(column_type::float_type));
132 return get_const_table()->get_bit_vector_column(first_column_id_);
136 return get_const_table()->get_float_column(first_column_id_ + 1);
139 void euclid_lsh::neighbor_row_from_hash(
142 vector<pair<string, float> >& ids,
143 uint64_t ret_num)
const {
144 jubatus::util::lang::shared_ptr<const column_table> table = get_const_table();
151 const float denom = bv.bit_num();
152 for (
size_t i = 0; i < table->size(); ++i) {
153 const size_t hamm_dist = bv.calc_hamming_distance(bv_col[i]);
154 const float theta = hamm_dist * M_PI / denom;
156 norm_col[i] * (norm_col[i] - 2 * norm * std::cos(theta));
157 heap.
push(make_pair(score, i));
161 vector<pair<float, size_t> > sorted;
166 for (
size_t i = 0; i < sorted.size(); ++i) {
167 ids.push_back(make_pair(table->get_key(sorted[i].second),
168 std::sqrt(squared_norm + sorted[i].first)));
void get_sorted(std::vector< T > &v) const
static float squared_norm(const common::sfv_t &fv)
const bit_vector_column const_bit_vector_column
#define JUBATUS_EXCEPTION(e)
bit_vector_base< uint64_t > bit_vector
std::vector< std::pair< std::string, float > > sfv_t
bit_vector cosine_lsh(const common::sfv_t &sfv, uint32_t hash_num)
const typed_column< float > const_float_column
euclid_lsh(const config &conf, jubatus::util::lang::shared_ptr< storage::column_table > table, const std::string &id)