24 #include "../common/hash.hpp"
30 using jubatus::util::lang::lexical_cast;
38 namespace nearest_neighbor {
41 void hash_mix64(uint64_t& a, uint64_t& b, uint64_t& c) {
// Computes a float score from two 64-bit inputs and a weight `val`.
// From the lines visible here: `a` is divided by the largest uint64_t value
// to form a ratio r, and the function returns -log(r) / val.
// NOTE(review): original lines 93 and 95-96 are not visible in this listing;
// presumably they mix a, b and c (e.g. via hash_mix64) before r is
// computed -- confirm against the full file.
91 float calc_hash(uint64_t a, uint64_t b,
float val) {
// Fixed 64-bit constant used as the initial value of c below.
92 static const uint64_t HASH_PRIME = 0xc3a5c85c97cb3127ULL;
94 uint64_t c = HASH_PRIME;
// r = a / (2^64 - 1), evaluated in single precision.
97 float r =
static_cast<float>(a) / static_cast<float>(0xFFFFFFFFFFFFFFFFLLU);
// NOTE(review): no guard is visible for a == 0 (log(0)) or val == 0
// (division by zero); either yields a non-finite result -- confirm that
// callers tolerate this.
98 return - std::log(r) / val;
// Tail of a minhash constructor signature (the opening of the signature is
// not visible in this listing). Takes the column table and an id string.
105 jubatus::util::lang::shared_ptr<column_table> table,
106 const std::string&
id)
// Forwards conf.hash_num, table and id to the base-class constructor.
107 : bit_vector_nearest_neighbor_base(conf.hash_num, table, id) {
// Validate the configuration: hash_num must be at least 1.
109 if (!(1 <= conf.hash_num)) {
// Reports common::invalid_parameter with the violated condition as message.
// NOTE(review): the statement that raises it (original line 110) is not
// visible here; presumably a throw via JUBATUS_EXCEPTION -- confirm.
111 common::invalid_parameter(
"1 <= hash_num"));
// Tail of a second minhash constructor signature (the opening of the
// signature is not visible in this listing). Same as the other constructor
// but additionally takes a mutable column-type schema vector.
117 jubatus::util::lang::shared_ptr<column_table> table,
118 vector<column_type>& schema,
119 const std::string&
id)
// Forwards conf.hash_num, table, schema and id to the base-class constructor.
120 : bit_vector_nearest_neighbor_base(conf.hash_num, table, schema, id) {
// Validate the configuration: hash_num must be at least 1.
122 if (!(1 <= conf.hash_num)) {
// Reports common::invalid_parameter with the violated condition as message.
// NOTE(review): the statement that raises it (original line 123) is not
// visible here; presumably a throw via JUBATUS_EXCEPTION -- confirm.
124 common::invalid_parameter(
"1 <= hash_num"));
// Body fragment of the hashing routine (its signature, original line 128, is
// not visible in this listing).
// One slot per output bit: the smallest hash value seen so far (starting at
// FLT_MAX) and the key hash that produced it.
129 vector<float> min_values_buffer(bitnum(), FLT_MAX);
130 vector<uint64_t> hash_buffer(bitnum());
// Visit every (key, value) entry of the input feature vector.
131 for (
size_t i = 0; i < sfv.size(); ++i) {
// NOTE(review): key_hash is defined on original line 132, which is not
// visible; presumably calc_string_hash(sfv[i].first) -- confirm.
133 float val = sfv[i].second;
// For each bit position j, keep the entry whose calc_hash value is minimal.
134 for (uint32_t j = 0; j < bitnum(); ++j) {
135 float hashval = calc_hash(key_hash, j, val);
// Strict '<': on ties the earlier entry is kept.
136 if (hashval < min_values_buffer[j]) {
137 min_values_buffer[j] = hashval;
138 hash_buffer[j] = key_hash;
// Second pass: inspect the lowest bit of each selected key hash.
// NOTE(review): the action taken when the bit is 1 (original line 146) is not
// visible; presumably it sets bit i of the output bit vector -- confirm.
144 for (
size_t i = 0; i < hash_buffer.size(); ++i) {
145 if ((hash_buffer[i] & 1LLU) == 1) {
#define JUBATUS_EXCEPTION(e)
minhash(const config &conf, jubatus::util::lang::shared_ptr< storage::column_table > table, const std::string &id)
bit_vector_base< uint64_t > bit_vector
std::vector< std::pair< std::string, float > > sfv_t
static uint64_t calc_string_hash(const std::string &s)