32 using std::priority_queue;
40 typedef pair<float, pair<int, vector<int> > > diff_type;
41 typedef priority_queue<
47 const vector<float>& hash,
48 size_t num_hash_tables,
49 vector<vector<float> >& hashes) {
50 const size_t hash_size = hash.size() / num_hash_tables;
51 hashes.resize(num_hash_tables);
52 for (
size_t i = 0; i < num_hash_tables; ++i) {
53 hashes[i].resize(hash_size);
54 std::vector<float>::const_iterator begin = hash.begin();
55 copy(begin + i*hash_size, begin + (i+1)*hash_size, hashes[i].begin());
59 void threshold(
const vector<float>& hash, lsh_vector& lv) {
60 lv.resize_and_clear(hash.size());
61 for (
size_t j = 0; j < hash.size(); ++j) {
62 lv.set(j, std::floor(hash[j]));
67 const lsh_vector& src,
68 const vector<int>& diff,
69 const vector<pair<float, int> >& cands) {
71 for (
size_t i = 0; i < diff.size(); ++i) {
72 const int d_idx = cands[diff[i]].second;
74 ret.set(d_idx, ret.get(d_idx) + 1);
76 ret.set(~d_idx, ret.get(~d_idx) - 1);
85 const vector<float>& hash,
86 size_t num_hash_tables) {
87 partition(hash, num_hash_tables,
hash_);
89 for (
size_t i = 0; i <
hash_.size(); ++i) {
95 const int base_size =
base_[0].size();
98 for (
size_t i = 0; i <
base_.size(); ++i) {
100 cands.resize(2 * base_size);
101 for (
int j = 0; j < base_size; ++j) {
103 cands[j * 2] = make_pair((1 - dist) * (1 - dist), j);
104 cands[j * 2 + 1] = make_pair(dist * dist, ~j);
106 sort(cands.begin(), cands.end());
109 for (
size_t i = 0; i <
base_.size(); ++i) {
113 make_pair(i, vector<int>(1))));
119 const size_t base_size =
base_[0].size();
121 for (
size_t i = 0; i <
base_.size(); ++i) {
122 for (
size_t j = 0; j < base_size; ++j) {
123 lv.
set(i * base_size + j,
base_[i].
get(j));
133 const size_t i =
heap_.top().second.first;
134 pair<size_t, lsh_vector> ret(
142 const int index =
heap_.top().second.first;
145 vector<int> shifted =
heap_.top().second.second;
148 vector<int> expanded =
heap_.top().second.second;
149 expanded.push_back(expanded.back() + 1);
151 const float score_base =
heap_.top().first;
154 if ((
size_t) shifted.back() < cands.size()) {
155 const float score_diff = cands[shifted.back()].first
156 - cands[shifted.back() - 1].first;
157 heap_.push(make_pair(score_base + score_diff, make_pair(index, shifted)));
160 if ((
size_t) expanded.back() < cands.size()) {
161 const float score_diff = cands[expanded.back()].first;
162 heap_.push(make_pair(score_base + score_diff, make_pair(index, expanded)));
lsh_probe_generator(const std::vector< float > &hash, size_t num_hash_tables)
void next_perturbations()
std::pair< size_t, lsh_vector > get_next_table_and_vector()
double dist(const common::sfv_t &p1, const common::sfv_t &p2)
fixed_size_heap< pair< uint64_t, string >, greater< pair< uint64_t, string > > > heap_type
std::vector< std::vector< float > > hash_
std::vector< std::vector< std::pair< float, int > > > perturbation_sets_
void set(size_t pos, int value)
std::vector< lsh_vector > base_
lsh_vector base_all() const