24 #include "jubatus/util/data/serialization.h"
25 #include "jubatus/util/lang/cast.h"
26 #include "jubatus/util/math/random.h"
27 #include "jubatus/util/concurrent/lock.h"
28 #include "jubatus/util/concurrent/mutex.h"
29 #include "../common/hash.hpp"
30 #include "../storage/lsh_util.hpp"
31 #include "../storage/lsh_vector.hpp"
32 #include "../storage/lsh_index_storage.hpp"
39 using jubatus::util::math::random::mtrand;
40 using jubatus::util::concurrent::scoped_lock;
44 namespace recommender {
48 struct greater_second {
50 const pair<string, float>& l,
51 const pair<string, float>& r)
const {
52 return l.second > r.second;
58 for (
size_t i = 0; i < sfv.size(); ++i) {
59 sqnorm += sfv[i].second * sfv[i].second;
61 return std::sqrt(sqnorm);
64 void calc_projection(uint32_t seed,
size_t size, vector<float>& ret) {
67 for (
size_t i = 0; i < size; ++i) {
68 ret[i] = rnd.next_gaussian();
76 table_num(DEFAULT_TABLE_NUM),
77 bin_width(DEFAULT_BIN_WIDTH),
78 probe_num(DEFAULT_NUM_PROBE),
80 retain_projection(DEFAULT_RETAIN_PROJECTION) {
104 : mixable_storage_(),
105 bin_width_(config.bin_width),
106 num_probe_(config.probe_num),
107 retain_projection_(config.retain_projection) {
129 if (!(0 <= config.
seed)) {
147 vector<pair<string, float> >& ids,
148 size_t ret_num)
const {
150 for (
size_t i = 0; i < ids.size(); ++i) {
151 ids[i].second = -ids[i].second;
157 vector<pair<string, float> >& ids,
158 size_t ret_num)
const {
160 for (
size_t i = 0; i < ids.size(); ++i) {
161 ids[i].second = -ids[i].second;
167 vector<pair<string, float> >& ids,
168 size_t ret_num)
const {
173 const float norm = calc_norm(query);
174 lsh_index.similar_row(hash, norm,
num_probe_, ret_num, ids);
179 vector<pair<string, float> >& ids,
180 size_t ret_num)
const {
190 jubatus::util::data::unordered_map<uint32_t, std::vector<float> >()
206 const float norm = calc_norm(row);
207 lsh_index.
set_row(
id, hash, norm);
224 for (
size_t i = 0; i < query.size(); ++i) {
227 for (
size_t j = 0; j < hash.size(); ++j) {
228 hash[j] += query[i].second * proj[j];
231 for (
size_t j = 0; j < hash.size(); ++j) {
260 packer.pack_array(2);
266 if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
267 throw msgpack::type_error();
storage::lsh_index_storage lsh_index_storage
framework::linear_mixable_helper< lsh_index_storage, lsh_master_table_t > mixable_lsh_index_storage
void get_row(const std::string &row, std::vector< std::pair< std::string, float > > &columns) const
static const uint64_t DEFAULT_TABLE_NUM
virtual void similar_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
jubatus::util::data::unordered_map< uint32_t, std::vector< float > > projection_cache_
static const uint64_t DEFAULT_HASH_NUM
void unpack(msgpack::object o)
virtual void neighbor_row(const common::sfv_t &query, std::vector< std::pair< std::string, float > > &ids, size_t ret_num) const
virtual void get_all_row_ids(std::vector< std::string > &ids) const
jubatus::util::lang::shared_ptr< Model > model_ptr
core::common::sfv_t sfv_diff_t
#define JUBATUS_EXCEPTION(e)
virtual std::string type() const
void pack(framework::packer &packer) const
void pack(framework::packer &packer) const
static const uint32_t DEFAULT_SEED
void set_row(const std::string &row, const std::vector< float > &hash, float norm)
void swap(weighted_point &p1, weighted_point &p2)
static const bool DEFAULT_RETAIN_PROJECTION
msgpack::packer< jubatus_packer > packer
std::vector< float > get_projection(uint32_t seed) const
virtual void update_row(const std::string &id, const sfv_diff_t &diff)
static const uint64_t DEFAULT_HASH_NUM
void unpack(msgpack::object o)
void remove_row(const std::string &row)
core::storage::sparse_matrix_storage orig_
std::vector< std::pair< std::string, float > > sfv_t
static const uint32_t DEFAULT_NUM_PROBE
std::vector< float > calculate_lsh(const common::sfv_t &query) const
virtual void clear_row(const std::string &id)
jubatus::util::concurrent::mutex cache_lock_
void set_row(const std::string &row, const std::vector< std::pair< std::string, float > > &columns)
static uint64_t calc_string_hash(const std::string &s)
framework::mixable * get_mixable() const
jubatus::util::lang::shared_ptr< storage::mixable_lsh_index_storage > mixable_storage_
storage::mixable_lsh_index_storage::model_ptr model_ptr
static const float DEFAULT_BIN_WIDTH