jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
Classes | Functions
jubatus::core::nearest_neighbor Namespace Reference

Classes

class  bit_vector_nearest_neighbor_base
 
class  euclid_lsh
 
class  lsh
 
class  minhash
 
class  nearest_neighbor_base
 
class  nearest_neighbor_exception
 
class  unimplemented_exception
 

Functions

storage::bit_vector binarize (const std::vector< float > &proj)
 
bit_vector binarize (const vector< float > &proj)
 
bit_vector cosine_lsh (const common::sfv_t &sfv, uint32_t hash_num)
 
shared_ptr< nearest_neighbor_base > create_nearest_neighbor (const std::string &name, const common::jsonconfig::config &config, shared_ptr< storage::column_table > table, const std::string &id)
 
jubatus::util::lang::shared_ptr< nearest_neighbor_base > create_nearest_neighbor (const std::string &name, const common::jsonconfig::config &config, jubatus::util::lang::shared_ptr< storage::column_table > table, const std::string &id)
 
vector< float > random_projection (const common::sfv_t &sfv, uint32_t hash_num)
 
void ranking_hamming_bit_vectors (const bit_vector &query, const const_bit_vector_column &bvs, vector< pair< uint64_t, float > > &ret, uint64_t ret_num)
 
void ranking_hamming_bit_vectors (const storage::bit_vector &query, const storage::const_bit_vector_column &bvs, std::vector< std::pair< uint64_t, float > > &ret, uint64_t ret_num)
 

Function Documentation

storage::bit_vector jubatus::core::nearest_neighbor::binarize ( const std::vector< float > &  proj)
bit_vector jubatus::core::nearest_neighbor::binarize ( const vector< float > &  proj)

Definition at line 43 of file lsh_function.cpp.

References jubatus::core::storage::bit_vector_base< bit_base >::set_bit().

Referenced by cosine_lsh(), and jubatus::core::storage::lsh_index_storage::make_entry().

43  {
44  bit_vector bv(proj.size());
45  for (size_t i = 0; i < proj.size(); ++i) {
46  if (proj[i] > 0) {
47  bv.set_bit(i);
48  }
49  }
50  return bv;
51 }
bit_vector_base< uint64_t > bit_vector

Here is the call graph for this function:

Here is the caller graph for this function:

storage::bit_vector jubatus::core::nearest_neighbor::cosine_lsh ( const common::sfv_t &  sfv,
uint32_t  hash_num 
)

Definition at line 53 of file lsh_function.cpp.

References binarize(), and random_projection().

Referenced by jubatus::core::nearest_neighbor::lsh::hash(), and jubatus::core::nearest_neighbor::euclid_lsh::set_row().

53  {
54  return binarize(random_projection(sfv, hash_num));
55 }
bit_vector binarize(const vector< float > &proj)
vector< float > random_projection(const common::sfv_t &sfv, uint32_t hash_num)

Here is the call graph for this function:

Here is the caller graph for this function:

shared_ptr<nearest_neighbor_base> jubatus::core::nearest_neighbor::create_nearest_neighbor ( const std::string &  name,
const common::jsonconfig::config &  config,
shared_ptr< storage::column_table >  table,
const std::string &  id 
)

Definition at line 32 of file nearest_neighbor_factory.cpp.

References jubatus::core::common::jsonconfig::config_cast_check(), and JUBATUS_EXCEPTION.

Referenced by jubatus::core::anomaly::anomaly_factory::create_anomaly(), jubatus::core::classifier::classifier_factory::create_classifier(), and jubatus::core::recommender::recommender_factory::create_recommender().

36  {
37 
39 
40  if (name == "euclid_lsh") {
41  return shared_ptr<nearest_neighbor_base>(
42  new euclid_lsh(config_cast_check<euclid_lsh::config>(config),
43  table,
44  id));
45  } else if (name == "lsh") {
46  return shared_ptr<nearest_neighbor_base>(
47  new lsh(config_cast_check<lsh::config>(config), table, id));
48  } else if (name == "minhash") {
49  return shared_ptr<nearest_neighbor_base>(
50  new minhash(config_cast_check<minhash::config>(config), table, id));
51  } else {
52  throw JUBATUS_EXCEPTION(common::unsupported_method(name));
53  }
54 }
T config_cast_check(const config &c)
Definition: cast.hpp:311
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79

Here is the call graph for this function:

Here is the caller graph for this function:

jubatus::util::lang::shared_ptr<nearest_neighbor_base> jubatus::core::nearest_neighbor::create_nearest_neighbor ( const std::string &  name,
const common::jsonconfig::config &  config,
jubatus::util::lang::shared_ptr< storage::column_table >  table,
const std::string &  id 
)
std::vector< float > jubatus::core::nearest_neighbor::random_projection ( const common::sfv_t &  sfv,
uint32_t  hash_num 
)

Definition at line 31 of file lsh_function.cpp.

References jubatus::core::common::hash_util::calc_string_hash().

Referenced by cosine_lsh().

31  {
32  vector<float> proj(hash_num);
33  for (size_t i = 0; i < sfv.size(); ++i) {
34  const uint32_t seed = common::hash_util::calc_string_hash(sfv[i].first);
35  jubatus::util::math::random::mtrand rnd(seed);
36  for (uint32_t j = 0; j < hash_num; ++j) {
37  proj[j] += sfv[i].second * rnd.next_gaussian();
38  }
39  }
40  return proj;
41 }

Here is the call graph for this function:

Here is the caller graph for this function:

void jubatus::core::nearest_neighbor::ranking_hamming_bit_vectors ( const bit_vector &  query,
const const_bit_vector_column &  bvs,
vector< pair< uint64_t, float > > &  ret,
uint64_t  ret_num 
)

Definition at line 34 of file bit_vector_ranking.cpp.

References jubatus::core::clustering::dist(), jubatus::core::storage::fixed_size_heap< T, Comp >::get_sorted(), and jubatus::core::storage::fixed_size_heap< T, Comp >::push().

Referenced by jubatus::core::nearest_neighbor::bit_vector_nearest_neighbor_base::neighbor_row_from_hash().

38  {
39  storage::fixed_size_heap<pair<uint32_t, uint64_t> > heap(ret_num);
40  for (uint64_t i = 0; i < bvs.size(); ++i) {
41  const size_t dist = query.calc_hamming_distance(bvs[i]);
42  heap.push(make_pair(dist, i));
43  }
44 
45  vector<pair<uint32_t, uint64_t> > sorted;
46  heap.get_sorted(sorted);
47 
48  ret.clear();
49  const float denom = query.bit_num();
50  for (size_t i = 0; i < sorted.size(); ++i) {
51  ret.push_back(make_pair(sorted[i].second, sorted[i].first / denom));
52  }
53 }
double dist(const common::sfv_t &p1, const common::sfv_t &p2)
Definition: util.cpp:151

Here is the call graph for this function:

Here is the caller graph for this function:

void jubatus::core::nearest_neighbor::ranking_hamming_bit_vectors ( const storage::bit_vector &  query,
const storage::const_bit_vector_column &  bvs,
std::vector< std::pair< uint64_t, float > > &  ret,
uint64_t  ret_num 
)