jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
Namespaces | Classes | Typedefs | Enumerations | Functions
jubatus::core::clustering Namespace Reference

Namespaces

 compressor
 

Classes

class  clustering
 
struct  clustering_config
 
class  clustering_error
 
class  clustering_method
 
class  clustering_method_factory
 
class  compressive_storage
 
class  discrete_distribution
 
class  eigen_feature_mapper
 
struct  eigen_wsvec_t
 
class  event_dispatcher
 
class  gmm
 
class  gmm_clustering_method
 
class  kmeans_clustering_method
 
class  simple_storage
 
class  storage
 
class  storage_factory
 
struct  weighted_point
 

Typedefs

typedef std::vector< cluster_unit > cluster_set
 
typedef std::vector< std::pair< cluster_weight, jubatus::core::fv_converter::datum > > cluster_unit
 
typedef double cluster_weight
 
typedef std::vector< std::pair< std::string, wplist > > diff_t
 
typedef std::vector< eigen_smat_t > eigen_smat_list_t
 
typedef Eigen::SparseMatrix< double > eigen_smat_t
 
typedef std::vector< jubatus::util::lang::shared_ptr< eigen_solver_t > > eigen_solver_list_t
 
typedef Eigen::SimplicialCholesky< eigen_smat_t > eigen_solver_t
 
typedef std::vector< eigen_svec_t > eigen_svec_list_t
 
typedef Eigen::SparseVector< double > eigen_svec_t
 
typedef std::vector< eigen_wsvec_t > eigen_wsvec_list_t
 
typedef framework::linear_mixable_helper< storage, diff_t > mixable_storage
 
typedef std::vector< common::sfv_t > plist
 
typedef std::vector< weighted_point > wplist
 

Enumerations

enum  storage_event_type { REVISION_CHANGE, UPDATE }
 

Functions

common::sfv_t add (const common::sfv_t &p1, const common::sfv_t &p2)
 
void concat (const wplist &src, wplist &dst)
 
char digit (int num, int r, int n)
 
double dist (const common::sfv_t &p1, const common::sfv_t &p2)
 
double dist (const weighted_point &d1, const weighted_point &d2)
 
void dump_wplist (const wplist &src)
 
weighted_point get_point (size_t d)
 
weighted_point get_point (size_t d, const std::vector< double > &c)
 
std::vector< weighted_point > get_points (size_t n, size_t d)
 
std::vector< weighted_point > get_points (size_t n, size_t d, const std::vector< double > &c)
 
std::pair< size_t, double > min_dist (const common::sfv_t &p, const std::vector< common::sfv_t > &P)
 
pair< size_t, double > min_dist (const common::sfv_t &p, const vector< common::sfv_t > &P)
 
std::pair< size_t, double > min_dist (const weighted_point &d1, const wplist &P)
 
common::sfv_t scalar_dot (const common::sfv_t &p, double s)
 
void scalar_mul_and_add (const common::sfv_t &left, float s, common::sfv_t &right)
 
common::sfv_t sub (const common::sfv_t &p1, const common::sfv_t &p2)
 
double sum (const common::sfv_t &p)
 
double sum2 (const common::sfv_t &p)
 
void swap (weighted_point &p1, weighted_point &p2)
 

Typedef Documentation

typedef std::vector<cluster_unit> jubatus::core::clustering::cluster_set

Definition at line 37 of file types.hpp.

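typedef std::vector<std::pair<cluster_weight, jubatus::core::fv_converter::datum> > jubatus::core::clustering::cluster_unit

Definition at line 36 of file types.hpp.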

typedef double jubatus::core::clustering::cluster_weight

Definition at line 34 of file types.hpp.

typedef std::vector<std::pair<std::string, wplist> > jubatus::core::clustering::diff_t

Definition at line 57 of file types.hpp.

typedef std::vector<eigen_smat_t> jubatus::core::clustering::eigen_smat_list_t

Definition at line 33 of file gmm_types.hpp.

typedef Eigen::SparseMatrix<double> jubatus::core::clustering::eigen_smat_t

Definition at line 30 of file gmm_types.hpp.

typedef std::vector<jubatus::util::lang::shared_ptr<eigen_solver_t> > jubatus::core::clustering::eigen_solver_list_t

Definition at line 35 of file gmm_types.hpp.

typedef Eigen::SimplicialCholesky<eigen_smat_t> jubatus::core::clustering::eigen_solver_t

Definition at line 31 of file gmm_types.hpp.

typedef std::vector<eigen_svec_t> jubatus::core::clustering::eigen_svec_list_t

Definition at line 32 of file gmm_types.hpp.

typedef Eigen::SparseVector<double> jubatus::core::clustering::eigen_svec_t

Definition at line 29 of file gmm_types.hpp.

typedef std::vector<eigen_wsvec_t> jubatus::core::clustering::eigen_wsvec_list_t

Definition at line 42 of file gmm_types.hpp.

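typedef framework::linear_mixable_helper<storage, diff_t> jubatus::core::clustering::mixable_storage

Definition at line 79 of file storage.hpp.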

typedef std::vector<common::sfv_t> jubatus::core::clustering::plist

Definition at line 56 of file types.hpp.

typedef std::vector<weighted_point> jubatus::core::clustering::wplist

Definition at line 55 of file types.hpp.

Enumeration Type Documentation

enum jubatus::core::clustering::storage_event_type

Enumerator
REVISION_CHANGE 
UPDATE 

Definition at line 34 of file storage.hpp.

Function Documentation

common::sfv_t jubatus::core::clustering::add ( const common::sfv_t &  p1,
const common::sfv_t &  p2 
)

Definition at line 89 of file util.cpp.

89  {
90  common::sfv_t ret;
91  common::sfv_t::const_iterator it1 = p1.begin();
92  common::sfv_t::const_iterator it2 = p2.begin();
93  while (it1 != p1.end() && it2 != p2.end()) {
94  if ((*it1).first < (*it2).first) {
95  ret.push_back((*it1));
96  ++it1;
97  } else if ((*it1).first > (*it2).first) {
98  ret.push_back((*it2));
99  ++it2;
100  } else {
101  ret.push_back(make_pair((*it1).first, (*it1).second + (*it2).second));
102  ++it1;
103  ++it2;
104  }
105  }
106  for (; it1 != p1.end(); ++it1) {
107  ret.push_back((*it1));
108  }
109  for (; it2 != p2.end(); ++it2) {
110  ret.push_back((*it2));
111  }
112 
113  return ret;
114 }
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
void jubatus::core::clustering::concat ( const wplist &  src,
wplist &  dst 
)
char jubatus::core::clustering::digit ( int  num,
int  r,
int  n 
)

Definition at line 37 of file util.cpp.

Referenced by jubatus::core::clustering::compressive_storage::is_next_bucket_full().

37  {
38  if (r < 0) {
39  return 0;
40  }
41  for (int i = 0; i < r; ++i) {
42  num /= n;
43  }
44  return num % n;
45 }

Here is the caller graph for this function:

double jubatus::core::clustering::dist ( const common::sfv_t &  p1,
const common::sfv_t &  p2 
)

Definition at line 151 of file util.cpp.

Referenced by dist(), jubatus::core::clustering::kmeans_clustering_method::do_batch_update(), jubatus::core::graph::graph_wo_index::get_diff_eigen_score(), jubatus::core::storage::lsh_index_storage::get_sorted_similar_rows(), jubatus::core::storage::lsh_probe_generator::init(), min_dist(), jubatus::core::nearest_neighbor::ranking_hamming_bit_vectors(), and jubatus::core::graph::graph_wo_index::update_spt_node().

151  {
152  double ret = 0;
153  common::sfv_t::const_iterator it1 = p1.begin();
154  common::sfv_t::const_iterator it2 = p2.begin();
155  while (it1 != p1.end() && it2 != p2.end()) {
156  int cmp = strcmp(it1->first.c_str(), it2->first.c_str());
157  if (cmp < 0) {
158  ret += it1->second * it1->second;
159  ++it1;
160  } else if (cmp > 0) {
161  ret += it2->second * it2->second;
162  ++it2;
163  } else {
164  ret += (it1->second - it2->second) * (it1->second - it2->second);
165  ++it1;
166  ++it2;
167  }
168  }
169  for (; it1 != p1.end(); ++it1) {
170  ret += std::pow(it1->second, 2);
171  }
172  for (; it2 != p2.end(); ++it2) {
173  ret += std::pow(it2->second, 2);
174  }
175  return std::sqrt(ret);
176 }

Here is the caller graph for this function:

double jubatus::core::clustering::dist ( const weighted_point &  d1,
const weighted_point &  d2 
)

Definition at line 178 of file util.cpp.

References jubatus::core::clustering::weighted_point::data, and dist().

178  {
179  return dist(d1.data, d2.data);
180 }
double dist(const weighted_point &d1, const weighted_point &d2)
Definition: util.cpp:178

Here is the call graph for this function:

void jubatus::core::clustering::dump_wplist ( const wplist &  src)
weighted_point jubatus::core::clustering::get_point ( size_t  d)
inline

Definition at line 38 of file testutil.hpp.

References jubatus::core::clustering::weighted_point::data, and jubatus::core::clustering::weighted_point::weight.

Referenced by get_points().

38  {
39  static mtrand r_(0);
40  weighted_point ret;
41  for (size_t i = 0; i < d; ++i) {
42  ret.data.push_back(make_pair(
43  lexical_cast<string, int>(i), r_.next_gaussian(0, 10)));
44  }
45  ret.weight = 1;
46  return ret;
47 }

Here is the caller graph for this function:

weighted_point jubatus::core::clustering::get_point ( size_t  d,
const std::vector< double > &  c 
)
inline

Definition at line 49 of file testutil.hpp.

References jubatus::core::clustering::weighted_point::data, and jubatus::core::clustering::weighted_point::weight.

49  {
50  static mtrand r_(0);
51  weighted_point ret;
52  for (size_t i = 0; i < d; ++i) {
53  ret.data.push_back(std::make_pair(
54  "#test_" + lexical_cast<string, int>(i),
55  c[i] + r_.next_gaussian(0, 10)));
56  }
57  ret.weight = 1;
58  return ret;
59 }
std::vector<weighted_point> jubatus::core::clustering::get_points ( size_t  n,
size_t  d 
)
inline

Definition at line 61 of file testutil.hpp.

References get_point().

61  {
62  std::vector<weighted_point> ret;
63  for (size_t i = 0; i < n; ++i) {
64  ret.push_back(get_point(d));
65  }
66  return ret;
67 }
weighted_point get_point(size_t d, const std::vector< double > &c)
Definition: testutil.hpp:49

Here is the call graph for this function:

std::vector<weighted_point> jubatus::core::clustering::get_points ( size_t  n,
size_t  d,
const std::vector< double > &  c 
)
inline

Definition at line 69 of file testutil.hpp.

References get_point().

70  {
71  std::vector<weighted_point> ret;
72  for (size_t i = 0; i < n; ++i) {
73  ret.push_back(get_point(d, c));
74  }
75  return ret;
76 }
weighted_point get_point(size_t d, const std::vector< double > &c)
Definition: testutil.hpp:49

Here is the call graph for this function:

std::pair<size_t, double> jubatus::core::clustering::min_dist ( const common::sfv_t &  p,
const std::vector< common::sfv_t > &  P 
)
pair<size_t, double> jubatus::core::clustering::min_dist ( const common::sfv_t &  p,
const vector< common::sfv_t > &  P 
)

Definition at line 182 of file util.cpp.

References dist().

Referenced by jubatus::core::clustering::compressor::kmeans_compressor::bicriteria_to_coreset(), jubatus::core::clustering::kmeans_clustering_method::do_batch_update(), jubatus::core::clustering::compressor::kmeans_compressor::get_bicriteria(), jubatus::core::clustering::kmeans_clustering_method::get_clusters(), jubatus::core::clustering::kmeans_clustering_method::get_nearest_center_index(), and jubatus::core::clustering::kmeans_clustering_method::initialize_centers().

184  {
185  size_t idx = 0;
186  double mindist = DBL_MAX;
187  for (vector<common::sfv_t>::const_iterator it = P.begin();
188  it != P.end(); ++it) {
189  double d = dist(p, *it);
190  if (mindist > d) {
191  idx = it - P.begin();
192  mindist = d;
193  }
194  }
195  return std::make_pair(idx, mindist);
196 }
double dist(const weighted_point &d1, const weighted_point &d2)
Definition: util.cpp:178

Here is the call graph for this function:

Here is the caller graph for this function:

std::pair< size_t, double > jubatus::core::clustering::min_dist ( const weighted_point &  d1,
const wplist &  P 
)

Definition at line 198 of file util.cpp.

References dist().

198  {
199  double md = DBL_MAX;
200  size_t midx = 0;
201  for (wplist::const_iterator it = P.begin(); it != P.end(); ++it) {
202  double d = dist((*it), d1);
203  if (md > d) {
204  midx = it - P.begin();
205  md = d;
206  }
207  }
208  return std::make_pair(midx, md);
209 }
double dist(const weighted_point &d1, const weighted_point &d2)
Definition: util.cpp:178

Here is the call graph for this function:

common::sfv_t jubatus::core::clustering::scalar_dot ( const common::sfv_t &  p,
double  s 
)

Definition at line 143 of file util.cpp.

Referenced by jubatus::core::clustering::kmeans_clustering_method::do_batch_update().

143  {
144  common::sfv_t ret;
145  for (common::sfv_t::const_iterator it = p.begin(); it != p.end(); ++it) {
146  ret.push_back(make_pair((*it).first, (*it).second*s));
147  }
148  return ret;
149 }
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29

Here is the caller graph for this function:

void jubatus::core::clustering::scalar_mul_and_add ( const common::sfv_t &  left,
float  s,
common::sfv_t &  right 
)

Definition at line 62 of file util.cpp.

Referenced by jubatus::core::clustering::kmeans_clustering_method::do_batch_update().

65  {
66  common::sfv_t::const_iterator l = left.begin();
67  common::sfv_t::iterator r = right.begin();
68  while (l != left.end() && r != right.end()) {
69  if (l->first < r->first) {
70  std::pair<std::string, float> p = *l;
71  p.second *= s;
72  r = right.insert(r, p);
73  ++l;
74  } else if (l->first > r->first) {
75  ++r;
76  } else {
77  r->second += l->second * s;
78  ++l;
79  ++r;
80  }
81  }
82  for (; l != left.end(); ++l) {
83  std::pair<std::string, float> p = *l;
84  p.second *= s;
85  right.push_back(p);
86  }
87 }

Here is the caller graph for this function:

common::sfv_t jubatus::core::clustering::sub ( const common::sfv_t &  p1,
const common::sfv_t &  p2 
)

Definition at line 116 of file util.cpp.

116  {
117  common::sfv_t ret;
118  common::sfv_t::const_iterator it1 = p1.begin();
119  common::sfv_t::const_iterator it2 = p2.begin();
120  while (it1 != p1.end() && it2 != p2.end()) {
121  if ((*it1).first < (*it2).first) {
122  ret.push_back((*it1));
123  ++it1;
124  } else if ((*it1).first > (*it2).first) {
125  ret.push_back(make_pair((*it2).first, -(*it2).second));
126  ++it2;
127  } else {
128  ret.push_back(make_pair((*it1).first, (*it1).second - (*it2).second));
129  ++it1;
130  ++it2;
131  }
132  }
133  for (; it1 != p1.end(); ++it1) {
134  ret.push_back((*it1));
135  }
136  for (; it2 != p2.end(); ++it2) {
137  ret.push_back(make_pair((*it2).first, -(*it2).second));
138  }
139 
140  return ret;
141 }
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
double jubatus::core::clustering::sum ( const common::sfv_t &  p)

Definition at line 47 of file util.cpp.

Referenced by jubatus::core::clustering::gmm::cluster_probs(), and jubatus::core::storage::feature_fabssum().

47  {
48  double s = 0;
49  for (common::sfv_t::const_iterator it = p.begin(); it != p.end(); ++it) {
50  s += (*it).second;
51  }
52  return s;
53 }

Here is the caller graph for this function:

double jubatus::core::clustering::sum2 ( const common::sfv_t &  p)

Definition at line 54 of file util.cpp.

54  {
55  double s = 0;
56  for (common::sfv_t::const_iterator it = p.begin(); it != p.end(); ++it) {
57  s += std::pow((*it).second, 2);
58  }
59  return s;
60 }
void jubatus::core::clustering::swap ( weighted_point &  p1,
weighted_point &  p2 
)
inline