25 #include "jubatus/util/data/unordered_map.h"
26 #include "jubatus/util/lang/bind.h"
27 #include "jubatus/util/lang/shared_ptr.h"
28 #include "../common/exception.hpp"
29 #include "../storage/column_table.hpp"
30 #include "../framework/mixable_versioned_table.hpp"
31 #include "../nearest_neighbor/nearest_neighbor_base.hpp"
33 using jubatus::util::data::unordered_map;
34 using jubatus::util::data::unordered_set;
35 using jubatus::util::lang::shared_ptr;
36 using jubatus::util::lang::bind;
// Default neighbor count. NOTE(review): presumably the default for
// nearest_neighbor_num -- its use is not visible in this excerpt; confirm.
45 const uint32_t DEFAULT_NEIGHBOR_NUM = 10;
// Value used to initialize reverse_nearest_neighbor_num.
46 const uint32_t DEFAULT_REVERSE_NN_NUM = 30;
// Index passed to get_float_column() to obtain the kdist column.
48 const size_t KDIST_COLUMN_INDEX = 0;
// Index passed to get_float_column() to obtain the lrd column.
49 const size_t LRD_COLUMN_INDEX = 1;
51 shared_ptr<column_table> create_lof_table() {
53 std::vector<storage::column_type> schema(
// Computes a score from a point's lrd and the lrd values of its neighbors:
// the sum of the neighbor lrds divided by (neighbor count * lrd).
//   lrd            lrd value of the point being scored
//   neighbor_lrds  lrd values of that point's neighbors
59 float calculate_lof(
float lrd,
const std::vector<float>& neighbor_lrds) {
// No neighbors: there is nothing to sum, so the result is decided by lrd alone.
60 if (neighbor_lrds.empty()) {
// Yields 1 when lrd is exactly 0, +infinity otherwise.
61 return lrd == 0 ? 1 : std::numeric_limits<float>::infinity();
// The 0.0f seed makes std::accumulate sum in float.
64 const float sum_neighbor_lrd = std::accumulate(
65 neighbor_lrds.begin(), neighbor_lrds.end(), 0.0f);
// With both values infinite the division below would be inf / inf (NaN), so
// this case is handled separately.
// NOTE(review): the body of this branch is not visible in this excerpt --
// confirm what it returns.
67 if (std::isinf(sum_neighbor_lrd) && std::isinf(lrd)) {
// Equivalent to (mean of the neighbor lrds) / lrd.
71 return sum_neighbor_lrd / (neighbor_lrds.size() * lrd);
78 reverse_nearest_neighbor_num(DEFAULT_REVERSE_NN_NUM) {
83 const std::string&
id,
84 shared_ptr<nearest_neighbor_base> nearest_neighbor_engine)
99 "nearest_neighbor_num <= reverse_nearest_neighbor_num"));
108 const std::string&
id,
109 shared_ptr<nearest_neighbor_base> nearest_neighbor_engine,
110 shared_ptr<unlearner::unlearner_base>
unlearner)
118 shared_ptr<column_table> lof_table = create_lof_table();
132 std::vector<float> neighbor_lrds;
135 return calculate_lof(lrd, neighbor_lrds);
139 std::vector<float> neighbor_lrds;
142 return calculate_lof(lrd, neighbor_lrds);
162 unordered_set<std::string> update_set;
165 if (table->exact_match(
id).first) {
176 update_set.insert(
id);
189 std::vector<framework::mixable*> mixables;
201 "no more space available to add new ID: " +
id));
216 unordered_set<std::string> reverse_knn;
218 reverse_knn.erase(key);
228 std::vector<float>& neighbor_lrds)
const {
229 std::vector<std::pair<std::string, float> > neighbors;
// Parameter list and part of the body of a function taking a row id and an
// output vector of lrd values.
// NOTE(review): the function header and the call that fills `neighbors` are
// not visible in this excerpt; presumably this is the id overload of
// collect_lrds -- confirm.
237 const std::string&
id,
238 std::vector<float>& neighbor_lrds)
const {
// (id, float) pairs describing the neighbors found for `id`.
239 std::vector<std::pair<std::string, float> > neighbors;
// Look for the entry whose id equals the queried id itself.
244 for (
size_t i = 0; i < neighbors.size(); ++i) {
245 if (neighbors[i].first ==
id) {
// Remove that entry without shifting elements: swap it to the back and pop.
// The order of the remaining entries is not preserved.
246 std::swap(neighbors[i], neighbors.back());
247 neighbors.pop_back();
// NOTE(review): the function header is not visible in this excerpt; the
// parameter list matches collect_lrds_from_neighbors.
//   neighbors      (id, float) pairs; the float is compared against each
//                  neighbor's kdist in the summation below
//   neighbor_lrds  output; resized to neighbors.size() and filled with the
//                  lrd of each neighbor
// Returns neighbors.size() divided by the accumulated sum, or +infinity when
// there are no neighbors or the sum is zero.
259 const std::vector<std::pair<std::string, float> >& neighbors,
260 std::vector<float>& neighbor_lrds)
const {
261 neighbor_lrds.resize(neighbors.size());
// No neighbors: return +infinity (neighbor_lrds has just been resized to 0).
262 if (neighbors.empty()) {
263 return std::numeric_limits<float>::infinity();
// One parameter record (kdist, lrd) per neighbor.
// NOTE(review): the statement that fills parameters[i] is not visible here;
// presumably it calls get_row_parameter for neighbors[i].first -- confirm.
267 std::vector<parameter> parameters(neighbors.size());
268 for (
size_t i = 0; i < neighbors.size(); ++i) {
270 neighbor_lrds[i] = parameters[i].lrd;
// Sum over all neighbors of max(float paired with the neighbor, its kdist).
274 float sum_reachability = 0;
275 for (
size_t i = 0; i < neighbors.size(); ++i) {
277 sum_reachability += std::max(neighbors[i].second, parameters[i].kdist);
// A zero sum would make the final division a divide-by-zero, so +infinity is
// returned explicitly instead.
280 if (sum_reachability == 0) {
282 return std::numeric_limits<float>::infinity();
// Neighbor count divided by the accumulated sum.
287 return neighbors.size() / sum_reachability;
// NOTE(review): the function header and the search call that fills nn_result
// are not visible in this excerpt; the parameter list matches
// collect_neighbors.
//   query      presumably the id whose neighbors are searched -- confirm
//   neighbors  set that receives the ids found in nn_result
291 const std::string& query,
292 unordered_set<std::string>& neighbors)
const {
293 std::vector<std::pair<std::string, float> > nn_result;
// Only the id of each result is kept; the paired float is discarded here.
// Inserting into a set means an id already present is not added twice.
297 for (
size_t i = 0; i < nn_result.size(); ++i) {
298 neighbors.insert(nn_result[i].first);
// NOTE(review): the left-hand side of this statement is not visible in this
// excerpt; kdist_column used below is presumably bound to this column.
305 table->get_float_column(KDIST_COLUMN_INDEX);
// Translate the neighbor id strings into uint64_t row indexes.
308 std::vector<uint64_t> ids;
309 ids.reserve(neighbors.size());
310 for (unordered_set<std::string>::const_iterator it = neighbors.begin();
311 it != neighbors.end(); ++it) {
// exact_match returns a (bool, row index) pair.
// NOTE(review): how a failed lookup is handled is not visible here.
312 const std::pair<bool, uint64_t> hit = table->exact_match(*it);
// Maps a row index to its list of (neighbor row index, float) pairs.
// NOTE(review): the variable name is on a line not shown; presumably this is
// the type of nested_neighbors.
318 unordered_map<uint64_t, std::vector<std::pair<uint64_t, float> > >
323 std::vector<std::pair<std::string, float> > nn_result;
// First loop over ids: record each row's neighbors and write its kdist.
324 for (std::vector<uint64_t>::const_iterator it = ids.begin();
325 it != ids.end(); ++it) {
328 std::vector<std::pair<uint64_t, float> >& nn_indexes =
329 nested_neighbors[*it];
331 nn_indexes.reserve(nn_result.size());
// Convert each (id string, float) result into (row index, float).
332 for (
size_t i = 0; i < nn_result.size(); ++i) {
333 const std::pair<bool, uint64_t> hit =
334 table->exact_match(nn_result[i].first);
336 nn_indexes.push_back(std::make_pair(hit.second, nn_result[i].second));
// The row's kdist is the float of the last entry in nn_result.
// NOTE(review): back() requires nn_result to be non-empty; no guard is
// visible in this excerpt -- confirm one exists.
340 kdist_column[*it] = nn_result.back().second;
// Second loop over ids: compute and store lrd for each row. It reads
// kdist_column entries, which the loop above writes.
345 for (std::vector<uint64_t>::const_iterator it = ids.begin();
346 it != ids.end(); ++it) {
347 const std::vector<std::pair<uint64_t, float> >& nn = nested_neighbors[*it];
// Only the first `length` neighbors contribute to the sum (the arguments of
// std::min are not visible in this excerpt).
350 const size_t length = std::min(
352 float sum_reachability = 0;
353 for (
size_t i = 0; i < length; ++i) {
// Per neighbor: the larger of its paired float and that neighbor's kdist.
354 sum_reachability += std::max(nn[i].second, kdist_column[nn[i].first]);
// Zero sum: store +infinity instead of dividing by zero.
357 if (sum_reachability == 0) {
358 lrd = std::numeric_limits<float>::infinity();
360 lrd = length / sum_reachability;
363 lrd_column[*it] = lrd;
364 table->update_clock(*it, owner);
// Look up `row` in the table; hit is a (bool, row index) pair.
371 std::pair<bool, uint64_t> hit = table->exact_match(row);
// Message naming the row that was not found. The statement that uses it is
// not visible in this excerpt.
374 "row \"" + row +
"\" not found in light_lof table"));
// Read this row's kdist and lrd from their respective float columns.
377 param.
kdist = table->get_float_column(KDIST_COLUMN_INDEX)[hit.second];
378 param.
lrd = table->get_float_column(LRD_COLUMN_INDEX)[hit.second];
// Start a 2-element array. The elements themselves are packed by statements
// not visible in this excerpt.
383 packer.pack_array(2);
// Reject any object that is not a msgpack array of exactly 2 elements.
389 if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
390 throw msgpack::type_error();
float collect_lrds_from_neighbors(const std::vector< std::pair< std::string, float > > &neighbors, std::vector< float > &neighbor_lrd) const
jubatus::util::lang::shared_ptr< nearest_neighbor::nearest_neighbor_base > nearest_neighbor_engine_
jubatus::util::lang::shared_ptr< framework::mixable_versioned_table > mixable_scores_
int reverse_nearest_neighbor_num
void set_row(const std::string &id, const common::sfv_t &sfv)
parameter get_row_parameter(const std::string &row) const
void touch(const std::string &id)
void push_back(const T &value)
void pack(framework::packer &packer) const
light_lof(const config &config, const std::string &id, jubatus::util::lang::shared_ptr< nearest_neighbor::nearest_neighbor_base > nearest_neighbor_engine)
void update_row(const std::string &id, const sfv_diff_t &diff)
jubatus::util::lang::shared_ptr< framework::mixable_versioned_table > mixable_nearest_neighbor_
#define JUBATUS_EXCEPTION(e)
jubatus::util::data::optional< std::string > unlearner
void swap(weighted_point &p1, weighted_point &p2)
void clear_row(const std::string &id)
msgpack::packer< jubatus_packer > packer
float collect_lrds(const common::sfv_t &query, std::vector< float > &neighbor_lrds) const
float calc_anomaly_score(const common::sfv_t &query) const
void update_entries(const jubatus::util::data::unordered_set< std::string > &neighbors)
void unlearn(const std::string &id)
std::vector< std::pair< std::string, float > > sfv_t
jubatus::util::lang::shared_ptr< unlearner::unlearner_base > unlearner_
void collect_neighbors(const std::string &query, jubatus::util::data::unordered_set< std::string > &neighbors) const
std::vector< framework::mixable * > get_mixables() const
void get_all_row_ids(std::vector< std::string > &ids) const
void unpack(msgpack::object o)