jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
weight_manager.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2012 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "weight_manager.hpp"
18 
19 #include <cmath>
20 #include <string>
21 #include <utility>
22 #include "../common/type.hpp"
24 #include "jubatus/util/concurrent/lock.h"
25 
26 using jubatus::util::concurrent::scoped_lock;
27 
28 namespace jubatus {
29 namespace core {
30 namespace fv_converter {
31 
32 namespace {
33 
// Unary predicate over a (feature key, value) pair: true when the value
// component compares equal to zero. Used with remove_if to drop entries whose
// weighted value became zero.
//
// The call operator is const-qualified so the predicate can be invoked on a
// const functor object; it holds no state, so calling it never mutates
// anything.
struct is_zero {
  // p: the (key, value) pair to test; only p.second is inspected.
  // Returns true iff p.second == 0 (both +0.0f and -0.0f compare equal to 0).
  bool operator()(const std::pair<std::string, float>& p) const {
    return p.second == 0;
  }
};
39 
40 } // namespace
41 
43 }
44 
46  : weights_(w) {
47 }
48 
50  const storage::version& v)
51  : weights_(w), version_(v) {
52 }
53 
56  if (version_ == target.version_) {
57  weights_.merge(target.weights_);
58  } else if (version_ < target.version_) {
59  weights_ = target.weights_;
60  version_ = target.version_;
61  }
62  return *this;
63 }
64 
66  : diff_weights_(),
67  master_weights_() {
68 }
69 
71  scoped_lock lk(mutex_);
73 }
74 
76  scoped_lock lk(mutex_);
77  for (common::sfv_t::iterator it = fv.begin(); it != fv.end(); ++it) {
78  double global_weight = get_global_weight(it->first);
79  it->second = static_cast<float>(it->second * global_weight);
80  }
81  fv.erase(remove_if(fv.begin(), fv.end(), is_zero()), fv.end());
82 }
83 
84 double weight_manager::get_global_weight(const std::string& key) const {
85  size_t p = key.find_last_of('/');
86  if (p == std::string::npos) {
87  return 1.0;
88  }
89  std::string type = key.substr(p + 1);
90  if (type == "bin") {
91  return 1.0;
92  } else if (type == "idf") {
93  double doc_count = get_document_count();
94  double doc_freq = get_document_frequency(key);
95  return std::log((doc_count + 1) / (doc_freq + 1));
96  } else if (type == "weight") {
97  p = key.find_last_of('#');
98  if (p == std::string::npos) {
99  return 0;
100  } else {
101  return get_user_weight(key.substr(0, p));
102  }
103  } else {
104  return 1;
105  }
106 }
107 
108 void weight_manager::add_weight(const std::string& key, float weight) {
109  scoped_lock lk(mutex_);
110  diff_weights_.add_weight(key, weight);
111 }
112 
113 } // namespace fv_converter
114 } // namespace core
115 } // namespace jubatus
size_t get_document_frequency(const std::string &key) const
double get_user_weight(const std::string &key) const
void get_weight(common::sfv_t &fv) const
versioned_weight_diff & merge(const versioned_weight_diff &target)
void update_document_frequency(const common::sfv_t &fv)
void add_weight(const std::string &key, float weight)
double get_global_weight(const std::string &key) const
std::vector< T > v(size)
void add_weight(const std::string &key, float weight)
void update_weight(const common::sfv_t &fv)
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29