jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
stat.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2012 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "stat.hpp"
18 #include <algorithm>
19 #include <cmath>
20 #include <string>
21 #include <utility>
22 #include "jubatus/util/system/time_util.h"
23 
24 using std::string;
25 using jubatus::util::system::time::clock_time;
26 using jubatus::util::system::time::get_clock_time;
27 
28 namespace jubatus {
29 namespace core {
30 namespace stat {
31 
32 stat::stat(size_t window_size)
33  : window_size_(window_size),
34  e_(0),
35  n_(0) {
36  if (!(1 <= window_size)) {
37  throw JUBATUS_EXCEPTION(
38  common::invalid_parameter("1 <= window_size"));
39  }
40 }
41 
42 stat::~stat() {
43 }
44 
45 void stat::get_diff(std::pair<double, size_t>& ret) const {
46  ret.first = 0;
47  ret.second = 0;
48 
49  for (jubatus::util::data::unordered_map<std::string, stat_val>::
50  const_iterator p = stats_.begin(); p != stats_.end(); ++p) {
51  double pr = p->second.n_;
52  ret.first += pr * std::log(pr);
53  ret.second += pr;
54  }
55 }
56 
57 bool stat::put_diff(const std::pair<double, size_t>& e) {
58  e_ = e.first;
59  n_ = e.second;
60  return true;
61 }
62 
63 void stat::mix(
64  const std::pair<double, size_t>& lhs,
65  std::pair<double, size_t>& ret) const {
66  ret.first += lhs.first;
67  ret.second += lhs.second;
68 }
69 
70 void stat::push(const std::string& key, double val) {
71  {
72  clock_time ct = get_clock_time();
73  window_.push_back(make_pair((uint64_t) ct, make_pair(key, val)));
74  stats_[key].add(val);
75  }
76  while (window_.size() > window_size_) {
77  string key = window_.front().second.first;
78  double val = window_.front().second.second;
79  stat_val& st = stats_[key];
80  window_.pop_front();
81  st.rem(val, key, *this);
82  if (st.n_ == 0) {
83  stats_.erase(key);
84  }
85  }
86 }
87 
88 double stat::sum(const std::string& key) const {
89  jubatus::util::data::unordered_map<std::string, stat_val>::const_iterator p =
90  stats_.find(key);
91  if (p == stats_.end()) {
92  throw JUBATUS_EXCEPTION(stat_error("sum: key " + key + " not found"));
93  }
94  return p->second.sum_;
95 }
96 
97 double stat::stddev(const std::string& key) const {
98  jubatus::util::data::unordered_map<std::string, stat_val>::const_iterator p =
99  stats_.find(key);
100  if (p == stats_.end()) {
101  throw JUBATUS_EXCEPTION(stat_error("stddev: key " + key + " not found"));
102  }
103  const stat_val& st = p->second;
104  return std::sqrt(moment(key, 2, st.sum_ / st.n_));
105 }
106 
107 double stat::max(const std::string& key) const {
108  jubatus::util::data::unordered_map<std::string, stat_val>::const_iterator p =
109  stats_.find(key);
110  if (p == stats_.end()) {
111  throw JUBATUS_EXCEPTION(stat_error("max: key " + key + " not found"));
112  }
113  const stat_val& st = p->second;
114  return st.max_;
115 }
116 
117 double stat::min(const std::string& key) const {
118  jubatus::util::data::unordered_map<std::string, stat_val>::const_iterator p =
119  stats_.find(key);
120  if (p == stats_.end()) {
121  throw JUBATUS_EXCEPTION(stat_error("min: key " + key + " not found"));
122  }
123  const stat_val& st = p->second;
124  return st.min_;
125 }
126 
127 double stat::entropy() const {
128  if (n_ == 0) {
129  // not MIXed ever yet
130  size_t total = 0;
131  for (jubatus::util::data::unordered_map<std::string, stat_val>::
132  const_iterator p = stats_.begin(); p != stats_.end(); ++p) {
133  total += p->second.n_;
134  }
135  double ret = 0;
136  for (jubatus::util::data::unordered_map<std::string, stat_val>::
137  const_iterator p = stats_.begin(); p != stats_.end(); ++p) {
138  double pr = p->second.n_ / static_cast<double>(total);
139  ret += pr * std::log(pr);
140  }
141  return -1.0 * ret;
142  }
143  double n = n_;
144  return std::log(n) - e_ / n_;
145 }
146 
147 double stat::moment(const std::string& key, int n, double c) const {
148  if (n < 0) {
149  return -1;
150  }
151  jubatus::util::data::unordered_map<std::string, stat_val>::const_iterator p =
152  stats_.find(key);
153  if (p == stats_.end()) {
154  throw JUBATUS_EXCEPTION(stat_error("moment: key " + key + " not found"));
155  }
156  const stat_val& st = p->second;
157 
158  if (n == 0) {
159  return 1;
160  }
161 
162  if (n == 1) {
163  return (st.sum_ - c * st.n_) / st.n_;
164  }
165 
166  if (n == 2) {
167  return (st.sum2_ - 2 * st.sum_ * c) / st.n_ + c * c;
168  }
169 
170  // fallback
171  double ret = 0;
172  for (size_t i = 0; i < window_.size(); ++i) {
173  if (window_[i].second.first != key) {
174  continue;
175  }
176  ret += std::pow(window_[i].second.second - c, n);
177  }
178  return ret / st.n_;
179 }
180 
181 void stat::clear() {
182  window_.clear();
183  stats_.clear();
184 }
185 
186 void stat::pack(framework::packer& packer) const {
187  packer.pack(*this);
188 }
189 void stat::unpack(msgpack::object o) {
190  o.convert(this);
191 }
192 std::string stat::type() const {
193  return "stat";
194 }
195 
196 } // namespace stat
197 } // namespace core
198 } // namespace jubatus
void rem(double d, const std::string &key, stat &st)
Definition: stat.hpp:113
std::deque< std::pair< uint64_t, std::pair< std::string, double > > > window_
Definition: stat.hpp:166
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
double moment(const std::string &key, int n, double c) const
Definition: stat.cpp:147
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
double sum(const common::sfv_t &p)
Definition: util.cpp:47
jubatus::util::data::unordered_map< std::string, stat_val > stats_
Definition: stat.hpp:167