jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
burst.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2014 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_BURST_BURST_HPP_
18 #define JUBATUS_CORE_BURST_BURST_HPP_
19 
20 #include <string>
21 #include <vector>
22 #include "jubatus/util/data/unordered_map.h"
23 #include "jubatus/util/lang/scoped_ptr.h"
24 #include "jubatus/util/text/json.h"
25 
26 #include "../framework/mixable_helper.hpp"
27 #include "burst_result.hpp"
28 
29 namespace jubatus {
30 namespace core {
31 namespace burst {
32 
// Option bundle accepted by burst's constructor (see `explicit burst(const burst_options&)`).
//
// NOTE(review): this listing is incomplete. Listing lines 34-38 and 40 are
// absent from the extract. The five names below are the argument list of a
// MSGPACK_DEFINE(...) call whose opening line (listing line 40) was dropped;
// the page's cross-reference index shows the full call. Listing lines 34-38
// presumably hold the declarations of those five members -- their types are
// not visible here, so confirm against the real header.
33 struct burst_options {
39 
41  window_batch_size,
42  batch_interval,
43  result_window_rotate_size,
44  max_reuse_batch_num,
45  costcut_threshold);
46 
    // Archive hook: chains all five members through `ar` with operator&,
    // in the same order as the MSGPACK_DEFINE argument list above.
47  template<class Ar>
48  void serialize(Ar& ar) {
49  ar & JUBA_MEMBER(window_batch_size)
50  & JUBA_MEMBER(batch_interval)
51  & JUBA_MEMBER(result_window_rotate_size)
52  & JUBA_MEMBER(max_reuse_batch_num)
53  & JUBA_MEMBER(costcut_threshold);
54  }
55 };
56 
// NOTE(review): the opening line of this struct (listing line 57) is missing
// from the extract. Presumably it is `struct keyword_params {`, since
// burst::add_keyword takes a `const keyword_params&` -- confirm against the
// real header.
// Two double-valued parameters, both registered with MSGPACK_DEFINE below.
58  double scaling_param;
59  double gamma;
60 
61  MSGPACK_DEFINE(scaling_param, gamma);
62 };
63 
// NOTE(review): the opening line of this struct (listing line 64) is missing
// from the extract. Presumably it is `struct keyword_with_params {`, the
// element type of burst::keyword_list -- confirm against the real header.
// Carries a keyword string together with the same two double parameters
// (scaling_param, gamma) as the struct above; no MSGPACK_DEFINE is visible here.
65  std::string keyword;
66  double scaling_param;
67  double gamma;
68 };
69 
// Public interface of the burst module. The only data member is `p_`, a
// scoped_ptr to the opaque nested class `impl_` (declared at the bottom), so
// nothing about the internal representation is visible from this header.
//
// NOTE(review): listing lines 72, 105 and 119 are missing from this extract;
// see the notes placed at each gap.
70 class burst {
71  public:
    // NOTE(review): listing line 72 is missing. The page's index shows
    // `burst_result result_t` defined at burst.hpp:72, i.e. presumably
    // `typedef burst_result result_t;` belongs here.
    // result_map: unordered_map from std::string to result_t
    // (presumably keyed by keyword -- confirm in burst.cpp).
73  typedef jubatus::util::data::unordered_map<std::string, result_t> result_map;
    // keyword_list: vector of keyword_with_params entries.
74  typedef std::vector<keyword_with_params> keyword_list;
75 
    // Explicit to rule out implicit conversion from burst_options.
76  explicit burst(const burst_options& options);
77  ~burst();
78 
    // Keyword management. The bool return values are not documented in this
    // header; presumably they report success -- confirm in burst.cpp.
79  bool add_keyword(const std::string& keyword,
80  const keyword_params& params,
81  bool processed_in_this_server);
82  bool remove_keyword(const std::string& keyword);
83  bool remove_all_keywords();
84  keyword_list get_all_keywords() const;
85  keyword_list get_processed_keywords() const;
86 
    // Feeds one document string together with a position `pos`
    // (units/meaning of `pos` are not stated here -- TODO confirm).
87  bool add_document(const std::string& str, double pos);
88 
89  void calculate_results();
90 
    // Per-keyword result lookup; the `_at` variant additionally takes a position.
91  result_t get_result(const std::string& keyword) const;
92  result_t get_result_at(const std::string& keyword, double pos) const;
93 
94  // return results processed in this server; the other results are merged
95  result_map get_all_bursted_results() const;
96  result_map get_all_bursted_results_at(double pos) const;
97 
    // Diff object exchanged through get_diff / put_diff / mix. It holds a
    // shared_ptr to a const `impl_` (a nested class distinct from
    // burst::impl_), and `burst` is a friend so it can reach that pointer.
98  class diff_t {
99  friend class burst;
100  class impl_;
101  jubatus::util::lang::shared_ptr<const impl_> p_;
102  public:
    // Default construction leaves p_ empty.
103  diff_t() {}
104  void mix(const diff_t& mixed);
    // NOTE(review): listing line 105 is missing. The index lists
    // `void msgpack_pack(framework::packer& packer) const` (burst.cpp:591),
    // presumably declared here as the counterpart of msgpack_unpack.
106  void msgpack_unpack(msgpack::object o);
107  };
108  void get_diff(diff_t& ret) const;
109  bool put_diff(const diff_t&);
    // Static two-argument form: simply delegates to ret.mix(lhs).
110  static void mix(const diff_t& lhs, diff_t& ret) {
111  ret.mix(lhs);
112  }
113  // return true iff put_diff has been called
114  bool has_been_mixed() const;
115 
116  void set_processed_keywords(const std::vector<std::string>& keywords);
117 
118  void clear();
    // NOTE(review): listing line 119 is missing. The index lists
    // `storage::version get_version() const` (burst.cpp:618), presumably
    // declared here.
    // pack/unpack: msgpack-based save and load of the whole object.
120  void pack(framework::packer& packer) const;
121  void unpack(msgpack::object o);
122 
123  private:
    // Opaque implementation class, defined outside this header.
124  class impl_;
125  jubatus::util::lang::scoped_ptr<impl_> p_;
126 };
127 
130 
131 } // namespace burst
132 } // namespace core
133 } // namespace jubatus
134 
135 #endif // JUBATUS_CORE_BURST_BURST_HPP_
keyword_list get_processed_keywords() const
Definition: burst.cpp:544
storage::version get_version() const
Definition: burst.cpp:618
MSGPACK_DEFINE(window_batch_size, batch_interval, result_window_rotate_size, max_reuse_batch_num, costcut_threshold)
result_t get_result_at(const std::string &keyword, double pos) const
Definition: burst.cpp:563
result_map get_all_bursted_results() const
Definition: burst.cpp:568
burst(const burst_options &options)
Definition: burst.cpp:515
jubatus::util::lang::shared_ptr< const impl_ > p_
Definition: burst.hpp:100
burst_result result_t
Definition: burst.hpp:72
result_map get_all_bursted_results_at(double pos) const
Definition: burst.cpp:572
jubatus::util::lang::scoped_ptr< impl_ > p_
Definition: burst.hpp:124
keyword_list get_all_keywords() const
Definition: burst.cpp:539
void pack(framework::packer &packer) const
Definition: burst.cpp:622
void msgpack_pack(framework::packer &packer) const
Definition: burst.cpp:591
void mix(const diff_t &mixed)
Definition: burst.cpp:582
bool add_document(const std::string &str, double pos)
void msgpack_unpack(msgpack::object o)
Definition: burst.cpp:598
static void mix(const diff_t &lhs, diff_t &ret)
Definition: burst.hpp:110
void unpack(msgpack::object o)
Definition: burst.cpp:626
bool has_been_mixed() const
Definition: burst.cpp:609
bool put_diff(const diff_t &)
Definition: burst.cpp:605
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
std::vector< std::pair< std::string, wplist > > diff_t
Definition: types.hpp:57
void get_diff(diff_t &ret) const
Definition: burst.cpp:601
bool remove_keyword(const std::string &keyword)
jubatus::util::data::unordered_map< std::string, result_t > result_map
Definition: burst.hpp:73
void set_processed_keywords(const std::vector< std::string > &keywords)
MSGPACK_DEFINE(scaling_param, gamma)
bool add_keyword(const std::string &keyword, const keyword_params &params, bool processed_in_this_server)
framework::linear_mixable_helper< burst, burst::diff_t > mixable_burst
Definition: burst.hpp:129
result_t get_result(const std::string &keyword) const
Definition: burst.cpp:559
std::vector< keyword_with_params > keyword_list
Definition: burst.hpp:74