jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
datum_to_fv_converter.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
18 
19 #include <cmath>
20 #include <iostream>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #include "jubatus/util/data/optional.h"
25 #include "jubatus/util/lang/shared_ptr.h"
26 #include "binary_feature.hpp"
27 #include "combination_feature.hpp"
28 #include "counter.hpp"
29 #include "datum.hpp"
30 #include "exception.hpp"
31 #include "feature_hasher.hpp"
32 #include "match_all.hpp"
34 #include "num_feature.hpp"
35 #include "num_filter.hpp"
36 #include "space_splitter.hpp"
37 #include "string_feature.hpp"
38 #include "string_filter.hpp"
39 #include "weight_manager.hpp"
40 #include "without_split.hpp"
41 
42 namespace jubatus {
43 namespace core {
44 namespace fv_converter {
45 
47 
49  private:
50  typedef jubatus::util::data::unordered_map<std::string, float> weight_t;
51 
53  jubatus::util::lang::shared_ptr<key_matcher> matcher_;
54  jubatus::util::lang::shared_ptr<string_filter> filter_;
55  std::string suffix_;
56 
57  void filter(const datum::sv_t& string_values, datum::sv_t& filtered) const {
58  for (size_t i = 0; i < string_values.size(); ++i) {
59  const std::pair<std::string, std::string>& value = string_values[i];
60  if (matcher_->match(value.first)) {
61  std::string out;
62  filter_->filter(value.second, out);
63  std::string dest = value.first + suffix_;
64  filtered.push_back(std::make_pair(dest, out));
65  }
66  }
67  }
68  };
69 
70  struct num_filter_rule {
71  jubatus::util::lang::shared_ptr<key_matcher> matcher_;
72  jubatus::util::lang::shared_ptr<num_filter> filter_;
73  std::string suffix_;
74 
75  void filter(const datum::nv_t& num_values, datum::nv_t& filtered) const {
76  for (size_t i = 0; i < num_values.size(); ++i) {
77  const std::pair<std::string, double>& value = num_values[i];
78  if (matcher_->match(value.first)) {
79  double out = filter_->filter(value.second);
80  std::string dest = value.first + suffix_;
81  filtered.push_back(std::make_pair(dest, out));
82  }
83  }
84  }
85  };
86 
88  std::string name_;
89  jubatus::util::lang::shared_ptr<key_matcher> matcher_;
90  jubatus::util::lang::shared_ptr<string_feature> splitter_;
91  std::vector<splitter_weight_type> weights_;
92 
94  const std::string& name,
95  jubatus::util::lang::shared_ptr<key_matcher> matcher,
96  jubatus::util::lang::shared_ptr<string_feature> splitter,
97  const std::vector<splitter_weight_type>& weights)
98  : name_(name),
99  matcher_(matcher),
100  splitter_(splitter),
101  weights_(weights) {
102  }
103  };
104 
106  std::string name_;
107  jubatus::util::lang::shared_ptr<key_matcher> matcher_;
108  jubatus::util::lang::shared_ptr<num_feature> feature_func_;
109 
111  const std::string& name,
112  jubatus::util::lang::shared_ptr<key_matcher> matcher,
113  jubatus::util::lang::shared_ptr<num_feature> feature_func)
114  : name_(name),
115  matcher_(matcher),
116  feature_func_(feature_func) {
117  }
118  };
119 
121  std::string name_;
122  jubatus::util::lang::shared_ptr<key_matcher> matcher_;
123  jubatus::util::lang::shared_ptr<binary_feature> feature_func_;
124 
126  const std::string& name,
127  jubatus::util::lang::shared_ptr<key_matcher> matcher,
128  jubatus::util::lang::shared_ptr<binary_feature> feature_func)
129  : name_(name),
130  matcher_(matcher),
131  feature_func_(feature_func) {
132  }
133  };
134 
136  std::string name_;
137  jubatus::util::lang::shared_ptr<key_matcher> matcher_left_;
138  jubatus::util::lang::shared_ptr<key_matcher> matcher_right_;
139  jubatus::util::lang::shared_ptr<combination_feature> feature_func_;
140 
142  const std::string& name,
143  jubatus::util::lang::shared_ptr<key_matcher> matcher_left,
144  jubatus::util::lang::shared_ptr<key_matcher> matcher_right,
145  jubatus::util::lang::shared_ptr<combination_feature> feature_func)
146  : name_(name),
147  matcher_left_(matcher_left),
148  matcher_right_(matcher_right),
149  feature_func_(feature_func) {
150  }
151  };
152 
153  // binary and combination feature rules
154  std::vector<binary_feature_rule> binary_rules_;
155  std::vector<combination_feature_rule> combination_rules_;
156 
157  std::vector<string_filter_rule> string_filter_rules_;
158  std::vector<num_filter_rule> num_filter_rules_;
159  std::vector<string_feature_rule> string_rules_;
160  std::vector<num_feature_rule> num_rules_;
161 
162  jubatus::util::lang::shared_ptr<mixable_weight_manager> mixable_weights_;
163 
164  jubatus::util::data::optional<feature_hasher> hasher_;
165 
166  public:
168  : mixable_weights_(
170  jubatus::util::lang::shared_ptr<weight_manager>(
171  new weight_manager))) {
172  }
173 
174  void clear_rules() {
175  string_filter_rules_.clear();
176  num_filter_rules_.clear();
177  string_rules_.clear();
178  num_rules_.clear();
179  binary_rules_.clear();
180  combination_rules_.clear();
181  }
182 
184  jubatus::util::lang::shared_ptr<key_matcher> matcher,
185  jubatus::util::lang::shared_ptr<string_filter> filter,
186  const std::string& suffix) {
187  string_filter_rule rule = { matcher, filter, suffix };
188  string_filter_rules_.push_back(rule);
189  }
190 
192  jubatus::util::lang::shared_ptr<key_matcher> matcher,
193  jubatus::util::lang::shared_ptr<num_filter> filter,
194  const std::string& suffix) {
195  num_filter_rule rule = { matcher, filter, suffix };
196  num_filter_rules_.push_back(rule);
197  }
198 
200  const std::string& name,
201  jubatus::util::lang::shared_ptr<key_matcher> matcher,
202  jubatus::util::lang::shared_ptr<string_feature> splitter,
203  const std::vector<splitter_weight_type>& weights) {
204  string_rules_.push_back(
205  string_feature_rule(name, matcher, splitter, weights));
206  }
207 
209  const std::string& name,
210  jubatus::util::lang::shared_ptr<key_matcher> matcher,
211  jubatus::util::lang::shared_ptr<num_feature> feature_func) {
212  num_rules_.push_back(num_feature_rule(name, matcher, feature_func));
213  }
214 
216  const std::string& name,
217  jubatus::util::lang::shared_ptr<key_matcher> matcher,
218  jubatus::util::lang::shared_ptr<binary_feature> feature_func) {
219  binary_rules_.push_back(binary_feature_rule(name, matcher, feature_func));
220  }
221 
223  const std::string& name,
224  jubatus::util::lang::shared_ptr<key_matcher> matcher_left,
225  jubatus::util::lang::shared_ptr<key_matcher> matcher_right,
226  jubatus::util::lang::shared_ptr<combination_feature> feature_func) {
227  combination_rules_.push_back(
229  name,
230  matcher_left,
231  matcher_right,
232  feature_func));
233  }
234 
235  void add_weight(const std::string& key, float weight) {
236  jubatus::util::lang::shared_ptr<weight_manager> weights =
237  mixable_weights_->get_model();
238  if (weights) {
239  (*weights).add_weight(key, weight);
240  }
241  }
242 
// Converts `datum` into a feature vector and returns it through `ret_fv`.
//
// Steps visible here: extract the unweighted features with
// convert_unweighted(), pass them to the weight manager's get_weight() when a
// model is set, hash the feature keys when a hasher is configured, and finally
// swap the result into `ret_fv`. All work happens on a local vector, so
// `ret_fv` is only replaced by the final swap.
// NOTE(review): this listing skips one source line between the weighting and
// hashing steps (numbering jumps from 251 to 253) -- check the original file
// for the statement that belongs there.
243  void convert(const datum& datum, common::sfv_t& ret_fv) const {
244  common::sfv_t fv;
245  convert_unweighted(datum, fv);
246  jubatus::util::lang::shared_ptr<weight_manager> weights =
247  mixable_weights_->get_model();
// The model pointer may be empty; weighting is skipped in that case.
248  if (weights) {
249  weights->get_weight(fv);
250  }
251 
253 
// hasher_ is optional and only set through set_hash_max_size().
254  if (hasher_) {
255  hasher_->hash_feature_keys(fv);
256  }
257 
258  fv.swap(ret_fv);
259  }
260 
262  common::sfv_t fv;
263  convert_unweighted(datum, fv);
264  jubatus::util::lang::shared_ptr<weight_manager> weights =
265  mixable_weights_->get_model();
266  if (weights) {
267  weights->update_weight(fv);
268  weights->get_weight(fv);
269  }
270 
272 
273  if (hasher_) {
274  hasher_->hash_feature_keys(fv);
275  }
276 
277  fv.swap(ret_fv);
278  }
279 
280  void convert_unweighted(const datum& datum, common::sfv_t& ret_fv) const {
281  common::sfv_t fv;
282 
283  std::vector<std::pair<std::string, std::string> > filtered_strings;
284  filter_strings(datum.string_values_, filtered_strings);
285  convert_strings(datum.string_values_, fv);
286  convert_strings(filtered_strings, fv);
287 
288  std::vector<std::pair<std::string, double> > filtered_nums;
289  filter_nums(datum.num_values_, filtered_nums);
290  convert_nums(datum.num_values_, fv);
291  convert_nums(filtered_nums, fv);
292 
293  convert_binaries(datum.binary_values_, fv);
294 
295  fv.swap(ret_fv);
296  }
297 
299  const std::string& feature,
300  std::pair<std::string, std::string>& expect) const {
301  // format of string feature is
302  // "<KEY_NAME>$<VALUE>@<FEATURE_TYPE>#<SAMPLE_WEIGHT>/<GLOBAL_WEIGHT>"
303  size_t sharp = feature.rfind('#');
304  if (sharp == std::string::npos) {
305  throw JUBATUS_EXCEPTION(
306  converter_exception("this feature is not string feature"));
307  }
308  size_t at = feature.rfind('@', sharp);
309  if (at == std::string::npos) {
310  throw JUBATUS_EXCEPTION(
311  converter_exception("this feature is not valid feature"));
312  }
313  size_t dollar = feature.rfind('$', at);
314  if (dollar == std::string::npos) {
315  throw JUBATUS_EXCEPTION(
316  converter_exception("this feature is not valid feature"));
317  }
318  if (feature.substr(at + 1, sharp - at - 1) != "str") {
319  throw JUBATUS_EXCEPTION(
320  converter_exception("this feature is not revertible"));
321  }
322 
323  std::string key(feature.substr(0, dollar));
324  std::string value(feature.substr(dollar + 1, at - dollar - 1));
325 
326  expect.first.swap(key);
327  expect.second.swap(value);
328  }
329 
330  void set_hash_max_size(uint64_t hash_max_size) {
331  hasher_ = feature_hasher(hash_max_size);
332  }
333 
334  void set_weight_manager(jubatus::util::lang::shared_ptr<weight_manager> wm) {
335  mixable_weights_->set_model(wm);
336  }
337 
338  void clear_weights() {
339  jubatus::util::lang::shared_ptr<weight_manager> weights =
340  mixable_weights_->get_model();
341  if (weights) {
342  weights->clear();
343  }
344  }
345 
346  private:
348  const datum::sv_t& string_values,
349  datum::sv_t& filtered_values) const {
350  for (size_t i = 0; i < string_filter_rules_.size(); ++i) {
351  datum::sv_t update;
352  string_filter_rules_[i].filter(string_values, update);
353  string_filter_rules_[i].filter(filtered_values, update);
354 
355  filtered_values.insert(filtered_values.end(), update.begin(),
356  update.end());
357  }
358  }
359 
361  const datum::nv_t& num_values,
362  datum::nv_t& filtered_values) const {
363  for (size_t i = 0; i < num_filter_rules_.size(); ++i) {
364  datum::nv_t update;
365  num_filter_rules_[i].filter(num_values, update);
366  num_filter_rules_[i].filter(filtered_values, update);
367 
368  filtered_values.insert(
369  filtered_values.end(), update.begin(), update.end());
370  }
371  }
372 
373  void convert_strings(const datum::sv_t& string_values,
374  common::sfv_t& ret_fv) const {
375  for (size_t i = 0; i < string_rules_.size(); ++i) {
376  convert_strings(string_rules_[i], string_values, ret_fv);
377  }
378  }
379 
380  bool contains_idf(const string_feature_rule& s) const {
381  for (size_t i = 0; i < s.weights_.size(); ++i) {
382  if (s.weights_[i].term_weight_type_ == IDF) {
383  return true;
384  }
385  }
386  return false;
387  }
388 
390  const string_feature_rule& splitter,
391  const datum::sv_t& string_values,
392  common::sfv_t& ret_fv) const {
393  for (size_t j = 0; j < string_values.size(); ++j) {
394  const std::string& key = string_values[j].first;
395  const std::string& value = string_values[j].second;
397  count_words(splitter, key, value, counter);
398  for (size_t i = 0; i < splitter.weights_.size(); ++i) {
400  key, splitter.name_, splitter.weights_[i], counter, ret_fv);
401  }
402  }
403  }
404 
405  void convert_binaries(const datum::sv_t& binary_values,
406  common::sfv_t& ret_fv) const {
407  for (size_t i = 0; i < binary_rules_.size(); ++i) {
408  convert_binaries(binary_rules_[i], binary_values, ret_fv);
409  }
410  }
411 
413  const binary_feature_rule& feature,
414  const datum::sv_t& binary_values,
415  common::sfv_t& ret_fv) const {
416  for (size_t j = 0; j < binary_values.size(); ++j) {
417  const std::string& key = binary_values[j].first;
418  const std::string& value = binary_values[j].second;
419  if (feature.matcher_->match(key)) {
420  check_key(key);
421  feature.feature_func_->add_feature(key, value, ret_fv);
422  }
423  }
424  }
425 
426  static std::string make_feature(
427  const std::string& key,
428  const std::string& value,
429  const std::string& splitter,
430  const std::string& sample_weight,
431  const std::string& global_weight) {
432  check_key(key);
433  return key + "$" + value + "@" + splitter + "#" + sample_weight + "/" +
434  global_weight;
435  }
436 
437  static std::string make_feature_key(
438  const std::string& key,
439  const std::string& value,
440  const std::string& splitter) {
441  check_key(key);
442  return key + "$" + value + "@" + splitter;
443  }
444 
445  static void check_key(const std::string& key) {
446  if (key.find('$') != std::string::npos) {
447  throw JUBATUS_EXCEPTION(
448  converter_exception("feature key cannot contain '$': " + key));
449  }
450  }
451 
453  const string_feature_rule& splitter,
454  const std::string& key,
455  const std::string& value,
456  counter<std::string>& counter) const {
457  if (splitter.matcher_->match(key)) {
458  std::vector<string_feature_element> elements;
459  splitter.splitter_->extract(value, elements);
460 
461  for (size_t i = 0; i < elements.size(); i++) {
462  counter[elements[i].value] += elements[i].score;
463  }
464  }
465  }
466 
469  double tf,
470  std::string& name) const {
471  switch (type) {
472  case FREQ_BINARY:
473  name = "bin";
474  return 1.0;
475 
476  case TERM_FREQUENCY:
477  name = "tf";
478  return tf;
479 
480  case LOG_TERM_FREQUENCY:
481  name = "log_tf";
482  return std::log(1. + tf);
483 
484  default:
485  return 0;
486  }
487  }
488 
// Returns the short name used for a global (term) weight type:
// TERM_BINARY -> "bin", IDF -> "idf", WITH_WEIGHT_FILE -> "weight".
// Any other value is reported by throwing through JUBATUS_EXCEPTION with the
// message "unknown global weight type".
// NOTE(review): this listing skips one source line inside the throw
// expression (numbering jumps from 498 to 500); presumably it constructs the
// exception object -- confirm against the original file.
489  std::string get_global_weight_name(term_weight_type type) const {
490  switch (type) {
491  case TERM_BINARY:
492  return "bin";
493  case IDF:
494  return "idf";
495  case WITH_WEIGHT_FILE:
496  return "weight";
497  default:
498  throw JUBATUS_EXCEPTION(
500  "unknown global weight type"));
501  }
502  }
503 
505  const std::string& key,
506  const std::string& splitter_name,
507  const splitter_weight_type& weight_type,
508  const counter<std::string>& count,
509  common::sfv_t& ret_fv) const {
511  it != count.end(); ++it) {
512  std::string sample_weight_name;
513  double sample_weight = get_sample_weight(
514  weight_type.freq_weight_type_, it->second, sample_weight_name);
515 
516  std::string global_weight_name = get_global_weight_name(
517  weight_type.term_weight_type_);
518  float v = static_cast<float>(sample_weight);
519  if (v != 0.0) {
520  std::string f = make_feature(
521  key, it->first, splitter_name, sample_weight_name,
522  global_weight_name);
523  ret_fv.push_back(std::make_pair(f, v));
524  }
525  }
526  }
527 
528  void convert_nums(const datum::nv_t& num_values,
529  common::sfv_t& ret_fv) const {
530  for (size_t i = 0; i < num_values.size(); ++i) {
531  convert_num(num_values[i].first, num_values[i].second, ret_fv);
532  }
533  }
534 
535  void convert_num(const std::string& key,
536  double value,
537  common::sfv_t& ret_fv) const {
538  for (size_t i = 0; i < num_rules_.size(); ++i) {
539  const num_feature_rule& r = num_rules_[i];
540  if (r.matcher_->match(key)) {
541  check_key(key);
542  std::string k = key + "@" + r.name_;
543  r.feature_func_->add_feature(k, value, ret_fv);
544  }
545  }
546  }
547 
548  void convert_combinations(common::sfv_t& ret_fv) const {
549  const size_t original_size = ret_fv.size();
550 
551  if (original_size < 2) {
552  // Must have at least 2 features to generate combinations.
553  return;
554  }
555 
556  for (size_t i = 0; i < combination_rules_.size(); ++i) {
557  const combination_feature_rule& r = combination_rules_[i];
558  for (size_t j = 0 ; j < original_size - 1; ++j) {
559  for (size_t m = j + 1; m < original_size; ++m) {
560  if (r.matcher_left_->match(ret_fv[j].first)
561  && r.matcher_right_->match(ret_fv[m].first)) {
562  r.feature_func_->add_feature(
563  ret_fv[j].first + "&" + ret_fv[m].first + "/" + r.name_,
564  ret_fv[j].second,
565  ret_fv[m].second,
566  ret_fv);
567  }
568  }
569  }
570  }
571  }
572 };
573 
575  : pimpl_(new datum_to_fv_converter_impl()) {
576 }
577 
579 }
580 
582  common::sfv_t& ret_fv) const {
583  pimpl_->convert(datum, ret_fv);
584 }
585 
587  const datum& datum,
588  common::sfv_t& ret_fv) {
589  pimpl_->convert_and_update_weight(datum, ret_fv);
590 }
591 
593  pimpl_->clear_rules();
594 }
595 
597  jubatus::util::lang::shared_ptr<key_matcher> matcher,
598  jubatus::util::lang::shared_ptr<string_filter> filter,
599  const std::string& suffix) {
600  pimpl_->register_string_filter(matcher, filter, suffix);
601 }
602 
604  jubatus::util::lang::shared_ptr<key_matcher> matcher,
605  jubatus::util::lang::shared_ptr<num_filter> filter,
606  const std::string& suffix) {
607  pimpl_->register_num_filter(matcher, filter, suffix);
608 }
609 
611  const std::string& name,
612  jubatus::util::lang::shared_ptr<key_matcher> matcher,
613  jubatus::util::lang::shared_ptr<string_feature> splitter,
614  const std::vector<splitter_weight_type>& weights) {
615  pimpl_->register_string_rule(name, matcher, splitter, weights);
616 }
617 
619  const std::string& name,
620  jubatus::util::lang::shared_ptr<key_matcher> matcher,
621  jubatus::util::lang::shared_ptr<num_feature> feature_func) {
622  pimpl_->register_num_rule(name, matcher, feature_func);
623 }
624 
626  const std::string& name,
627  jubatus::util::lang::shared_ptr<key_matcher> matcher,
628  jubatus::util::lang::shared_ptr<binary_feature> feature_func) {
629  pimpl_->register_binary_rule(name, matcher, feature_func);
630 }
631 
633  const std::string& name,
634  jubatus::util::lang::shared_ptr<key_matcher> matcher_left,
635  jubatus::util::lang::shared_ptr<key_matcher> matcher_right,
636  jubatus::util::lang::shared_ptr<combination_feature> feature_func) {
637  pimpl_->register_combination_rule(
638  name,
639  matcher_left,
640  matcher_right,
641  feature_func);
642 }
643 
644 void datum_to_fv_converter::add_weight(const std::string& key, float weight) {
645  pimpl_->add_weight(key, weight);
646 }
647 
649  const std::string& feature,
650  std::pair<std::string, std::string>& expect) const {
651  pimpl_->revert_feature(feature, expect);
652 }
653 
654 void datum_to_fv_converter::set_hash_max_size(uint64_t hash_max_size) {
655  pimpl_->set_hash_max_size(hash_max_size);
656 }
657 
659  jubatus::util::lang::shared_ptr<weight_manager> wm) {
660  pimpl_->set_weight_manager(wm);
661 }
662 
664  pimpl_->clear_weights();
665 }
666 
667 } // namespace fv_converter
668 } // namespace core
669 } // namespace jubatus
void convert(const datum &datum, common::sfv_t &ret_fv) const
void make_string_features(const std::string &key, const std::string &splitter_name, const splitter_weight_type &weight_type, const counter< std::string > &count, common::sfv_t &ret_fv) const
void revert_feature(const std::string &feature, std::pair< std::string, std::string > &expect) const
string_feature_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< string_feature > splitter, const std::vector< splitter_weight_type > &weights)
bool contains_idf(const string_feature_rule &s) const
void convert(const datum &datum, common::sfv_t &ret_fv) const
void convert_strings(const string_feature_rule &splitter, const datum::sv_t &string_values, common::sfv_t &ret_fv) const
void convert_and_update_weight(const datum &datum, common::sfv_t &ret_fv)
void filter_strings(const datum::sv_t &string_values, datum::sv_t &filtered_values) const
void convert_binaries(const binary_feature_rule &feature, const datum::sv_t &binary_values, common::sfv_t &ret_fv) const
void set_weight_manager(jubatus::util::lang::shared_ptr< weight_manager > wm)
void register_string_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< string_feature > splitter, const std::vector< splitter_weight_type > &weights)
static std::string make_feature(const std::string &key, const std::string &value, const std::string &splitter, const std::string &sample_weight, const std::string &global_weight)
void register_num_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< num_feature > feature_func)
const_iterator begin() const
Definition: counter.hpp:59
void register_num_filter(jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< num_filter > filter, const std::string &suffix)
void register_string_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< string_feature > splitter, const std::vector< splitter_weight_type > &weights)
void register_binary_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< binary_feature > feature_func)
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
void register_string_filter(jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< string_filter > filter, const std::string &suffix)
void revert_feature(const std::string &feature, std::pair< std::string, std::string > &expect) const
jubatus::util::data::optional< feature_hasher > hasher_
void register_combination_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher_left, jubatus::util::lang::shared_ptr< key_matcher > matcher_right, jubatus::util::lang::shared_ptr< combination_feature > feature_func)
num_feature_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< num_feature > feature_func)
void convert_and_update_weight(const datum &datum, common::sfv_t &ret_fv)
void add_weight(const std::string &key, float weight)
combination_feature_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher_left, jubatus::util::lang::shared_ptr< key_matcher > matcher_right, jubatus::util::lang::shared_ptr< combination_feature > feature_func)
void filter(const datum::nv_t &num_values, datum::nv_t &filtered) const
void register_num_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< num_feature > feature_func)
void convert_num(const std::string &key, double value, common::sfv_t &ret_fv) const
void add_weight(const std::string &key, float weight)
std::vector< std::pair< std::string, std::string > > sv_t
Definition: datum.hpp:32
std::string get_global_weight_name(term_weight_type type) const
jubatus::util::lang::shared_ptr< mixable_weight_manager > mixable_weights_
void set_weight_manager(jubatus::util::lang::shared_ptr< weight_manager > wm)
std::vector< T > v(size)
void register_binary_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< binary_feature > feature_func)
void register_combination_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher_left, jubatus::util::lang::shared_ptr< key_matcher > matcher_right, jubatus::util::lang::shared_ptr< combination_feature > feature_func)
std::vector< combination_feature_rule > combination_rules_
std::vector< std::pair< std::string, float > > sfv_t
Definition: type.hpp:29
void convert_binaries(const datum::sv_t &binary_values, common::sfv_t &ret_fv) const
double get_sample_weight(frequency_weight_type type, double tf, std::string &name) const
void register_num_filter(jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< num_filter > filter, const std::string &suffix)
jubatus::util::lang::scoped_ptr< datum_to_fv_converter_impl > pimpl_
void filter_nums(const datum::nv_t &num_values, datum::nv_t &filtered_values) const
std::vector< std::pair< std::string, double > > nv_t
Definition: datum.hpp:33
const_iterator end() const
Definition: counter.hpp:67
binary_feature_rule(const std::string &name, jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< binary_feature > feature_func)
void count_words(const string_feature_rule &splitter, const std::string &key, const std::string &value, counter< std::string > &counter) const
jubatus::util::data::unordered_map< std::string, float > weight_t
void register_string_filter(jubatus::util::lang::shared_ptr< key_matcher > matcher, jubatus::util::lang::shared_ptr< string_filter > filter, const std::string &suffix)
void convert_strings(const datum::sv_t &string_values, common::sfv_t &ret_fv) const
static std::string make_feature_key(const std::string &key, const std::string &value, const std::string &splitter)
void filter(const datum::sv_t &string_values, datum::sv_t &filtered) const
void convert_nums(const datum::nv_t &num_values, common::sfv_t &ret_fv) const
void convert_unweighted(const datum &datum, common::sfv_t &ret_fv) const