jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
Classes | Typedefs | Enumerations | Functions
jubatus::core::fv_converter Namespace Reference

Classes

class  add_filter
 
class  binary_feature
 
class  binary_feature_factory
 
struct  binary_rule
 
class  character_ngram
 
class  combination_add_feature
 
class  combination_feature
 
class  combination_feature_factory
 
class  combination_mul_feature
 
struct  combination_rule
 
struct  converter_config
 
class  converter_exception
 
class  counter
 
struct  datum
 
class  datum_to_fv_converter
 
class  datum_to_fv_converter_impl
 impl More...
 
class  exact_match
 
class  except_match
 
class  factory_extender
 
class  feature_hasher
 
struct  filter_rule
 
class  gaussian_normalization_filter
 
class  json_converter
 
class  key_matcher
 
class  key_matcher_factory
 
class  keyword_weights
 
class  libsvm_converter
 
class  linear_normalization_filter
 
class  match_all
 
class  msgpack_converter
 
class  num_feature
 
class  num_feature_factory
 
class  num_filter
 
class  num_filter_factory
 
class  num_log_feature
 
struct  num_rule
 
class  num_string_feature
 
class  num_value_feature
 
class  prefix_match
 
class  regexp_filter
 
class  regexp_match
 
class  regexp_splitter
 
class  sigmoid_normalization_filter
 
class  space_splitter
 
struct  splitter_weight_type
 
class  string_feature
 
struct  string_feature_element
 
class  string_feature_factory
 
class  string_filter
 
class  string_filter_factory
 
struct  string_rule
 
class  suffix_match
 
struct  versioned_weight_diff
 
class  weight_manager
 
class  without_split
 
class  word_splitter
 

Typedefs

typedef framework::linear_mixable_helper< weight_manager, versioned_weight_diff > mixable_weight_manager
 
typedef std::map< std::string, std::string > param_t
 

Enumerations

enum  frequency_weight_type { FREQ_BINARY, TERM_FREQUENCY, LOG_TERM_FREQUENCY }
 
enum  term_weight_type { TERM_BINARY, IDF, WITH_WEIGHT_FILE }
 

Functions

int get_int_or_die (const std::map< std::string, std::string > &params, const std::string &key)
 
const std::string & get_or_die (const std::map< std::string, std::string > &params, const std::string &key)
 
std::string get_with_default (const std::map< std::string, std::string > &params, const std::string &key, const std::string &default_value)
 
void initialize_converter (const converter_config &config, datum_to_fv_converter &conv, const factory_extender *ext)
 
jubatus::util::lang::shared_ptr< datum_to_fv_converter > make_fv_converter (const converter_config &config, const factory_extender *extender)
 
void revert_feature (const common::sfv_t &fv, fv_converter::datum &data)
 
bool revert_num_value (const std::pair< std::string, float > &feature, std::pair< std::string, float > &num_value)
 
bool revert_string_value (const std::pair< std::string, float > &feature, std::pair< std::string, std::string > &string_value)
 

Typedef Documentation

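typedef framework::linear_mixable_helper<weight_manager, versioned_weight_diff> jubatus::core::fv_converter::mixable_weight_manager

Definition at line 29 of file mixable_weight_manager.hpp.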

typedef std::map<std::string, std::string> jubatus::core::fv_converter::param_t

Definition at line 28 of file type.hpp.

Enumeration Type Documentation

Function Documentation

int jubatus::core::fv_converter::get_int_or_die ( const std::map< std::string, std::string > &  params,
const std::string &  key 
)

Definition at line 51 of file util.cpp.

References get_or_die(), and JUBATUS_EXCEPTION.

53  {
54  const std::string& s = get_or_die(params, key);
55  try {
56  return jubatus::util::lang::lexical_cast<int>(s);
57  } catch (const std::bad_cast&) {
58  throw JUBATUS_EXCEPTION(converter_exception(
59  std::string("\"" + key + "\" must be an integer value: " + s)));
60  }
61 }
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
const std::string & get_or_die(const std::map< std::string, std::string > &params, const std::string &key)
Definition: util.cpp:28

Here is the call graph for this function:

const std::string & jubatus::core::fv_converter::get_or_die ( const std::map< std::string, std::string > &  params,
const std::string &  key 
)

Definition at line 28 of file util.cpp.

References JUBATUS_EXCEPTION.

Referenced by get_int_or_die().

30  {
31  std::map<std::string, std::string>::const_iterator it = params.find(key);
32  if (it == params.end()) {
33  throw JUBATUS_EXCEPTION(converter_exception(
34  std::string("\"" + key + "\" is not specified in parameters")));
35  }
36  return it->second;
37 }
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79

Here is the caller graph for this function:

std::string jubatus::core::fv_converter::get_with_default ( const std::map< std::string, std::string > &  params,
const std::string &  key,
const std::string &  default_value 
)

Definition at line 39 of file util.cpp.

42  {
43  std::map<std::string, std::string>::const_iterator it = params.find(key);
44  if (it == params.end()) {
45  return default_value;
46  } else {
47  return it->second;
48  }
49 }
void jubatus::core::fv_converter::initialize_converter ( const converter_config &  config,
datum_to_fv_converter &  conv,
const factory_extender *  ext 
)

Definition at line 363 of file converter_config.cpp.

References jubatus::core::fv_converter::converter_config::binary_rules, jubatus::core::fv_converter::converter_config::binary_types, jubatus::core::fv_converter::datum_to_fv_converter::clear_rules(), jubatus::core::fv_converter::converter_config::combination_rules, jubatus::core::fv_converter::converter_config::combination_types, jubatus::core::fv_converter::factory_extender::create_binary_feature(), jubatus::core::fv_converter::factory_extender::create_combination_feature(), jubatus::core::fv_converter::factory_extender::create_num_feature(), jubatus::core::fv_converter::factory_extender::create_num_filter(), jubatus::core::fv_converter::factory_extender::create_string_feature(), jubatus::core::fv_converter::factory_extender::create_string_filter(), jubatus::core::fv_converter::converter_config::hash_max_size, JUBATUS_EXCEPTION, jubatus::core::fv_converter::converter_config::num_filter_rules, jubatus::core::fv_converter::converter_config::num_filter_types, jubatus::core::fv_converter::converter_config::num_rules, jubatus::core::fv_converter::converter_config::num_types, jubatus::core::fv_converter::datum_to_fv_converter::set_hash_max_size(), jubatus::core::fv_converter::converter_config::string_filter_rules, jubatus::core::fv_converter::converter_config::string_filter_types, jubatus::core::fv_converter::converter_config::string_rules, and jubatus::core::fv_converter::converter_config::string_types.

Referenced by make_fv_converter(), make_fv_converter(), and make_tf_idf_fv_converter().

366  {
367  using jubatus::util::lang::bind;
368  using jubatus::util::lang::_1;
369  using jubatus::util::lang::_2;
370 
371  if (config.hash_max_size.bool_test() && *config.hash_max_size.get() <= 0) {
372  stringstream msg;
373  msg << "hash_max_size must be positive, but is "
374  << *config.hash_max_size.get();
375  throw JUBATUS_EXCEPTION(converter_exception(msg.str()));
376  }
377 
378  map<string, string_filter_ptr> string_filters;
379  if (config.string_filter_types) {
380  string_filter_factory::create_function f;
381  if (ext) {
382  f = bind(&factory_extender::create_string_filter, ext, _1, _2);
383  }
384  init_string_filter_types(*config.string_filter_types, string_filters, f);
385  }
386 
387  map<string, num_filter_ptr> num_filters;
388  if (config.num_filter_types) {
389  num_filter_factory::create_function f;
390  if (ext) {
391  f = bind(&factory_extender::create_num_filter, ext, _1, _2);
392  }
393  init_num_filter_types(*config.num_filter_types, num_filters, f);
394  }
395 
396  map<string, string_feature_ptr> splitters;
397  register_default_string_types(splitters);
398  if (config.string_types) {
399  string_feature_factory::create_function f;
400  if (ext) {
401  f = bind(&factory_extender::create_string_feature, ext, _1, _2);
402  }
403  init_string_types(*config.string_types, splitters, f);
404  }
405 
406  map<string, num_feature_ptr> num_features;
407  register_default_num_types(num_features);
408  if (config.num_types) {
409  num_feature_factory::create_function f;
410  if (ext) {
411  f = bind(&factory_extender::create_num_feature, ext, _1, _2);
412  }
413  init_num_types(*config.num_types, num_features, f);
414  }
415 
416  map<string, binary_feature_ptr> binary_features;
417  if (config.binary_types) {
418  binary_feature_factory::create_function f;
419  if (ext) {
420  f = bind(&factory_extender::create_binary_feature, ext, _1, _2);
421  }
422  init_binary_types(*config.binary_types, binary_features, f);
423  }
424 
425  map<string, combination_feature_ptr> combination_features;
426  register_default_combination_types(combination_features);
427  if (config.combination_types) {
428  combination_feature_factory::create_function f;
429  if (ext) {
430  f = bind(&factory_extender::create_combination_feature, ext, _1, _2);
431  }
432  init_combination_types(*config.combination_types, combination_features, f);
433  }
434 
435  conv.clear_rules();
436  if (config.string_filter_rules) {
437  init_string_filter_rules(*config.string_filter_rules, string_filters, conv);
438  }
439  if (config.num_filter_rules) {
440  init_num_filter_rules(*config.num_filter_rules, num_filters, conv);
441  }
442  if (config.string_rules) {
443  init_string_rules(*config.string_rules, splitters, conv);
444  }
445  if (config.num_rules) {
446  init_num_rules(*config.num_rules, num_features, conv);
447  }
448  if (config.binary_rules) {
449  init_binary_rules(*config.binary_rules, binary_features, conv);
450  }
451  if (config.combination_rules) {
452  init_combination_rules(
453  *config.combination_rules,
454  combination_features, conv);
455  }
456 
457  if (config.hash_max_size.bool_test()) {
458  conv.set_hash_max_size(*config.hash_max_size.get());
459  }
460 }
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79

Here is the call graph for this function:

Here is the caller graph for this function:

jubatus::util::lang::shared_ptr< datum_to_fv_converter > jubatus::core::fv_converter::make_fv_converter ( const converter_config &  config,
const factory_extender *  extender 
)

Definition at line 462 of file converter_config.cpp.

References initialize_converter().

463  {
464  jubatus::util::lang::shared_ptr<fv_converter::datum_to_fv_converter>
465  converter(new fv_converter::datum_to_fv_converter);
466  fv_converter::initialize_converter(config, *converter, extender);
467  return converter;
468 }
void initialize_converter(const converter_config &config, datum_to_fv_converter &conv, const factory_extender *ext)

Here is the call graph for this function:

void jubatus::core::fv_converter::revert_feature ( const common::sfv_t &  fv,
fv_converter::datum &  data 
)

Definition at line 31 of file revert.cpp.

References jubatus::core::fv_converter::datum::num_values_, revert_num_value(), revert_string_value(), and jubatus::core::fv_converter::datum::string_values_.

Referenced by jubatus::core::driver::recommender::complete_row_from_datum(), jubatus::core::driver::recommender::complete_row_from_id(), jubatus::core::driver::recommender::decode_row(), and jubatus::core::driver::clustering::to_datum().

31  {
32  for (size_t i = 0; i < fv.size(); ++i) {
33  std::pair<std::string, float> num_value;
34  std::pair<std::string, std::string> string_value;
35  if (revert_num_value(fv[i], num_value)) {
36  data.num_values_.push_back(num_value);
37  } else if (revert_string_value(fv[i], string_value)) {
38  data.string_values_.push_back(string_value);
39  }
40  }
41 }
bool revert_string_value(const std::pair< std::string, float > &feature, std::pair< std::string, std::string > &string_value)
Definition: revert.cpp:78
bool revert_num_value(const std::pair< std::string, float > &feature, std::pair< std::string, float > &num_value)
Definition: revert.cpp:43

Here is the call graph for this function:

Here is the caller graph for this function:

bool jubatus::core::fv_converter::revert_num_value ( const std::pair< std::string, float > &  feature,
std::pair< std::string, float > &  num_value 
)

Definition at line 43 of file revert.cpp.

Referenced by revert_feature().

45  {
46  // Only 'num' features and 'str' features can be reverted.
47  // Formats of two features are below:
48  // ("<KEY_NAME>@num", value)
49  // ("<KEY_NAME>@str$<VALUE>", 1)
50  const std::string& key = feature.first;
51  float value = feature.second;
52  size_t at = key.rfind('@');
53  if (at == std::string::npos) {
54  return false;
55  }
56  std::string num_value_key(key.substr(0, at));
57  std::string feature_value(key.substr(at + 1));
58  const std::string str_prefix = "str$";
59  if (feature_value == "num") {
60  num_value.first.swap(num_value_key);
61  num_value.second = value;
62  return true;
63  } else if (starts_with(feature_value, str_prefix)) {
64  std::string val_string(feature_value.substr(str_prefix.size()));
65  try {
66  float val = jubatus::util::lang::lexical_cast<float>(val_string);
67  num_value.first.swap(num_value_key);
68  num_value.second = val;
69  return true;
70  } catch (const std::bad_cast&) {
71  return false;
72  }
73  } else {
74  return false;
75  }
76 }

Here is the caller graph for this function:

bool jubatus::core::fv_converter::revert_string_value ( const std::pair< std::string, float > &  feature,
std::pair< std::string, std::string > &  string_value 
)

Definition at line 78 of file revert.cpp.

Referenced by revert_feature().

80  {
81  // Format of string feature is
82  // "<KEY_NAME>$<VALUE>@<FEATURE_TYPE>#<SAMPLE_WEIGHT>/<GLOBAL_WEIGHT>"
83  const std::string& f = feature.first;
84  size_t sharp = f.rfind('#');
85  if (sharp == std::string::npos) {
86  return false;
87  }
88  size_t at = f.rfind('@', sharp);
89  if (at == std::string::npos) {
90  return false;
91  }
92  size_t dollar = f.find('$');
93  if (dollar == std::string::npos) {
94  return false;
95  }
96  if (f.substr(at + 1, sharp - at - 1) != "str") {
97  return false;
98  }
99 
100  std::string key(f.substr(0, dollar));
101  std::string value(f.substr(dollar + 1, at - dollar - 1));
102 
103  string_value.first.swap(key);
104  string_value.second.swap(value);
105  return true;
106 }

Here is the caller graph for this function: