jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
string_feature_factory.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2014 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
18 
19 #include <map>
20 #include <string>
21 #include "character_ngram.hpp"
22 #include "regexp_splitter.hpp"
23 #include "exception.hpp"
24 #include "util.hpp"
25 
26 using jubatus::util::lang::lexical_cast;
27 using jubatus::util::lang::shared_ptr;
28 
29 namespace jubatus {
30 namespace core {
31 namespace fv_converter {
32 
33 namespace {
34 
35 shared_ptr<character_ngram> create_character_ngram(
36  const param_t& params) {
37  int n = get_int_or_die(params, "char_num");
38  if (n <= 0) {
39  throw JUBATUS_EXCEPTION(converter_exception(
40  "char_num must be positive integer: " + lexical_cast<std::string>(n)));
41  }
42  size_t m = static_cast<size_t>(n);
43  return shared_ptr<character_ngram>(new character_ngram(m));
44 }
45 
46 const std::string& get(
47  const std::map<std::string, std::string>& args,
48  const std::string& key) {
49  std::map<std::string, std::string>::const_iterator it = args.find(key);
50  if (it == args.end()) {
51  throw JUBATUS_EXCEPTION(converter_exception("not found: " + key));
52  } else {
53  return it->second;
54  }
55 }
56 
57 int get_int_with_default(
58  const std::map<std::string, std::string>& args,
59  const std::string& key,
60  int default_value) {
61  if (args.count(key) == 0) {
62  return default_value;
63  }
64  std::string s = get(args, key);
65  try {
66  return jubatus::util::lang::lexical_cast<int>(s);
67  } catch (const std::bad_cast&) {
68  throw JUBATUS_EXCEPTION(
69  converter_exception("is not integer: " + key + " = " + s));
70  }
71 }
72 
73 shared_ptr<regexp_splitter >create_regexp(
74  const std::map<std::string, std::string>& args) {
75  std::string pattern = get(args, "pattern");
76  int group = get_int_with_default(args, "group", 0);
77  return shared_ptr<regexp_splitter>(new regexp_splitter(pattern, group));
78 }
79 
80 } // namespace
81 
82 shared_ptr<string_feature> string_feature_factory::create(
83  const std::string& name,
84  const param_t& params) const {
85  string_feature* p;
86  if (name == "ngram") {
87  return create_character_ngram(params);
88  } else if (name == "regexp") {
89  return create_regexp(params);
90  } else if (ext_ && (p = ext_(name, params))) {
91  return shared_ptr<string_feature>(p);
92  } else {
93  throw JUBATUS_EXCEPTION(
94  converter_exception(std::string("unknown splitter name: ") + name));
95  }
96 }
97 
98 } // namespace fv_converter
99 } // namespace core
100 } // namespace jubatus
std::map< std::string, std::string > param_t
Definition: type.hpp:28
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
jubatus::util::lang::shared_ptr< string_feature > create(const std::string &name, const param_t &params) const
int get_int_or_die(const std::map< std::string, std::string > &params, const std::string &key)
Definition: util.cpp:51