jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
word_splitter.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_FV_CONVERTER_WORD_SPLITTER_HPP_
18 #define JUBATUS_CORE_FV_CONVERTER_WORD_SPLITTER_HPP_
19 
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "string_feature.hpp"
25 
26 namespace jubatus {
27 namespace core {
28 namespace fv_converter {
29 
30 class word_splitter : public string_feature {  // Abstract string_feature that locates words in a text.
31  public:
32  word_splitter() {
33  }
34  virtual ~word_splitter() {  // Virtual: this class is meant to be subclassed (split() is pure virtual).
35  }
36 
37  // Reports all word boundaries this splitter found in `string`.
38  // Each boundary is written to `ret_boundaries` as a pair of a beginning
39  // position and its length. Pure virtual: concrete splitters implement it.
40  virtual void split(
41  const std::string& string,
42  std::vector<std::pair<size_t, size_t> >& ret_boundaries) const = 0;
43  // Produces string_feature_element entries for `text` in `result`; only declared here (definition is outside this header).
44  void extract(
45  const std::string& text,
46  std::vector<string_feature_element>& result) const;
47 };
48 
49 } // namespace fv_converter
50 } // namespace core
51 } // namespace jubatus
52 
53 #endif // JUBATUS_CORE_FV_CONVERTER_WORD_SPLITTER_HPP_
virtual void split(const std::string &string, std::vector< std::pair< size_t, size_t > > &ret_boundaries) const =0
void extract(const std::string &text, std::vector< string_feature_element > &result) const