jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
word_splitter.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2014 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "word_splitter.hpp"
18 
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 namespace jubatus {
24 namespace core {
25 namespace fv_converter {
26 
28  const std::string& text,
29  std::vector<string_feature_element>& result) const {
30  std::vector<std::pair<size_t, size_t> > boundaries;
31  split(text, boundaries);
32 
33  std::vector<string_feature_element> elements;
34  for (size_t i = 0; i < boundaries.size(); ++i) {
35  size_t begin = boundaries[i].first;
36  size_t length = boundaries[i].second;
37  elements.push_back(
39  begin,
40  length,
41  text.substr(begin, length),
42  1.0));
43  }
44  elements.swap(result);
45 }
46 
47 } // namespace fv_converter
48 } // namespace core
49 } // namespace jubatus
virtual void split(const std::string &string, std::vector< std::pair< size_t, size_t > > &ret_boundaries) const =0
void extract(const std::string &text, std::vector< string_feature_element > &result) const