jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
onig_filter.cpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2013 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 
18 #include "onig_filter.hpp"
19 #include <string>
20 #include <sstream>
21 #include "exception.hpp"
22 
23 namespace jubatus {
24 namespace core {
25 namespace fv_converter {
26 
27 regexp_filter::regexp_filter(const std::string& regexp,
28  const std::string& replace)
29  : reg_(NULL),
30  replace_(replace) {
31  const UChar* pattern = reinterpret_cast<const UChar*>(regexp.c_str());
32  if (ONIG_NORMAL != onig_new(&reg_, pattern, pattern + regexp.size(),
33  ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PERL, NULL)) {
34  throw JUBATUS_EXCEPTION(converter_exception(
35  "invalid regular expression: " + regexp));
36  }
37 
38  int num_captures = onig_number_of_captures(reg_);
39  for (std::size_t i = 0; i < replace.size(); ++i) {
40  char c = replace[i];
41  if (c == '\\') {
42  if (i == replace.size() - 1) {
43  throw JUBATUS_EXCEPTION(converter_exception(
44  "invalid replacement expression. 0-9 or \\ are required after \\"));
45  }
46  ++i;
47  c = replace[i];
48  if (c == '\\') {
49  } else if ('0' <= c && c <= '9') {
50  int group = c - '0';
51  if (group > num_captures) {
52  throw JUBATUS_EXCEPTION(converter_exception(
53  "invalid number of capture group"));
54  }
55  } else {
56  throw JUBATUS_EXCEPTION(converter_exception(
57  "invalid replacement expression. 0-9 or \\ are required after \\"));
58  }
59  }
60  }
61 }
62 
64  if (reg_) {
65  onig_free(reg_);
66  }
67 }
68 
70  const std::string& input,
71  const OnigRegion* region,
72  std::ostream& out) const {
73  for (std::size_t i = 0; i < replace_.size(); ++i) {
74  char c = replace_[i];
75  if (c == '\\') {
76  ++i;
77  if (i > replace_.size() - 1) {
78  // This exception must not be called, because replace string is checked
79  // in the constructor
81  "invalid replacement expression. 0-9 or \\ are required after \\"));
82  }
83  c = replace_[i];
84  if (c == '\\') {
85  out << '\\';
86  } else if ('0' <= c && c <= '9') {
87  int group = c - '0';
88  std::size_t len = region->end[group] - region->beg[group];
89  out << input.substr(region->beg[group], len);
90  } else {
91  // This exception must not be called, because replace string is checked
92  // in the constructor
94  "invalid replacement expression. 0-9 or \\ are required after \\"));
95  }
96  } else {
97  out << c;
98  }
99  }
100 }
101 
103  const std::string& input, std::string& output) const {
104  std::stringstream ss;
105 
106  const UChar* head = reinterpret_cast<const UChar*>(input.c_str());
107  const UChar* cur = head, *end = head + input.size();
108 
109  OnigRegion* region = onig_region_new();
110 
111  // We need to check when cur == end as "$" matches to the eos.
112  while (cur <= end) {
113  int r = onig_match(reg_, head, end, cur, region, ONIG_OPTION_NONE);
114  if (r >= 0) {
115  replace(input, region, ss);
116  cur += r;
117  }
118  onig_region_clear(region);
119  // If the pattern didn't match or mached an empty string, proceed the
120  // pointer forcely.
121  if (r <= 0) {
122  if (cur < end)
123  ss << *cur;
124  ++cur;
125  }
126  }
127  onig_region_free(region, 1);
128 
129  output = ss.str();
130 }
131 
132 } // namespace fv_converter
133 } // namespace core
134 } // namespace jubatus
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79
void replace(const std::string &input, const OnigRegion *region, std::ostream &out) const
Definition: onig_filter.cpp:69
void filter(const std::string &input, std::string &output) const