22 #include "jubatus/util/lang/cast.h"
25 using jubatus::util::lang::lexical_cast;
30 namespace fv_converter {
// Error text reporting the rejected 'group' value; the condition that
// triggers it is not visible in this excerpt.
37 "'group' must be positive: " + lexical_cast<std::string>(group)));
// View the pattern's bytes as UChar*, the character type handed to onig_new.
40 const UChar* pattern =
reinterpret_cast<const UChar*
>(regexp.data());
// Compile the pattern into reg_ with default options, UTF-8 encoding and Perl
// syntax. Any result other than ONIG_NORMAL is reported as an invalid regular
// expression. NULL is passed as the last argument (the error-info slot in the
// Oniguruma API), so no detailed error information is collected.
41 if (ONIG_NORMAL != onig_new(&
reg_, pattern, pattern + regexp.size(),
42 ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PERL, NULL)) {
44 "invalid regular expression: " + regexp));
// The requested group index must not exceed the number of capture groups
// that the compiled pattern actually defines.
47 const int num_capture = onig_number_of_captures(
reg_);
48 if (group > num_capture) {
// Message names the pattern, its capture count, and the requested group.
49 std::string msg =
"regexp '" + regexp +
"' only contains "
50 + lexical_cast<std::string>(num_capture)
51 +
" groups, but 'group' is " + lexical_cast<std::string>(group);
// str is the text to scan; bounds receives one (pos, len) pair per recorded
// match (see the push_back below).
63 const std::string& str,
64 std::vector<std::pair<size_t, size_t> >& bounds)
const {
// [head, end) is the input's byte range, viewed as UChar* for Oniguruma.
66 const UChar* head =
reinterpret_cast<const UChar*
>(str.data());
67 const UChar* end = head + str.size();
// Region object handed to onig_match; its beg/end arrays are read below and
// it is released with onig_region_free.
69 OnigRegion* region = onig_region_new();
// Keep scanning while the cursor offset cur still lies inside the input.
// NOTE(review): how cur advances is not visible in this excerpt.
72 while (head + cur < end) {
// Match attempt at position head + cur within [head, end), with no extra
// options.
74 = onig_match(
reg_, head, end, head + cur, region, ONIG_OPTION_NONE);
// Start offset and length of capture group group_, taken from the region.
// They are held as int and converted to size_t when stored in bounds.
81 const int pos = region->beg[
group_];
82 const int len = region->end[
group_] - pos;
83 bounds.push_back(std::make_pair(pos, len));
// Two release sites are visible; the second argument 1 asks Oniguruma to free
// the region object itself as well as its arrays. Presumably one call sits on
// an error path and the other on normal exit -- confirm against the full file.
91 onig_region_free(region, 1);
93 onig_region_free(region, 1);
regexp_splitter(const std::string &regexp, int group)
#define JUBATUS_EXCEPTION(e)
void split(const std::string &str, std::vector< std::pair< size_t, size_t > > &bounds) const