24 #include "../storage/column_table.hpp"
25 #include "jubatus/util/concurrent/lock.h"
27 using jubatus::util::lang::shared_ptr;
28 using jubatus::util::data::unordered_set;
29 using jubatus::util::concurrent::scoped_lock;
33 namespace classifier {
// Builds an ID string from `label`: the label itself, a '_' separator, and
// then one character appended per loop iteration (the loop runs `n` times).
// `rand` supplies the integers used to pick each appended character.
// NOTE(review): several lines of this function (the declaration of `n`, the
// first `if`, the final `else` and the return) are not visible in this
// excerpt; the comments below describe only what is shown.
36 std::string make_id_from_label(
const std::string& label,
37 jubatus::util::math::random::mtrand& rand) {
39 std::string result = label;
// Reserve room for the label, the one-character separator and n more chars.
40 result.reserve(label.size() + 1 + n);
41 result.push_back(
'_');
42 for (
size_t i = 0; i < n; ++i) {
// 26 * 2 + 10 = 62 matches the three character ranges used below
// (lowercase letters, uppercase letters, digits).
// NOTE(review): presumably next_int(62) yields a value in [0, 62) -- confirm
// against the mtrand API.
43 int r = rand.next_int(26 * 2 + 10);
// Lowercase branch; its guard is not visible here (presumably `r < 26`).
45 result.push_back(
'a' + r);
46 }
else if (r < 26 * 2) {
// Uppercase branch: r - 26 is the offset from 'A'.
47 result.push_back(
'A' + (r - 26));
// Remaining branch: r - 52 is the offset from '0'.
49 result.push_back(
'0' + (r - 26 * 2));
// Returns the portion of `id` that precedes its last '_' character.
// make_id_from_label builds IDs as label + '_' + suffix, so this yields the
// label part of such an ID (the label itself may contain '_' because only
// the last one is treated as the separator).
// If `id` contains no '_', find_last_of returns std::string::npos and
// substr(0, npos) returns a copy of the whole string.
55 std::string get_label_from_id(
const std::string&
id) {
56 size_t pos =
id.find_last_of(
"_");
57 return id.substr(0, pos);
76 shared_ptr<nearest_neighbor::nearest_neighbor_base> engine,
79 : nearest_neighbor_engine_(engine), k_(k), alpha_(alpha) {
82 "local_sensitivity should >= 0"));
91 id = make_id_from_label(label,
rand_);
97 "no more space available to add new ID: " +
id));
105 shared_ptr<unlearner::unlearner_base> label_unlearner) {
114 float max_score = -FLT_MAX;
115 std::string max_class;
116 for (std::vector<classify_result_elem>::const_iterator it = result.begin();
117 it != result.end(); ++it) {
118 if (it == result.begin() || it->score > max_score) {
119 max_score = it->score;
120 max_class = it->label;
128 std::vector<std::pair<std::string, float> > ids;
131 std::map<std::string, float> m;
132 for (unordered_set<std::string>::const_iterator iter =
labels_.begin();
133 iter !=
labels_.end(); ++iter) {
134 m.insert(std::make_pair(*iter, 0));
136 for (
size_t i = 0; i < ids.size(); ++i) {
137 std::string label = get_label_from_id(ids[i].first);
138 m[label] += std::exp(-
alpha_ * ids[i].second);
142 for (std::map<std::string, float>::const_iterator iter = m.begin();
143 iter != m.end(); ++iter) {
145 scores.push_back(elem);
150 if (
labels_.erase(label) == 0) {
154 shared_ptr<storage::column_table> table =
157 std::vector<std::string> ids_to_be_deleted;
158 for (
size_t i = 0, n = table->size(); i < n; ++i) {
159 std::string
id = table->get_key(i);
160 std::string l = get_label_from_id(
id);
162 ids_to_be_deleted.push_back(
id);
166 for (
size_t i = 0, n = ids_to_be_deleted.size(); i < n; ++i) {
167 const std::string&
id = ids_to_be_deleted[i];
168 table->delete_row(
id);
186 std::vector<std::string> result;
187 for (unordered_set<std::string>::const_iterator iter =
labels_.begin();
188 iter !=
labels_.end(); ++iter) {
189 result.push_back(*iter);
195 return labels_.insert(label).second;
203 std::map<std::string, std::string>& status)
const {
212 for (unordered_set<std::string>::const_iterator iter =
labels_.begin();
213 iter !=
labels_.end(); ++iter) {
219 if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
220 throw msgpack::type_error();
224 msgpack::object labels = o.via.array.ptr[1];
225 if (labels.type != msgpack::type::ARRAY) {
226 throw msgpack::type_error();
228 for (
size_t i = 0; i < labels.via.array.size; ++i) {
230 labels.via.array.ptr[i].convert(&label);
jubatus::util::concurrent::mutex unlearner_mutex_
std::vector< classify_result_elem > classify_result
jubatus::util::lang::shared_ptr< unlearner::unlearner_base > unlearner_
nearest_neighbor_classifier * classifier_
nearest_neighbor_classifier(jubatus::util::lang::shared_ptr< nearest_neighbor::nearest_neighbor_base > nearest_neighbor_engine, size_t k, float alpha)
bool set_label(const std::string &label)
jubatus::util::math::random::mtrand rand_
#define JUBATUS_EXCEPTION(e)
std::vector< std::string > get_labels() const
void get_status(std::map< std::string, std::string > &status) const
bool delete_label(const std::string &label)
void train(const common::sfv_t &fv, const std::string &label)
void pack(framework::packer &pk) const
void unpack(msgpack::object o)
void operator()(const std::string &id)
jubatus::util::concurrent::mutex rand_mutex_
framework::mixable * get_mixable()
void set_label_unlearner(jubatus::util::lang::shared_ptr< unlearner::unlearner_base > label_unlearner)
unlearning_callback(nearest_neighbor_classifier *classifier)
jubatus::util::data::unordered_set< std::string > labels_
void classify_with_scores(const common::sfv_t &fv, classify_result &scores) const
std::vector< std::pair< std::string, float > > sfv_t
std::string classify(const common::sfv_t &fv) const
jubatus::util::lang::function< void(std::string)> unlearning_callback
void unlearn_id(const std::string &id)
jubatus::util::lang::shared_ptr< nearest_neighbor::nearest_neighbor_base > nearest_neighbor_engine_