26 #include "../storage/fixed_size_heap.hpp"
28 using std::istringstream;
30 using std::ostringstream;
48 const std::string& row,
49 const std::string& column,
56 float cur_val =
get(row, column);
68 const string& column)
const {
91 const std::string& row,
100 tbl_t::const_iterator it = tbl.find(row);
101 if (it == tbl.end()) {
105 if (it_row == it->second.end()) {
109 return it_row->second;
115 const std::string& row,
116 const std::string& column) {
122 set(row, column, 0.f);
134 tbl_t::iterator it =
inv_diff_.find(row);
137 if (it_row != it->second.end()) {
138 it->second.erase(it_row);
139 if (it->second.empty()) {
158 std::vector<std::string>& ids)
const {
160 for (imap_float_t::const_iterator it =
column2norm_.begin();
175 vector<pair<string, float> > columns;
177 it2 != it->second.end(); ++it2) {
193 for (
size_t i = 0; i < ids.size(); ++i) {
194 const string& row = ids[i];
196 vector<pair<string, float> > columns;
198 for (
size_t j = 0; j < columns.size(); ++j) {
200 if (columns[j].second == 0.f) {
203 v[id] = columns[j].second;
209 for (map_float_t::const_iterator it = mixed_diff.
column2norm.begin();
225 for (
size_t i = 0; i < ids.size(); ++i) {
226 const string& row = ids[i];
228 vector<pair<string, float> > columns;
234 for (map_float_t::const_iterator it = lhs.
column2norm.begin();
250 vector<pair<string, float> >& scores,
251 size_t ret_num)
const {
253 if (query_norm == 0.f) {
258 for (
size_t i = 0; i < query.size(); ++i) {
259 const string& fid = query[i].first;
260 float val = query[i].second;
265 std::greater<pair<float, uint64_t> > > heap(ret_num);
266 for (
size_t i = 0; i < i_scores.size(); ++i) {
267 float score = i_scores[i];
273 float normed_score = score / norm / query_norm;
274 heap.
push(make_pair(normed_score, i));
276 vector<pair<float, uint64_t> > sorted_scores;
279 for (
size_t i = 0; i < sorted_scores.size() && i < ret_num; ++i) {
282 sorted_scores[i].first));
288 for (
size_t i = 0; i < sfv.size(); ++i) {
289 ret += sfv[i].second * sfv[i].second;
291 return std::sqrt(ret);
298 ret += it_diff->second;
300 imap_float_t::const_iterator it =
column2norm_.find(column_id);
304 return std::sqrt(ret);
308 const std::string& row,
310 std::vector<float>& scores)
const {
311 tbl_t::const_iterator it_diff =
inv_diff_.find(row);
313 const row_t& row_v = it_diff->second;
316 scores[row_it->first] += row_it->second * val;
320 tbl_t::const_iterator it =
inv_.find(row);
321 if (it !=
inv_.end()) {
322 const row_t& row_v = it->second;
326 scores[row_it->first] += row_it->second * val;
329 const row_t& row_diff_v = it_diff->second;
332 if (row_diff_v.
find(row_it->first) == row_diff_v.
end()) {
333 scores[row_it->first] += row_it->second * val;
341 return string(
"inverted_index_storage");
uint64_t get_id_const(const std::string &key) const
const_iterator find(const K &key) const
void get_row(const std::string &row, std::vector< std::pair< std::string, float > > &columns) const
void get_sorted(std::vector< T > &v) const
float calc_columnl2norm(uint64_t column_id) const
void calc_scores(const common::sfv_t &sfv, std::vector< std::pair< std::string, float > > &scores, size_t ret_num) const
void unpack(msgpack::object o)
void set(const std::string &row, const std::string &column, float val)
common::key_manager column2id_
imap_float_t column2norm_diff_
bool put_diff(const diff_type &mixed_diff)
const_iterator begin() const
iterator erase(const K &key)
uint64_t get_max_id() const
data_type::const_iterator const_iterator
void pack(framework::packer &packer) const
void remove(const std::string &row, const std::string &column)
const std::string & get_key(const uint64_t id) const
float get_from_tbl(const std::string &row, uint64_t column_id, const tbl_t &tbl, bool &exist) const
jubatus::util::data::unordered_map< uint64_t, float > imap_float_t
static float calc_l2norm(const common::sfv_t &sfv)
msgpack::packer< jubatus_packer > packer
imap_float_t column2norm_
void get_all_column_ids(std::vector< std::string > &ids) const
uint64_t get_id(const std::string &key)
sparse_matrix_storage inv
jubatus::util::data::unordered_map< std::string, row_t > tbl_t
std::vector< std::pair< std::string, float > > sfv_t
void mix(const diff_type &lhs_str, diff_type &rhs_str) const
const_iterator end() const
void get_all_row_ids(std::vector< std::string > &ids) const
data_type::iterator iterator
~inverted_index_storage()
float get(const std::string &row, const std::string &column) const
void get_diff(diff_type &diff_str) const
void swap(key_manager &km)
void set_row(const std::string &row, const std::vector< std::pair< std::string, float > > &columns)
void add_inp_scores(const std::string &row, float val, std::vector< float > &scores) const