jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
bit_vector.hpp
Go to the documentation of this file.
1 // Jubatus: Online machine learning framework for distributed environment
2 // Copyright (C) 2011 Preferred Networks and Nippon Telegraph and Telephone Corporation.
3 //
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License version 2.1 as published by the Free Software Foundation.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 // Lesser General Public License for more details.
12 //
13 // You should have received a copy of the GNU Lesser General Public
14 // License along with this library; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #ifndef JUBATUS_CORE_STORAGE_BIT_VECTOR_HPP_
18 #define JUBATUS_CORE_STORAGE_BIT_VECTOR_HPP_
19 
20 #include <stdint.h>
21 #include <algorithm>
22 #include <string>
23 #include <vector>
24 
25 #include <msgpack.hpp>
26 #include "jubatus/util/lang/cast.h"
27 #include "../common/assert.hpp"
28 #include "../common/exception.hpp"
29 #include "../common/big_endian.hpp"
30 #include "storage_exception.hpp"
31 
32 namespace jubatus {
33 namespace core {
34 namespace storage {
35 namespace detail {
36 
37 template <typename T, size_t N> struct bitcount_impl;
38 template <typename T>
39 struct bitcount_impl<T, 1> {
40  static int call(T bits) {
41  bits = (bits & 0x55) + (bits >> 1 & 0x55);
42  bits = (bits & 0x33) + (bits >> 2 & 0x33);
43  return (bits & 0x0f) + (bits >> 4 & 0x0f);
44  }
45 };
46 template <typename T>
47 struct bitcount_impl<T, 2> {
48  static int call(T bits) {
49  bits = (bits & 0x5555U) + (bits >> 1 & 0x5555U);
50  bits = (bits & 0x3333U) + (bits >> 2 & 0x3333U);
51  bits = (bits & 0x0f0fU) + (bits >> 4 & 0x0f0fU);
52  return (bits & 0x00ffU) + (bits >> 8 & 0x00ffU);
53  }
54 };
55 template <typename T>
56 struct bitcount_impl<T, 4> {
57  static int call(T bits) {
58  bits = (bits & 0x55555555LU) + (bits >> 1 & 0x55555555LU);
59  bits = (bits & 0x33333333LU) + (bits >> 2 & 0x33333333LU);
60  bits = (bits & 0x0f0f0f0fLU) + (bits >> 4 & 0x0f0f0f0fLU);
61  bits = (bits & 0x00ff00ffLU) + (bits >> 8 & 0x00ff00ffLU);
62  return (bits & 0x0000ffffLU) + (bits >>16 & 0x0000ffffLU);
63  }
64 };
65 template <typename T>
66 struct bitcount_impl<T, 8> {
67  static int call(T bits) {
68  bits = (bits & 0x5555555555555555LLU) + (bits >> 1 & 0x5555555555555555LLU);
69  bits = (bits & 0x3333333333333333LLU) + (bits >> 2 & 0x3333333333333333LLU);
70  bits = (bits & 0x0f0f0f0f0f0f0f0fLLU) + (bits >> 4 & 0x0f0f0f0f0f0f0f0fLLU);
71  bits = (bits & 0x00ff00ff00ff00ffLLU) + (bits >> 8 & 0x00ff00ff00ff00ffLLU);
72  bits = (bits & 0x0000ffff0000ffffLLU) + (bits >>16 & 0x0000ffff0000ffffLLU);
73  return (bits & 0x00000000ffffffffLLU) + (bits >>32 & 0x00000000ffffffffLLU);
74  }
75 };
76 
77 #ifdef __GNUG__
78 
79 inline int fast_bitcount(unsigned bits) {
80  return __builtin_popcount(bits);
81 }
82 
83 inline int fast_bitcount(unsigned long bits) {
84  return __builtin_popcountl(bits);
85 }
86 
87 inline int fast_bitcount(unsigned long long bits) {
88  return __builtin_popcountll(bits);
89 }
90 
91 #endif
92 
93 template <class T>
94 inline int bitcount_dispatcher(T bits) {
95 #ifdef __GNUG__
96  return fast_bitcount(bits);
97 #else
99 #endif
100 }
101 
102 inline int bitcount(unsigned bits) {
103  return bitcount_dispatcher(bits);
104 }
105 
106 inline int bitcount(unsigned long bits) {
107  return bitcount_dispatcher(bits);
108 }
109 
110 inline int bitcount(unsigned long long bits) {
111  return bitcount_dispatcher(bits);
112 }
113 
114 template <class T>
115 inline int bitcount(T); // = delete;
116 
117 } // namespace detail
118 
120  : public common::exception::jubaexception<bit_vector_unmatch_exception> {
121  public:
122  explicit bit_vector_unmatch_exception(const std::string &msg)
123  : msg_(msg) {}
125  const char* what() const throw() {
126  return msg_.c_str();
127  }
128 
129  private:
130  std::string msg_;
131 };
132 
133 template <typename bit_base>
134 struct bit_vector_base {
135  static const size_t BASE_BITS = sizeof(bit_base) * 8;
136  static const size_t BLOCKSIZE = sizeof(bit_base);
137 
139  : bits_(NULL), bit_num_(0), own_(false) {
140  }
141 
142  bit_vector_base(const bit_base* bits, size_t bit_num)
143  : bits_(NULL),
144  bit_num_(bit_num),
145  own_(false) {
146  alloc_memory();
147  memcpy(bits_, bits, used_bytes());
148  }
149 
150  explicit bit_vector_base(int bit_num)
151  : bits_(NULL),
152  bit_num_(bit_num),
153  own_(false) {
154  }
155 
157  : bits_(NULL),
158  bit_num_(orig.bit_num_),
159  own_(false) {
160  if (orig.bits_ == NULL) {
161  return;
162  }
163  alloc_memory();
164  memcpy(bits_, orig.bits_, used_bytes());
165  }
166 
168  if (bits_ && own_) {
169  release();
170  return;
171  }
172  }
173 
174  void resize_and_clear(uint64_t bit_num) {
175  release();
176  own_ = false;
177  bit_num_ = bit_num;
178  }
179 
180  bool operator==(const bit_vector_base& rhs) const {
181  if (bit_num_ != rhs.bit_num_) {
182  return false;
183  }
184  if (is_empty() && rhs.is_empty()) {
185  return true;
186  }
187  if (is_empty() && !rhs.is_empty()) {
188  return false;
189  }
190  if (!is_empty() && rhs.is_empty()) {
191  return false;
192  }
193  return memcmp(bits_, rhs.bits_, used_bytes()) == 0;
194  }
195  bool operator!=(const bit_vector_base& rhs) const {
196  return !this->operator==(rhs);
197  }
198 
199  // deep copy (In case not own memory, it alloc memory)
201  if (&orig != this) {
202  bit_num_ = orig.bit_num_;
203  if (bits_ == NULL) {
204  alloc_memory();
205  }
206  if (orig.bits_ == NULL) {
207  memset(bits_, 0, used_bytes());
208  } else {
209  memcpy(bits_, orig.bits_, used_bytes());
210  }
211  }
212  return *this;
213  }
214 
216  using std::swap;
217  swap(bit_num_, x.bit_num_);
218  swap(bits_, x.bits_);
219  swap(own_, x.own_);
220  }
221  friend void swap(bit_vector_base& l, bit_vector_base& r) {
222  l.swap(r);
223  }
224 
225  void clear_bit(size_t pos) {
226  if (bits_ == NULL) {
227  return;
228  }
229  bits_[pos / BASE_BITS] &= ~(1LLU << (pos % BASE_BITS));
230  }
231  void set_bit(size_t pos) {
232  if (bits_ == NULL) {
233  alloc_memory();
234  }
235  if (static_cast<size_t>(bit_num_) < pos) {
237  "set_bit(): invalid posison " +
238  jubatus::util::lang::lexical_cast<std::string>(pos) +
239  " for length: " +
240  jubatus::util::lang::lexical_cast<std::string>(bit_num_));
241  }
242  bits_[pos / BASE_BITS] |= (1LLU << (pos % BASE_BITS));
243  }
244  void reverse_bit(size_t pos) {
245  if (bits_ == NULL) {
246  alloc_memory();
247  }
248  if (bit_num_ < pos) {
250  "reverse_bit(): invalid posison " +
251  jubatus::util::lang::lexical_cast<std::string>(pos) +
252  " for length: " +
253  jubatus::util::lang::lexical_cast<std::string>(bit_num_));
254  }
255  bits_[pos / BASE_BITS] ^= (1LLU << (pos % BASE_BITS));
256  }
257 
258  bool get_bit(size_t pos) const {
259  if (bits_ == NULL) {
260  return false;
261  }
262  return bits_[pos / BASE_BITS] & (1LLU << (pos % BASE_BITS));
263  }
264  bool is_empty() const {
265  return bit_count() == 0;
266  }
267 
268  void clear() {
269  for (size_t i = 0; i < bit_num_; ++i) {
270  clear_bit(i);
271  }
272  }
273  uint64_t calc_hamming_similarity(const bit_vector_base& bv) const {
274  return bit_num() - calc_hamming_distance(bv);
275  }
276  uint64_t calc_hamming_distance(const bit_vector_base& bv) const {
277  if (bit_num() != bv.bit_num()) {
279  "calc_hamming_similarity(): bit_vector length unmatch! " +
280  jubatus::util::lang::lexical_cast<std::string>(bit_num()) + " with " +
281  jubatus::util::lang::lexical_cast<std::string>(bv.bit_num()));
282  }
283  if (is_empty() && bv.is_empty()) {
284  return 0;
285  } else if (is_empty()) {
286  return bv.bit_count();
287  } else if (bv.is_empty()) {
288  return bit_count();
289  }
290  size_t match_num = 0;
291  for (size_t i = 0; i < used_bytes() / sizeof(bit_base); ++i) {
292  match_num += detail::bitcount(bits_[i] ^ bv.bits_[i]);
293  }
294  return match_num;
295  }
296  size_t bit_count() const {
297  if (bits_ == NULL) {
298  return 0;
299  }
300  size_t result = 0;
301  for (int64_t i = (used_bytes() / sizeof(bit_base)) - 1; i >= 0; --i) {
302  result += detail::bitcount(bits_[i]);
303  }
304  return result;
305  }
306 
307  bit_base* raw_data_unsafe() {
308  return bits_;
309  }
310  const bit_base* raw_data_unsafe() const {
311  return bits_;
312  }
313  size_t bit_num() const {
314  return bit_num_;
315  }
316  size_t used_bytes() const {
317  return memory_size(bit_num_);
318  }
319  static size_t memory_size(size_t bit_width) {
320  return ((((bit_width + 7) / 8) + BLOCKSIZE - 1) / BLOCKSIZE) * BLOCKSIZE;
321  }
322 
323  void debug_print(std::ostream& os) const {
324  if (bits_ == NULL) {
325  os << "(unallocated)";
326  return;
327  }
328  for (uint64_t i = 0; i < used_bytes() * 8; ++i) {
329  if ((bits_[i / (sizeof(bit_base) * 8)] >> (i % (sizeof(bit_base) * 8))) &
330  1LLU) {
331  os << "1";
332  } else {
333  os << "0";
334  }
335  }
336  }
337  friend std::ostream& operator<<(std::ostream& os, const bit_vector_base& bv) {
338  bv.debug_print(os);
339  return os;
340  }
341  void status(std::ostream& os) const {
342  os << "status():";
343  if (bits_ == NULL) {
344  os << "(unallocated)" << " owns_" << own_;
345  return;
346  }
347  os << (own_ ? "[own]" : "[other]") << std::endl;
348  }
349 
350  template <typename packable>
351  void pack(packable& buffer) const {
352  msgpack::pack(buffer, *this);
353  }
354  template<typename Buffer>
356  packer.pack_array(2);
357  packer.pack(static_cast<uint64_t>(bit_num_));
358  packer.pack_raw(used_bytes());
359  if (bits_) {
360  const size_t n = used_bytes() / BLOCKSIZE;
361  for (size_t i = 0; i < n; ++i) {
362  char buf[BLOCKSIZE];
364  packer.pack_raw_body(buf, BLOCKSIZE);
365  }
366  } else {
367  const char c = 0;
368  for (size_t i = 0; i < used_bytes(); ++i) {
369  packer.pack_raw_body(&c, 1);
370  }
371  }
372  }
373  void msgpack_unpack(msgpack::object o) {
374  if (o.type != msgpack::type::ARRAY || o.via.array.size != 2) {
375  throw msgpack::type_error(); // like MSGPACK_DEFINE
376  }
377  const msgpack::object* objs = o.via.array.ptr;
378  const uint64_t bit_num = objs[0].as<uint64_t>();
379  const msgpack::type::raw_ref data = objs[1].as<msgpack::type::raw_ref>();
380  if (data.size != memory_size(bit_num)) {
382  "msgpack_unpack(): invalid length of packed data: "
383  "expected: " +
384  jubatus::util::lang::lexical_cast<std::string>(bit_num_) +
385  ", got: " +
386  jubatus::util::lang::lexical_cast<std::string>(data.size));
387  }
388  std::vector<bit_base> buf;
389  for (size_t i = 0; i < data.size; i += BLOCKSIZE) {
390  buf.push_back(common::read_big_endian<bit_base>(&data.ptr[i]));
391  }
392  bit_vector_base(&buf[0], bit_num).swap(*this);
393  JUBATUS_ASSERT(own_ || bits_ == NULL);
394  }
395 
396  void alloc_memory() {
398  bits_ = new bit_base[used_bytes()];
399  own_ = true;
400  memset(bits_, 0, used_bytes());
401  }
402 
403  private:
404  bool release() {
405  if (bits_ && own_) {
406  delete[] bits_;
407  own_ = false;
408  bits_ = NULL;
409  return true;
410  }
411  return false;
412  }
413 
414  // deep copy
415  void duplicate(const bit_vector_base& orig) {
416  if (bit_num_ != orig.bit_num_) {
418  "failed copy bit vector from " +
419  jubatus::util::lang::lexical_cast<std::string>(orig.bit_num_) +
420  " to " +
421  jubatus::util::lang::lexical_cast<std::string>(bit_num_));
422  }
423  if (!own_) {
424  alloc_memory();
425  }
426  memcpy(bits_, orig.bits_, used_bytes());
427  }
428 
429  bit_base* bits_;
430  size_t bit_num_;
431  bool own_;
432 };
434 
435 } // namespace storage
436 } // namespace core
437 } // namespace jubatus
438 
439 #endif // JUBATUS_CORE_STORAGE_BIT_VECTOR_HPP_
void debug_print(std::ostream &os) const
Definition: bit_vector.hpp:323
uint64_t calc_hamming_distance(const bit_vector_base &bv) const
Definition: bit_vector.hpp:276
static size_t memory_size(size_t bit_width)
Definition: bit_vector.hpp:319
void resize_and_clear(uint64_t bit_num)
Definition: bit_vector.hpp:174
bool operator==(const bit_vector_base &rhs) const
Definition: bit_vector.hpp:180
friend std::ostream & operator<<(std::ostream &os, const bit_vector_base &bv)
Definition: bit_vector.hpp:337
bit_vector_base(const bit_vector_base &orig)
Definition: bit_vector.hpp:156
void pack(packable &buffer) const
Definition: bit_vector.hpp:351
#define JUBATUS_ASSERT(expr)
Definition: assert.hpp:55
void write_big_endian(uint32_t x, char *buf)
Definition: big_endian.hpp:34
void status(std::ostream &os) const
Definition: bit_vector.hpp:341
void msgpack_pack(msgpack::packer< Buffer > &packer) const
Definition: bit_vector.hpp:355
int bitcount(unsigned bits)
Definition: bit_vector.hpp:102
void swap(weighted_point &p1, weighted_point &p2)
Definition: types.hpp:47
bool operator!=(const bit_vector_base &rhs) const
Definition: bit_vector.hpp:195
void duplicate(const bit_vector_base &orig)
Definition: bit_vector.hpp:415
msgpack::packer< jubatus_packer > packer
Definition: bandit_base.hpp:31
bit_vector_base< uint64_t > bit_vector
vector< msgpack::object > objs
bit_vector_base(const bit_base *bits, size_t bit_num)
Definition: bit_vector.hpp:142
void msgpack_unpack(msgpack::object o)
Definition: bit_vector.hpp:373
bit_vector_base & operator=(const bit_vector_base &orig)
Definition: bit_vector.hpp:200
uint64_t calc_hamming_similarity(const bit_vector_base &bv) const
Definition: bit_vector.hpp:273
friend void swap(bit_vector_base &l, bit_vector_base &r)
Definition: bit_vector.hpp:221
const bit_base * raw_data_unsafe() const
Definition: bit_vector.hpp:310