jubatus_core  0.1.2
Jubatus: Online machine learning framework for distributed environment
Classes | Typedefs | Functions | Variables
jubatus::core::burst Namespace Reference

Classes

class  aggregator
 
class  basic_window
 
struct  batch_input
 
struct  batch_result
 
class  burst
 
struct  burst_options
 
class  burst_result
 
class  input_window
 
class  intersection_helper
 
struct  keyword_params
 
struct  keyword_with_params
 
class  result_storage
 
class  result_window
 

Typedefs

typedef framework::linear_mixable_helper< burst, burst::diff_t > mixable_burst
 

Functions

void burst_detect (const std::vector< uint32_t > &d_vector, const std::vector< uint32_t > &r_vector, std::vector< double > &batch_weights, double scaling_param, double gamma, double burst_cut_threshold)
 
template<class W1 , class W2 >
std::pair< int, int > get_intersection (const W1 &w1, const W2 &w2)
 
bool window_position_near (double pos0, double pos1, double batch_interval)
 

Variables

const std::vector< batch_result > empty_batch_results
 
int survival_mix_count_from_set_unprocessed = 5
 

Typedef Documentation

typedef framework::linear_mixable_helper<burst, burst::diff_t> jubatus::core::burst::mixable_burst

Definition at line 129 of file burst.hpp.

Function Documentation

void jubatus::core::burst::burst_detect ( const std::vector< uint32_t > &  d_vector,
const std::vector< uint32_t > &  r_vector,
std::vector< double > &  batch_weights,
double  scaling_param,
double  gamma,
double  burst_cut_threshold 
)

Definition at line 159 of file engine.cpp.

References JUBATUS_EXCEPTION.

Referenced by jubatus::core::burst::burst_result::burst_result().

164  {
165  const int window_size = d_vector.size();
166  if (gamma <= 0) {
167  throw JUBATUS_EXCEPTION(
168  common::invalid_parameter("gamma must be > 0."));
169  }
170  if (scaling_param <= 1) {
171  throw JUBATUS_EXCEPTION(
172  common::invalid_parameter("scaling_param must be > 1."));
173  }
174  if (burst_cut_threshold <= 0) {
175  throw JUBATUS_EXCEPTION(
176  common::invalid_parameter("burst_cut_threshold must be > 0."));
177  }
178  if (d_vector.size() != r_vector.size()) {
179  throw JUBATUS_EXCEPTION(
180  common::invalid_parameter("d_vector.size() != r_vector.size()"));
181  }
182  for (int batch_id = 0; batch_id < window_size; batch_id++) {
183  if (d_vector[batch_id] < r_vector[batch_id]) {
184  throw JUBATUS_EXCEPTION(
185  common::invalid_parameter(
186  "d_vector[batch_id] < r_vector[batch_id]"));
187  }
188  }
189  const std::vector<double> p_vector
190  = get_p_vector(d_vector, r_vector, scaling_param);
191 
192  erase_uncalc_batches(batch_weights);
193 
194  // exception handling of
195  // - "p_{burst_state} > 1"
196  // - "p_{base_state} = 0"
197  if (1 < p_vector[kBurstState]) {
198  batch_weights.resize(window_size, INFINITY);
199  return;
200  } else if (p_vector[kBaseState] == 0) {
201  batch_weights.resize(window_size, 0);
202  return;
203  }
204 
205  const int reuse_batch_size = batch_weights.size();
206 
207  // the optimal costvals
208  // - index 0: [1st batch - prev batch] optimal seq -> [now] base
209  // - index 1: [1st batch - prev batch] optimal seq -> [now] burst
210  double prev_optimal_in_now_states_costs[] = {-1, -1};
211 
212  // the optimal costval from 1st batch to previous batch.
213  // - index 0: previous : base
214  // - index 1: previous : burst
215  // To avoid "burst state in 1st batch",
216  // we must set to INFINITY the "cost val of burst state".
217  double prev_optimal_costs[] = {0, INFINITY};
218  if (batch_weights.size() != 0 && 0 < batch_weights.back()) {
219  // To avoid "base state in 1st batch",
220  // we must set to INFINITY the "cost val of base state".
221  prev_optimal_costs[kBaseState] = INFINITY;
222  prev_optimal_costs[kBurstState] = 0;
223  }
224 
225  // state sequences from 1st batch to now batch.
226  // - index 0: [1st batch - prev batch] optimal seq -> [now] base
227  // - index 1: [1st batch - prev batch] optimal seq -> [now] burst
228  std::vector<std::vector<int> > prev_optimal_in_now_states_seq(kStatesNum);
229 
230  // state sequences from 1st batch to previous batch.
231  // - index 0: [1st batch - prev batch] optimal seq & [prev] base
232  // - index 1: [1st batch - prev batch] optimal seq & [prev] burst
233  std::vector<std::vector<int> > prev_optimal_states_seq(kStatesNum);
234 
235  for (int update_batch_id = 0;
236  update_batch_id < window_size - reuse_batch_size;
237  update_batch_id++) {
238  for (int now_state = kBaseState; now_state < kStatesNum; now_state++) {
239  std::pair<int, double> prev_optimal_pair;
240 
241  if ((0 < update_batch_id + reuse_batch_size) &&
242  (d_vector[update_batch_id + reuse_batch_size - 1] == 0)) {
243  // exception handling
244  // in prev batch,
245  // (d, r) = (0, 0)
246  prev_optimal_pair.first = kBaseState;
247  prev_optimal_pair.second =
248  prev_optimal_costs[kBaseState] +
249  tau(kBaseState, now_state, gamma, window_size);
250  } else if (0 < update_batch_id + reuse_batch_size &&
251  check_branch_cuttable(d_vector, r_vector, p_vector,
252  update_batch_id + reuse_batch_size - 1,
253  burst_cut_threshold)) {
254  prev_optimal_pair.first = kBaseState;
255  prev_optimal_pair.second =
256  prev_optimal_costs[kBaseState] +
257  tau(kBaseState, now_state, gamma, window_size);
258  } else {
259  prev_optimal_pair =
260  calc_previous_optimal_state(now_state,
261  prev_optimal_costs[kBaseState],
262  prev_optimal_costs[kBurstState],
263  gamma, window_size);
264  }
265 
266  prev_optimal_in_now_states_costs[now_state] =
267  prev_optimal_pair.second +
268  sigma(p_vector[now_state],
269  d_vector[update_batch_id + reuse_batch_size],
270  r_vector[update_batch_id + reuse_batch_size]);
271 
272  prev_optimal_in_now_states_seq[now_state] =
273  prev_optimal_states_seq[prev_optimal_pair.first];
274  prev_optimal_in_now_states_seq[now_state].push_back(now_state);
275  }
276 
277  //
278  // ready for processing the next batch.
279  //
280  for (int state = kBaseState; state < kStatesNum; state++) {
281  prev_optimal_costs[state] = prev_optimal_in_now_states_costs[state];
282  prev_optimal_states_seq[state] =
283  prev_optimal_in_now_states_seq[state];
284  }
285  }
286 
287  std::vector<int> optimal_states_seq;
288 
289  if (d_vector[window_size - 1] == 0) {
290  // exception handling
291  // in prev batch,
292  // (d, r) = (0, 0)
293  optimal_states_seq = prev_optimal_in_now_states_seq[kBaseState];
294  } else if (check_branch_cuttable(d_vector, r_vector, p_vector,
295  window_size - 1,
296  burst_cut_threshold)) {
297  optimal_states_seq = prev_optimal_in_now_states_seq[kBaseState];
298  } else {
299  optimal_states_seq =
300  prev_optimal_in_now_states_costs[kBaseState] <=
301  prev_optimal_in_now_states_costs[kBurstState] ?
302  prev_optimal_in_now_states_seq[kBaseState] :
303  prev_optimal_in_now_states_seq[kBurstState];
304  }
305 
306  //
307  // calculation of batch_weights
308  //
309 
310  // reuse of past results
311  for (int batch_id = 0; batch_id < reuse_batch_size; batch_id++) {
312  if (0 < batch_weights[batch_id]) {
313  batch_weights[batch_id] =
314  get_batch_weight(d_vector, r_vector, p_vector, batch_id);
315  }
316  }
317  // new calculation
318  for (int batch_id = reuse_batch_size; batch_id < window_size; batch_id++) {
319  int state = optimal_states_seq[batch_id - reuse_batch_size];
320  batch_weights.push_back(state == kBurstState ?
321  get_batch_weight(d_vector, r_vector,
322  p_vector, batch_id) :
323  0);
324  }
325 }
#define JUBATUS_EXCEPTION(e)
Definition: exception.hpp:79

Here is the caller graph for this function:

template<class W1 , class W2 >
std::pair<int, int> jubatus::core::burst::get_intersection ( const W1 &  w1,
const W2 &  w2 
)

Definition at line 118 of file window_intersection.hpp.

References jubatus::core::burst::intersection_helper::get_intersection().

Referenced by jubatus::core::burst::burst_result::burst_result(), jubatus::core::burst::aggregator::impl_::flush_results(), and jubatus::core::burst::aggregator::impl_::make_new_window_().

118  {
119  return intersection_helper(w1).get_intersection(w2);
120 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool jubatus::core::burst::window_position_near ( double  pos0,
double  pos1,
double  batch_interval 
)
inline

Variable Documentation

const std::vector<batch_result> jubatus::core::burst::empty_batch_results

Definition at line 162 of file burst_result.cpp.

Referenced by jubatus::core::burst::burst_result::get_batches().

int jubatus::core::burst::survival_mix_count_from_set_unprocessed = 5