37 template <
typename INPUT_TYPE,
typename AGG_TYPE>
42 int32_t
const* original_input_col_idx_buf,
43 int64_t
const* ordered_input_col_idx_buf,
48 ? reinterpret_cast<const INPUT_TYPE*>(input_col_bufs.front())
50 ,
cond_col_buf_(input_col_bufs.size() == 2 ? input_col_bufs[1] : nullptr)
61 ? std::numeric_limits<INPUT_TYPE>::max()
63 ? std::numeric_limits<INPUT_TYPE>::min()
68 leaf_range_ = max_tree_height_and_leaf_range.second;
100 if (query_range.first > query_range.second || query_range.first < 0 ||
110 }
else if (sum_and_count_pair.
count == 0) {
114 return (static_cast<double>(sum_and_count_pair.
sum) /
116 sum_and_count_pair.
count;
118 return sum_and_count_pair.
sum / sum_and_count_pair.
count;
174 AGG_TYPE
build(int64_t cur_node_idx,
size_t cur_node_depth) {
177 int64_t input_col_idx = cur_node_idx -
leaf_range_.first;
185 const auto refined_input_col_idx =
195 std::vector<AGG_TYPE> child_vals =
208 int64_t input_col_idx = cur_node_idx -
leaf_range_.first;
216 const auto refined_input_col_idx =
225 std::vector<AGG_TYPE> child_vals =
238 int64_t input_col_idx = cur_node_idx -
leaf_range_.first;
246 const auto refined_input_col_idx =
255 std::vector<AGG_TYPE> child_vals =
268 size_t cur_node_depth) {
270 int64_t input_col_idx = cur_node_idx -
leaf_range_.first;
275 const auto refined_input_col_idx =
287 std::vector<SumAndCountPair<AGG_TYPE>> child_vals =
296 size_t cur_node_depth) {
297 std::vector<AGG_TYPE> child_vals(
fan_out_);
298 size_t next_node_depth = cur_node_depth + 1;
299 if (cur_node_depth == 0) {
300 for (
size_t i = 0; i <
fan_out_; ++i) {
301 child_vals[i] =
build(i + 1, next_node_depth);
304 int64_t cur_depth_start_offset = parent_idx *
fan_out_ + 1;
305 for (
size_t i = 0; i <
fan_out_; ++i) {
306 child_vals[i] =
build(cur_depth_start_offset + i, next_node_depth);
315 size_t cur_node_depth) {
316 std::vector<AGG_TYPE> child_vals(
fan_out_);
317 size_t next_node_depth = cur_node_depth + 1;
318 if (cur_node_depth == 0) {
319 for (
size_t i = 0; i <
fan_out_; ++i) {
323 int64_t cur_depth_start_offset = parent_idx *
fan_out_ + 1;
324 for (
size_t i = 0; i <
fan_out_; ++i) {
325 child_vals[i] =
buildForCondAgg(cur_depth_start_offset + i, next_node_depth);
333 size_t cur_node_depth) {
334 std::vector<AGG_TYPE> child_vals(
fan_out_);
335 size_t next_node_depth = cur_node_depth + 1;
336 if (cur_node_depth == 0) {
337 for (
size_t i = 0; i <
fan_out_; ++i) {
341 int64_t cur_depth_start_offset = parent_idx *
fan_out_ + 1;
342 for (
size_t i = 0; i <
fan_out_; ++i) {
343 child_vals[i] =
buildForCount(cur_depth_start_offset + i, next_node_depth);
351 size_t cur_node_depth) {
352 std::vector<SumAndCountPair<AGG_TYPE>> child_vals(
fan_out_);
353 size_t next_node_depth = cur_node_depth + 1;
354 if (cur_node_depth == 0) {
355 for (
size_t i = 0; i <
fan_out_; ++i) {
359 int64_t cur_depth_start_offset = parent_idx *
fan_out_ + 1;
360 for (
size_t i = 0; i <
fan_out_; ++i) {
371 bool all_nulls =
true;
373 AGG_TYPE min_val = std::numeric_limits<AGG_TYPE>::max();
375 vals.begin(), vals.end(), [&min_val, &all_nulls,
this](
const AGG_TYPE val) {
378 min_val = std::min(min_val, val);
386 AGG_TYPE max_val = std::numeric_limits<AGG_TYPE>::min();
388 vals.begin(), vals.end(), [&max_val, &all_nulls,
this](
const AGG_TYPE val) {
391 max_val = std::max(max_val, val);
399 AGG_TYPE agg_val = 0;
401 vals.begin(), vals.end(), [&agg_val, &all_nulls,
this](
const AGG_TYPE val) {
416 bool all_nulls =
true;
417 auto end_idx = cur_col_idx + num_visits;
419 AGG_TYPE min_val = std::numeric_limits<AGG_TYPE>::max();
420 for (
size_t i = cur_col_idx; i < end_idx; ++i) {
424 min_val = std::min(min_val, val);
432 AGG_TYPE max_val = std::numeric_limits<AGG_TYPE>::min();
433 for (
size_t i = cur_col_idx; i < end_idx; ++i) {
437 max_val = std::max(max_val, val);
445 AGG_TYPE agg_val = 0;
446 for (
size_t i = cur_col_idx; i < end_idx; ++i) {
463 bool all_nulls =
true;
464 std::for_each(vals.begin(),
469 res.count += pair.count;
481 size_t num_visits)
const {
483 bool all_nulls =
true;
484 auto end_idx = cur_col_idx + num_visits;
485 for (
size_t i = cur_col_idx; i < end_idx; ++i) {
488 res.sum += cur_pair_val.
sum;
502 int64_t cur_node_idx,
503 size_t cur_node_depth,
504 int64_t search_range_start_idx,
505 int64_t search_range_end_idx)
const {
509 if (search_range_end_idx < query_range.first ||
510 query_range.second < search_range_start_idx) {
513 }
else if (query_range.first <= search_range_start_idx &&
514 search_range_end_idx <= query_range.second) {
522 size_t num_visits = query_range.second - search_range_start_idx + 1;
527 int64_t pivot_idx = search_range_start_idx +
528 ((search_range_end_idx - search_range_start_idx) /
fan_out_);
529 int64_t child_search_start_idx = search_range_start_idx;
530 int64_t child_search_end_idx = pivot_idx;
531 std::vector<size_t> child_indexes(
fan_out_);
533 std::vector<AGG_TYPE> child_vals(
fan_out_);
534 for (
size_t i = 0; i < child_indexes.size(); ++i) {
535 child_vals[i] =
search(query_range,
538 child_search_start_idx,
539 child_search_end_idx);
540 child_search_start_idx = child_search_end_idx + 1;
541 child_search_end_idx = child_search_start_idx + pivot_idx;
542 if (child_search_end_idx > search_range_end_idx) {
543 child_search_end_idx = search_range_end_idx;
553 int64_t cur_node_idx,
554 size_t cur_node_depth,
555 int64_t search_range_start_idx,
556 int64_t search_range_end_idx)
const {
557 if (search_range_end_idx < query_range.first ||
558 query_range.second < search_range_start_idx) {
560 }
else if (query_range.first <= search_range_start_idx &&
561 search_range_end_idx <= query_range.second) {
565 size_t num_visits = query_range.second - search_range_start_idx + 1;
570 std::vector<int64_t> child_indexes(
fan_out_);
573 int64_t pivot_idx = search_range_start_idx +
574 ((search_range_end_idx - search_range_start_idx) /
fan_out_);
575 int64_t child_search_start_idx = search_range_start_idx;
576 int64_t child_search_end_idx = pivot_idx;
577 for (
size_t i = 0; i < child_indexes.size(); ++i) {
581 child_search_start_idx,
582 child_search_end_idx);
583 child_search_start_idx = child_search_end_idx + 1;
584 child_search_end_idx = child_search_start_idx + pivot_idx;
585 if (child_search_end_idx > search_range_end_idx) {
586 child_search_end_idx = search_range_end_idx;
599 size_t parent_tree_depth)
const {
600 if (parent_tree_depth == 0) {
601 for (
size_t i = 0; i <
fan_out_; ++i) {
602 child_indexes[i] = i + 1;
605 int64_t cur_depth_start_offset = parent_idx *
fan_out_ + 1;
606 for (
size_t i = 0; i <
fan_out_; ++i) {
607 child_indexes[i] = cur_depth_start_offset + i;
610 return child_indexes;
617 return std::make_pair(0, std::make_pair(0, 0));
619 int64_t cur_level_start_offset = 0;
621 IndexPair index_pair = std::make_pair(0, 0);
623 size_t maximum_node_at_next_level = pow(fan_out, depth);
624 if (num_elem < maximum_node_at_next_level) {
625 index_pair = std::make_pair(cur_level_start_offset,
626 cur_level_start_offset + maximum_node_at_next_level);
627 return std::make_pair(depth, index_pair);
630 cur_level_start_offset += maximum_node_at_next_level;
const int64_t *const ordered_input_col_idx_buf_
AGG_TYPE aggregateValueViaColumnAccess(int64_t cur_col_idx, size_t num_visits) const
SumAndCountPair< AGG_TYPE > * derived_aggregated_
IndexPair getLeafRange() const
SQLTypeInfo const input_col_ti_
size_t getLeafDepth() const
const INPUT_TYPE *const input_col_buf_
HOST DEVICE int get_scale() const
std::vector< AGG_TYPE > prepareChildValuesforConditionalAggregation(int64_t parent_idx, size_t cur_node_depth)
size_t getTreeSize() const
size_t getLeafSize() const
AGG_TYPE search(const IndexPair &query_range, int64_t cur_node_idx, size_t cur_node_depth, int64_t search_range_start_idx, int64_t search_range_end_idx) const
Constants for Builtin SQL Types supported by HEAVY.AI.
SumAndCountPair< AGG_TYPE > buildForDerivedAggregate(int64_t cur_node_idx, size_t cur_node_depth)
AGG_TYPE buildForCount(int64_t cur_node_idx, size_t cur_node_depth)
size_t getTreeFanout() const
bool const is_conditional_agg_
AGG_TYPE fetch_col_for_non_cond_agg(size_t const idx)
std::vector< int64_t > computeChildIndexes(std::vector< int64_t > &child_indexes, int64_t parent_idx, size_t parent_tree_depth) const
SumAndCountPair< AGG_TYPE > searchForDerivedAggregate(const IndexPair &query_range, int64_t cur_node_idx, size_t cur_node_depth, int64_t search_range_start_idx, int64_t search_range_end_idx) const
SegmentTree(std::vector< const int8_t * > const &input_col_bufs, SQLTypeInfo const &input_col_ti, int32_t const *original_input_col_idx_buf, int64_t const *ordered_input_col_idx_buf, int64_t num_elems, SqlWindowFunctionKind agg_type, size_t fan_out)
DEVICE TextEncodingDict inline_null_value()
size_t getNumElems() const
void * checked_malloc(const size_t size)
SumAndCountPair< AGG_TYPE > aggregateValueForDerivedAggregateViaColumnAccess(int64_t cur_col_idx, size_t num_visits) const
AGG_TYPE buildForCondAgg(int64_t cur_node_idx, size_t cur_node_depth)
SumAndCountPair< AGG_TYPE > aggregateValueForDerivedAggregate(const std::vector< SumAndCountPair< AGG_TYPE >> &vals) const
AGG_TYPE query(const IndexPair &query_range) const
std::pair< int64_t, int64_t > IndexPair
AGG_TYPE * getAggregatedValues() const
INPUT_TYPE const input_type_null_val_
std::vector< SumAndCountPair< AGG_TYPE > > prepareChildValuesforDerivedAggregate(int64_t parent_idx, size_t cur_node_depth)
const int32_t *const original_input_col_idx_buf_
std::vector< AGG_TYPE > prepareChildValuesforAggregation(int64_t parent_idx, size_t cur_node_depth)
AGG_TYPE fetch_col_for_cond_agg(size_t const idx)
SqlWindowFunctionKind const agg_type_
AGG_TYPE fetch_col_for_count(size_t const idx)
AGG_TYPE build(int64_t cur_node_idx, size_t cur_node_depth)
Common Enum definitions for SQL processing.
std::pair< size_t, IndexPair > findMaxTreeHeight(int64_t num_elem, int fan_out)
SumAndCountPair< AGG_TYPE > * getDerivedAggregatedValues() const
AGG_TYPE aggregateValue(const std::vector< AGG_TYPE > &vals) const
const int8_t *const cond_col_buf_
std::vector< AGG_TYPE > prepareChildValuesforAggregationForCount(int64_t parent_idx, size_t cur_node_depth)
AGG_TYPE * aggregated_values_
INPUT_TYPE const invalid_val_