17 #include "../Parser/ParserNode.h"
31 const bool fetch_columns,
57 if (ti.get_type() ==
kNULLT) {
58 throw std::runtime_error(
59 "NULL type literals are not currently supported in this context.");
61 if (constant->get_is_null()) {
65 std::vector<llvm::Value*> null_target_lvs;
70 null_target_lvs.push_back(
72 null_target_lvs.push_back(llvm::ConstantPointerNull::get(
75 return null_target_lvs;
76 }
else if (ti.is_geometry()) {
77 std::vector<llvm::Value*> ret_lvs;
86 switch (ti.get_type()) {
94 return {llvm::ConstantPointerNull::get(i8p_ty)};
97 return {llvm::ConstantPointerNull::get(i8p_ty),
98 llvm::ConstantPointerNull::get(i8p_ty)};
100 return {llvm::ConstantPointerNull::get(i8p_ty),
101 llvm::ConstantPointerNull::get(i8p_ty),
102 llvm::ConstantPointerNull::get(i8p_ty)};
107 }
else if (ti.is_array()) {
110 return {llvm::ConstantPointerNull::get(i8p_ty)};
113 ?
static_cast<llvm::Value*
>(
executor_->cgen_state_->inlineFpNull(ti))
114 : static_cast<llvm::Value*>(
executor_->cgen_state_->inlineIntNull(ti))};
119 CHECK_NE(ti.getStringDictKey().dict_id, 0);
120 return {
codegen(constant, ti.get_compression(), ti.getStringDictKey(), co)};
122 return {
codegen(constant, ti.get_compression(), {}, co)};
126 return {
codegen(case_expr, co)};
130 return {
codegen(extract_expr, co)};
134 return {
codegen(dateadd_expr, co)};
138 return {
codegen(datediff_expr, co)};
141 if (datetrunc_expr) {
142 return {
codegen(datetrunc_expr, co)};
145 if (charlength_expr) {
146 return {
codegen(charlength_expr, co)};
149 if (keyforstring_expr) {
150 return {
codegen(keyforstring_expr, co)};
153 if (sample_ratio_expr) {
154 return {
codegen(sample_ratio_expr, co)};
157 if (string_oper_expr) {
158 return {
codegen(string_oper_expr, co)};
161 if (cardinality_expr) {
162 return {
codegen(cardinality_expr, co)};
166 return {
codegen(like_expr, co)};
170 return {
codegen(regexp_expr, co)};
178 if (ml_predict_expr) {
179 return {
codegen(ml_predict_expr, co)};
183 if (pca_project_expr) {
184 return {
codegen(pca_project_expr, co)};
188 if (likelihood_expr) {
189 return {
codegen(likelihood_expr->get_arg(), fetch_columns, co)};
196 if (in_integer_set_expr) {
197 return {
codegen(in_integer_set_expr, co)};
199 auto function_oper_with_custom_type_handling_expr =
201 if (function_oper_with_custom_type_handling_expr) {
203 function_oper_with_custom_type_handling_expr, co)};
206 if (array_oper_expr) {
218 if (function_oper_expr) {
225 if (dynamic_cast<const Analyzer::OffsetInFragment*>(expr)) {
228 if (dynamic_cast<const Analyzer::WindowFunction*>(expr)) {
281 auto input_expr = expr->
get_arg();
284 auto double_lv =
codegen(input_expr,
true, co);
285 CHECK_EQ(
size_t(1), double_lv.size());
287 std::unique_ptr<CodeGenerator::NullCheckCodegen> nullcheck_codegen;
288 const bool is_nullable = !input_expr->get_type_info().get_notnull();
290 nullcheck_codegen = std::make_unique<NullCheckCodegen>(
cgen_state_,
293 input_expr->get_type_info(),
294 "sample_ratio_nullcheck");
297 std::vector<llvm::Value*>
args{double_lv[0],
posArg(
nullptr)};
299 if (nullcheck_codegen) {
312 CHECK(target_value_expr);
313 CHECK(lower_bound_expr);
314 CHECK(upper_bound_expr);
315 CHECK(partition_count_expr);
318 auto target_expr = expr;
319 if (
auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr)) {
321 target_expr = cast_expr->get_operand();
333 if (is_constant_expr(lower_bound_expr) && is_constant_expr(upper_bound_expr) &&
334 is_constant_expr(partition_count_expr)) {
348 if (!col_range.hasNulls()) {
373 if (num_partitions < 1 || num_partitions > INT32_MAX) {
374 throw std::runtime_error(
375 "PARTITION_COUNT expression of width_bucket function should be in a valid "
376 "range: 0 < PARTITION_COUNT <= 2147483647");
380 if (lower == upper) {
381 throw std::runtime_error(
382 "LOWER_BOUND and UPPER_BOUND expressions of width_bucket function cannot have "
383 "the same constant value");
386 throw std::runtime_error(
387 "Both LOWER_BOUND and UPPER_BOUND of width_bucket function should be finite "
388 "numeric constants.");
391 bool const reversed = lower > upper;
392 double const scale_factor = num_partitions / (reversed ? lower - upper : upper - lower);
393 std::string func_name = reversed ?
"width_bucket_reversed" :
"width_bucket";
395 auto get_double_constant_lvs = [
this, &co](
double const_val) {
398 auto double_const_expr =
400 return codegen(double_const_expr.get(),
false, co);
403 auto target_value_ti = target_value_expr->get_type_info();
404 auto target_value_expr_lvs =
codegen(target_value_expr,
true, co);
405 CHECK_EQ(
size_t(1), target_value_expr_lvs.size());
406 auto lower_expr_lvs =
codegen(lower_bound_expr,
true, co);
407 CHECK_EQ(
size_t(1), lower_expr_lvs.size());
408 auto scale_factor_lvs = get_double_constant_lvs(scale_factor);
409 CHECK_EQ(
size_t(1), scale_factor_lvs.size());
411 std::vector<llvm::Value*> width_bucket_args{target_value_expr_lvs[0],
414 func_name +=
"_no_oob_check";
415 width_bucket_args.push_back(scale_factor_lvs[0]);
417 auto upper_expr_lvs =
codegen(upper_bound_expr,
true, co);
418 CHECK_EQ(
size_t(1), upper_expr_lvs.size());
419 auto partition_count_expr_lvs =
codegen(partition_count_expr,
true, co);
420 CHECK_EQ(
size_t(1), partition_count_expr_lvs.size());
421 width_bucket_args.push_back(upper_expr_lvs[0]);
422 width_bucket_args.push_back(scale_factor_lvs[0]);
423 width_bucket_args.push_back(partition_count_expr_lvs[0]);
424 if (!target_value_ti.get_notnull()) {
425 func_name +=
"_nullable";
426 auto translated_null_value = target_value_ti.is_fp()
429 auto null_value_lvs = get_double_constant_lvs(translated_null_value);
430 CHECK_EQ(
size_t(1), null_value_lvs.size());
431 width_bucket_args.push_back(null_value_lvs[0]);
444 std::string func_name =
"width_bucket_expr";
445 bool nullable_expr =
false;
447 func_name +=
"_no_oob_check";
448 }
else if (!target_value_expr->get_type_info().get_notnull()) {
449 func_name +=
"_nullable";
450 nullable_expr =
true;
453 auto target_value_expr_lvs =
codegen(target_value_expr,
true, co);
454 CHECK_EQ(
size_t(1), target_value_expr_lvs.size());
455 auto lower_bound_expr_lvs =
codegen(lower_bound_expr,
true, co);
456 CHECK_EQ(
size_t(1), lower_bound_expr_lvs.size());
457 auto upper_bound_expr_lvs =
codegen(upper_bound_expr,
true, co);
458 CHECK_EQ(
size_t(1), upper_bound_expr_lvs.size());
459 auto partition_count_expr_lvs =
codegen(partition_count_expr,
true, co);
460 CHECK_EQ(
size_t(1), partition_count_expr_lvs.size());
461 auto target_value_ti = target_value_expr->get_type_info();
466 auto partition_count_ti = partition_count_expr->get_type_info();
467 CHECK(partition_count_ti.is_integer());
469 auto partition_count_expr_lv =
473 partition_count_ti.get_size() < int32_ti.get_size());
475 llvm::Value* partition_count_min =
477 llvm::BasicBlock* width_bucket_partition_count_ok_bb =
479 "width_bucket_partition_count_ok_bb",
481 llvm::BasicBlock* width_bucket_argument_check_fail_bb =
483 "width_bucket_argument_check_fail_bb",
486 width_bucket_argument_check_fail_bb,
487 width_bucket_partition_count_ok_bb);
493 llvm::BasicBlock* width_bucket_bound_check_ok_bb =
495 "width_bucket_bound_check_ok_bb",
497 llvm::Value* bound_check{
nullptr};
498 if (lower_bound_expr->get_type_info().get_notnull() &&
499 upper_bound_expr->get_type_info().get_notnull()) {
501 lower_bound_expr_lvs[0], upper_bound_expr_lvs[0],
"bound_check");
503 std::vector<llvm::Value*> bound_check_args{
504 lower_bound_expr_lvs[0],
505 upper_bound_expr_lvs[0],
511 bound_check, width_bucket_argument_check_fail_bb, width_bucket_bound_check_ok_bb);
516 lower_bound_expr_lvs,
517 lower_bound_expr->get_type_info(),
520 auto lower_bound_expr_lv = lower_bound_expr_lvs[0];
521 auto upper_bound_expr_lv = upper_bound_expr_lvs[0];
522 std::vector<llvm::Value*> width_bucket_args{target_value_expr_lvs[0],
526 partition_count_expr_lv};
528 width_bucket_args.push_back(null_value_lv);
536 const std::shared_ptr<Analyzer::Expr>& qual) {
539 qual_cf.simple_quals.begin(),
540 qual_cf.simple_quals.end());
541 ra_exe_unit.
quals.insert(
542 ra_exe_unit.
quals.end(), qual_cf.quals.begin(), qual_cf.quals.end());
547 const std::vector<InputTableInfo>& query_infos,
548 const size_t level_idx,
549 const std::string& fail_reason) {
553 throw std::runtime_error(
"Hash join failed, reason(s): " + fail_reason +
554 " | Incorrect # tables for executing loop join");
557 const bool has_loop_size_hint =
559 const size_t loop_join_size_threshold =
563 if (has_loop_size_hint && loop_join_size_threshold < loop_join_size) {
564 throw std::runtime_error(
565 "Hash join failed, reason(s): " + fail_reason +
566 " | Cannot fall back to loop join for non-trivial inner table size");
570 if (level_idx + 1 != ra_exe_unit.
join_quals.size()) {
571 throw std::runtime_error(
572 "Hash join failed, reason(s): " + fail_reason +
573 " | Cannot fall back to loop join for intermediate join quals");
575 if (loop_join_size_threshold < loop_join_size) {
576 throw std::runtime_error(
577 "Hash join failed, reason(s): " + fail_reason +
578 " | Cannot fall back to loop join for non-trivial inner table size");
581 throw std::runtime_error(
"Hash join failed, reason(s): " + fail_reason +
582 " | Loop join is disabled by user");
591 if (lhs_cv && rhs_cv && !bin_oper->is_bbox_intersect_oper()) {
593 auto rhs_type = rhs_cv->get_type_info().get_type();
596 throw std::runtime_error(
597 "Join operation between full array columns (i.e., R.arr = S.arr) instead of "
598 "indexed array columns (i.e., R.arr[1] = S.arr[2]) is not supported yet.");
609 const std::vector<InputTableInfo>& query_infos,
613 std::vector<JoinLoop> join_loops;
614 for (
size_t level_idx = 0, current_hash_table_idx = 0;
617 const auto& current_level_join_conditions = ra_exe_unit.
join_quals[level_idx];
618 std::vector<std::string> fail_reasons;
619 const auto current_level_hash_table =
620 buildCurrentLevelHashTable(current_level_join_conditions,
627 const auto found_outer_join_matches_cb =
628 [
this, level_idx](llvm::Value* found_outer_join_matches) {
632 found_outer_join_matches;
634 const auto is_deleted_cb = buildIsDeletedCb(ra_exe_unit, level_idx, co);
635 auto rem_left_join_quals_it =
637 bool has_remaining_left_join_quals =
639 !rem_left_join_quals_it->second.empty();
640 const auto outer_join_condition_remaining_quals_cb =
641 [
this, level_idx, &co](
const std::vector<llvm::Value*>& prev_iters) {
646 addJoinLoopIterator(prev_iters, level_idx + 1);
651 for (
auto expr : it->second) {
655 code_generator.
codegen(expr.get(),
true, co).front()));
658 return left_join_cond;
660 if (current_level_hash_table) {
661 const auto hoisted_filters_cb = buildHoistLeftHandSideFiltersCb(
662 ra_exe_unit, level_idx, current_level_hash_table->getInnerTableId(), co);
664 join_loops.emplace_back(
666 current_level_join_conditions.type,
668 [
this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
669 const std::vector<llvm::Value*>& prev_iters) {
670 addJoinLoopIterator(prev_iters, level_idx);
673 current_level_hash_table->codegenSlot(co, current_hash_table_idx);
678 has_remaining_left_join_quals
679 ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
680 outer_join_condition_remaining_quals_cb)
683 ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
688 }
else if (
auto range_join_table =
689 dynamic_cast<RangeJoinHashTable*>(current_level_hash_table.get())) {
690 join_loops.emplace_back(
692 current_level_join_conditions.type,
696 current_hash_table_idx,
698 current_level_hash_table,
699 &co](
const std::vector<llvm::Value*>& prev_iters) {
700 addJoinLoopIterator(prev_iters, level_idx);
702 CHECK(!prev_iters.empty());
703 const auto matching_set = range_join_table->codegenMatchingSetWithOffset(
704 co, current_hash_table_idx, prev_iters.back());
705 domain.values_buffer = matching_set.elements;
706 domain.element_count = matching_set.count;
711 ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
712 outer_join_condition_remaining_quals_cb)
716 ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
722 join_loops.emplace_back(
724 current_level_join_conditions.type,
726 [
this, current_hash_table_idx, level_idx, current_level_hash_table, &co](
727 const std::vector<llvm::Value*>& prev_iters) {
728 addJoinLoopIterator(prev_iters, level_idx);
730 const auto matching_set = current_level_hash_table->codegenMatchingSet(
731 co, current_hash_table_idx);
733 domain.element_count = matching_set.count;
738 ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
739 outer_join_condition_remaining_quals_cb)
742 ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
748 ++current_hash_table_idx;
750 const auto fail_reasons_str = current_level_join_conditions.quals.empty()
751 ?
"No equijoin expression found"
754 ra_exe_unit, eo, query_infos, level_idx, fail_reasons_str);
757 VLOG(1) <<
"Unable to build hash table, falling back to loop join: "
759 const auto outer_join_condition_cb =
760 [
this, level_idx, &co, &current_level_join_conditions](
761 const std::vector<llvm::Value*>& prev_iters) {
766 addJoinLoopIterator(prev_iters, level_idx + 1);
769 for (
auto expr : current_level_join_conditions.quals) {
772 code_generator.toBool(
773 code_generator.codegen(expr.get(),
true, co).front()));
775 return left_join_cond;
777 join_loops.emplace_back(
779 current_level_join_conditions.type,
781 [
this, level_idx](
const std::vector<llvm::Value*>& prev_iters) {
782 addJoinLoopIterator(prev_iters, level_idx);
786 arg->getType()->getScalarType()->getPointerElementType(),
790 rows_per_scan_ptr->getType()->getPointerElementType(),
792 "num_rows_per_scan");
797 ? std::function<llvm::Value*(const std::vector<llvm::Value*>&)>(
798 outer_join_condition_cb)
802 ? std::function<void(llvm::Value*)>(found_outer_join_matches_cb)
818 return {col_expr->getTableKey()};
823 std::set<shared::TableKey> ret;
824 for (
size_t i = 0; i < func_expr->getArity(); i++) {
825 ret = aggregateResult(ret, visit(func_expr->getArg(i)));
831 std::set<shared::TableKey> ret;
832 ret = aggregateResult(ret, visit(bin_oper->get_left_operand()));
833 return aggregateResult(ret, visit(bin_oper->get_right_operand()));
837 return visit(u_oper->get_operand());
841 const std::set<shared::TableKey>& aggregate,
842 const std::set<shared::TableKey>& next_result)
const final {
843 auto ret = aggregate;
844 for (
const auto& el : next_result) {
855 const size_t level_idx,
862 const auto& current_level_join_conditions = ra_exe_unit.
join_quals[level_idx];
863 if (level_idx == 0 && current_level_join_conditions.type ==
JoinType::LEFT) {
864 const auto& condition = current_level_join_conditions.quals.front();
866 CHECK(bin_oper) << condition->toString();
871 if (lhs && rhs && lhs->getTableKey() != rhs->getTableKey()) {
876 if (lhs->getTableKey() == inner_table_id) {
878 }
else if (rhs->getTableKey() == inner_table_id) {
882 std::list<std::shared_ptr<Analyzer::Expr>> hoisted_quals;
884 auto should_hoist_qual = [&hoisted_quals](
const auto& qual,
888 ExprTableIdVisitor visitor;
889 const auto table_keys = visitor.visit(qual.get());
890 if (table_keys.size() == 1 && table_keys.find(table_key) != table_keys.end()) {
891 hoisted_quals.push_back(qual);
895 should_hoist_qual(qual, selected_lhs->getTableKey());
897 for (
const auto& qual : ra_exe_unit.
quals) {
898 should_hoist_qual(qual, selected_lhs->getTableKey());
902 if (!hoisted_quals.empty()) {
903 return [
this, hoisted_quals, co](llvm::BasicBlock* true_bb,
904 llvm::BasicBlock* exit_bb,
905 const std::string& loop_name,
906 llvm::Function* parent_func,
907 CgenState* cgen_state) -> llvm::BasicBlock* {
909 bool has_quals_to_hoist =
false;
910 for (
const auto& qual : hoisted_quals) {
913 if (plan_state_->hoisted_filters_.count(qual) == 0) {
914 has_quals_to_hoist =
true;
919 if (!has_quals_to_hoist) {
925 llvm::IRBuilder<>& builder = cgen_state->ir_builder_;
926 auto& context = builder.getContext();
928 const auto filter_bb =
929 llvm::BasicBlock::Create(context,
930 "hoisted_left_join_filters_" + loop_name,
933 builder.SetInsertPoint(filter_bb);
935 llvm::Value* filter_lv = cgen_state_->llBool(
true);
938 for (
const auto& qual : hoisted_quals) {
939 if (plan_state_->hoisted_filters_.insert(qual).second) {
942 VLOG(1) <<
"Generating code for hoisted left hand side qualifier "
944 auto cond = code_generator.
toBool(
945 code_generator.
codegen(qual.get(),
true, co).front());
946 filter_lv = builder.CreateAnd(filter_lv, cond);
949 CHECK(filter_lv->getType()->isIntegerTy(1));
951 builder.CreateCondBr(filter_lv, true_bb, exit_bb);
961 std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
963 const size_t level_idx,
970 const auto input_desc = ra_exe_unit.
input_descs[level_idx + 1];
975 const auto deleted_cd = plan_state_->getDeletedColForTable(input_desc.getTableKey());
979 CHECK(deleted_cd->columnType.is_boolean());
980 const auto deleted_expr = makeExpr<Analyzer::ColumnVar>(
981 deleted_cd->columnType,
983 input_desc.getNestLevel());
984 return [
this, deleted_expr, level_idx, &co](
const std::vector<llvm::Value*>& prev_iters,
985 llvm::Value* have_more_inner_rows) {
986 const auto matching_row_index = addJoinLoopIterator(prev_iters, level_idx + 1);
990 llvm::Value* is_valid_it{
nullptr};
991 if (have_more_inner_rows) {
992 is_valid_it = have_more_inner_rows;
994 is_valid_it = cgen_state_->ir_builder_.CreateICmp(
995 llvm::ICmpInst::ICMP_SGE, matching_row_index, cgen_state_->llInt<int64_t>(0));
997 const auto it_valid_bb = llvm::BasicBlock::Create(
998 cgen_state_->context_,
"it_valid", cgen_state_->current_func_);
999 const auto it_not_valid_bb = llvm::BasicBlock::Create(
1000 cgen_state_->context_,
"it_not_valid", cgen_state_->current_func_);
1001 cgen_state_->ir_builder_.CreateCondBr(is_valid_it, it_valid_bb, it_not_valid_bb);
1002 const auto row_is_deleted_bb = llvm::BasicBlock::Create(
1003 cgen_state_->context_,
"row_is_deleted", cgen_state_->current_func_);
1004 cgen_state_->ir_builder_.SetInsertPoint(it_valid_bb);
1006 const auto row_is_deleted = code_generator.
toBool(
1007 code_generator.
codegen(deleted_expr.get(),
true, co).front());
1008 cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1009 cgen_state_->ir_builder_.SetInsertPoint(it_not_valid_bb);
1010 const auto row_is_deleted_default = cgen_state_->llBool(
false);
1011 cgen_state_->ir_builder_.CreateBr(row_is_deleted_bb);
1012 cgen_state_->ir_builder_.SetInsertPoint(row_is_deleted_bb);
1013 auto row_is_deleted_or_default =
1014 cgen_state_->ir_builder_.CreatePHI(row_is_deleted->getType(), 2);
1015 row_is_deleted_or_default->addIncoming(row_is_deleted, it_valid_bb);
1016 row_is_deleted_or_default->addIncoming(row_is_deleted_default, it_not_valid_bb);
1017 return row_is_deleted_or_default;
1026 const std::vector<InputTableInfo>& query_infos,
1028 std::vector<std::string>& fail_reasons) {
1030 std::shared_ptr<HashJoin> current_level_hash_table;
1031 auto handleNonHashtableQual = [&ra_exe_unit, &level_idx,
this](
1033 std::shared_ptr<Analyzer::Expr> qual) {
1035 plan_state_->addNonHashtableQualForLeftJoin(level_idx, qual);
1040 for (
const auto& join_qual : current_level_join_conditions.
quals) {
1042 if (current_level_hash_table || !qual_bin_oper ||
1044 handleNonHashtableQual(current_level_join_conditions.
type, join_qual);
1045 if (!current_level_hash_table) {
1046 fail_reasons.emplace_back(
"No equijoin expression found");
1052 if (!current_level_hash_table) {
1053 hash_table_or_error = buildHashTableForQualifier(
1058 current_level_join_conditions.
type,
1064 current_level_hash_table = hash_table_or_error.
hash_table;
1067 plan_state_->join_info_.join_hash_tables_.push_back(hash_table_or_error.
hash_table);
1068 plan_state_->join_info_.equi_join_tautologies_.push_back(qual_bin_oper);
1070 fail_reasons.push_back(hash_table_or_error.
fail_reason);
1071 if (!current_level_hash_table) {
1072 VLOG(2) <<
"Building a hashtable based on a qual " << qual_bin_oper->toString()
1075 handleNonHashtableQual(current_level_join_conditions.
type, qual_bin_oper);
1078 return current_level_hash_table;
1082 if (!cgen_state_->filter_func_) {
1090 for (
auto bb_it = cgen_state_->filter_func_->begin();
1091 bb_it != cgen_state_->filter_func_->end();
1093 for (
auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
1095 for (
auto op_it = instr_it->value_op_begin(); op_it != instr_it->value_op_end();
1097 llvm::Value* v = *op_it;
1101 if (llvm::dyn_cast<const llvm::CallInst>(instr_it) &&
1102 op_it == instr_it->value_op_end() - 1) {
1107 if (
auto* instr = llvm::dyn_cast<llvm::Instruction>(v);
1108 instr && instr->getParent() &&
1109 instr->getParent()->getParent() == cgen_state_->row_func_) {
1111 cgen_state_->filter_func_args_[v] =
nullptr;
1112 }
else if (
auto* argum = llvm::dyn_cast<llvm::Argument>(v);
1113 argum && argum->getParent() == cgen_state_->row_func_) {
1115 cgen_state_->filter_func_args_[v] =
nullptr;
1123 std::vector<llvm::Type*> filter_func_arg_types;
1124 filter_func_arg_types.reserve(cgen_state_->filter_func_args_.v_.size());
1125 for (
auto& arg : cgen_state_->filter_func_args_.v_) {
1126 filter_func_arg_types.push_back(arg->getType());
1128 auto ft = llvm::FunctionType::get(
1129 get_int_type(32, cgen_state_->context_), filter_func_arg_types,
false);
1130 cgen_state_->filter_func_->setName(
"old_filter_func");
1131 auto filter_func2 = llvm::Function::Create(ft,
1132 llvm::Function::ExternalLinkage,
1134 cgen_state_->filter_func_->getParent());
1135 CHECK_EQ(filter_func2->arg_size(), cgen_state_->filter_func_args_.v_.size());
1136 auto arg_it = cgen_state_->filter_func_args_.begin();
1138 for (llvm::Function::arg_iterator I = filter_func2->arg_begin(),
1139 E = filter_func2->arg_end();
1142 arg_it->second = &*I;
1143 if (arg_it->first->hasName()) {
1144 I->setName(arg_it->first->getName());
1153 filter_func2->getBasicBlockList().splice(
1154 filter_func2->begin(), cgen_state_->filter_func_->getBasicBlockList());
1156 if (cgen_state_->current_func_ == cgen_state_->filter_func_) {
1157 cgen_state_->current_func_ = filter_func2;
1159 cgen_state_->filter_func_ = filter_func2;
1162 for (
auto bb_it = cgen_state_->filter_func_->begin();
1163 bb_it != cgen_state_->filter_func_->end();
1165 for (
auto instr_it = bb_it->begin(); instr_it != bb_it->end(); ++instr_it) {
1167 for (
auto op_it = instr_it->op_begin(); op_it != instr_it->op_end(); ++op_it, ++i) {
1168 llvm::Value* v = op_it->get();
1169 if (
auto arg_it = cgen_state_->filter_func_args_.find(v);
1170 arg_it != cgen_state_->filter_func_args_.end()) {
1172 llvm::Use* use = &*op_it;
1173 use->set(arg_it->second);
1181 const size_t level_idx) {
1186 const auto it = cgen_state_->scan_idx_to_hash_pos_.find(level_idx);
1187 if (it != cgen_state_->scan_idx_to_hash_pos_.end()) {
1190 CHECK(!prev_iters.empty());
1191 llvm::Value* matching_row_index = prev_iters.back();
1193 cgen_state_->scan_idx_to_hash_pos_.emplace(level_idx, matching_row_index);
1194 CHECK(it_ok.second);
1195 return matching_row_index;
1201 llvm::Function* query_func,
1202 llvm::BasicBlock* entry_bb,
1207 const auto exit_bb =
1208 llvm::BasicBlock::Create(cgen_state_->context_,
"exit", cgen_state_->current_func_);
1209 cgen_state_->ir_builder_.SetInsertPoint(exit_bb);
1210 cgen_state_->ir_builder_.CreateRet(cgen_state_->llInt<int32_t>(0));
1211 cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1214 llvm::BasicBlock* loops_entry_bb{
nullptr};
1215 auto has_range_join =
1216 std::any_of(join_loops.begin(), join_loops.end(), [](
const auto& join_loop) {
1219 if (has_range_join) {
1220 CHECK_EQ(join_loops.size(), size_t(1));
1221 const auto element_count =
1222 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_), 9);
1224 auto compute_packed_offset = [](
const int32_t x,
const int32_t y) -> uint64_t {
1225 const uint64_t y_shifted =
static_cast<uint64_t
>(y) << 32;
1226 return y_shifted |
static_cast<uint32_t
>(x);
1229 const auto values_arr = std::vector<llvm::Constant*>{
1230 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_), 0),
1231 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1232 compute_packed_offset(0, 1)),
1233 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1234 compute_packed_offset(0, -1)),
1235 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1236 compute_packed_offset(1, 0)),
1237 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1238 compute_packed_offset(1, 1)),
1239 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1240 compute_packed_offset(1, -1)),
1241 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1242 compute_packed_offset(-1, 0)),
1243 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1244 compute_packed_offset(-1, 1)),
1245 llvm::ConstantInt::get(
get_int_type(64, cgen_state_->context_),
1246 compute_packed_offset(-1, -1))};
1248 const auto constant_values_array = llvm::ConstantArray::get(
1250 CHECK(cgen_state_->module_);
1252 new llvm::GlobalVariable(*cgen_state_->module_,
1255 llvm::GlobalValue::LinkageTypes::InternalLinkage,
1256 constant_values_array);
1260 [element_count, values](
const std::vector<llvm::Value*>& v) {
1263 domain.values_buffer = values;
1279 &group_by_and_aggregate,
1281 &ra_exe_unit](
const std::vector<llvm::Value*>& prev_iters) {
1282 auto& builder = cgen_state_->ir_builder_;
1285 llvm::BasicBlock::Create(cgen_state_->context_,
1286 "range_key_inner_body_exit",
1287 builder.GetInsertBlock()->getParent());
1289 auto range_key_body_bb =
1290 llvm::BasicBlock::Create(cgen_state_->context_,
1291 "range_key_loop_body",
1292 builder.GetInsertBlock()->getParent());
1293 builder.SetInsertPoint(range_key_body_bb);
1302 &group_by_and_aggregate,
1304 &ra_exe_unit](
const std::vector<llvm::Value*>& prev_iters) {
1305 addJoinLoopIterator(prev_iters, join_loops.size());
1306 auto& builder = cgen_state_->ir_builder_;
1307 const auto loop_body_bb =
1308 llvm::BasicBlock::Create(builder.getContext(),
1310 builder.GetInsertBlock()->getParent());
1311 builder.SetInsertPoint(loop_body_bb);
1312 const bool can_return_error =
1313 compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1314 if (can_return_error || cgen_state_->needs_error_check_ ||
1315 eo.with_dynamic_watchdog || eo.allow_runtime_query_interrupt) {
1316 createErrorCheckControlFlow(query_func,
1317 eo.with_dynamic_watchdog,
1318 eo.allow_runtime_query_interrupt,
1321 group_by_and_aggregate.query_infos_);
1323 return loop_body_bb;
1329 builder.SetInsertPoint(range_key_body_bb);
1330 cgen_state_->ir_builder_.CreateBr(body_loops_entry_bb);
1332 builder.SetInsertPoint(body_exit_bb);
1333 return range_key_body_bb;
1335 code_generator.
posArg(
nullptr),
1347 &group_by_and_aggregate,
1349 &ra_exe_unit](
const std::vector<llvm::Value*>& prev_iters) {
1351 addJoinLoopIterator(prev_iters, join_loops.size());
1352 auto& builder = cgen_state_->ir_builder_;
1353 const auto loop_body_bb = llvm::BasicBlock::Create(
1354 builder.getContext(),
"loop_body", builder.GetInsertBlock()->getParent());
1355 builder.SetInsertPoint(loop_body_bb);
1356 const bool can_return_error =
1357 compileBody(ra_exe_unit, group_by_and_aggregate, query_mem_desc, co);
1358 if (can_return_error || cgen_state_->needs_error_check_ ||
1360 createErrorCheckControlFlow(query_func,
1367 return loop_body_bb;
1369 code_generator.
posArg(
nullptr),
1373 CHECK(loops_entry_bb);
1374 cgen_state_->ir_builder_.SetInsertPoint(entry_bb);
1375 cgen_state_->ir_builder_.CreateBr(loops_entry_bb);
1380 const size_t col_width,
1382 const bool translate_null_val,
1383 const int64_t translated_null_val,
1385 std::stack<llvm::BasicBlock*>& array_loops,
1386 const bool thread_mem_shared) {
1388 CHECK_GE(col_width,
sizeof(int32_t));
1390 auto group_key = code_generator.
codegen(group_by_col,
true, co).front();
1391 auto key_to_cache = group_key;
1392 if (dynamic_cast<Analyzer::UOper*>(group_by_col) &&
1393 static_cast<Analyzer::UOper*>(group_by_col)->get_optype() ==
kUNNEST) {
1394 auto preheader = cgen_state_->ir_builder_.GetInsertBlock();
1395 auto array_loop_head = llvm::BasicBlock::Create(cgen_state_->context_,
1397 cgen_state_->current_func_,
1398 preheader->getNextNode());
1400 const auto ret_ty =
get_int_type(32, cgen_state_->context_);
1401 auto array_idx_ptr = cgen_state_->ir_builder_.CreateAlloca(ret_ty);
1402 CHECK(array_idx_ptr);
1403 cgen_state_->ir_builder_.CreateStore(cgen_state_->llInt(int32_t(0)), array_idx_ptr);
1404 const auto arr_expr =
static_cast<Analyzer::UOper*
>(group_by_col)->get_operand();
1406 CHECK(array_ti.is_array());
1407 const auto& elem_ti = array_ti.get_elem_type();
1409 (array_ti.get_size() > 0)
1410 ? cgen_state_->llInt(array_ti.get_size() / elem_ti.get_size())
1411 : cgen_state_->emitExternalCall(
1415 code_generator.
posArg(arr_expr),
1416 cgen_state_->llInt(
log2_bytes(elem_ti.get_logical_size()))});
1417 cgen_state_->ir_builder_.CreateBr(array_loop_head);
1418 cgen_state_->ir_builder_.SetInsertPoint(array_loop_head);
1420 auto array_idx = cgen_state_->ir_builder_.CreateLoad(
1421 array_idx_ptr->getType()->getPointerElementType(), array_idx_ptr);
1422 auto bound_check = cgen_state_->ir_builder_.CreateICmp(
1423 llvm::ICmpInst::ICMP_SLT, array_idx, array_len);
1424 auto array_loop_body = llvm::BasicBlock::Create(
1425 cgen_state_->context_,
"array_loop_body", cgen_state_->current_func_);
1426 cgen_state_->ir_builder_.CreateCondBr(
1429 array_loops.empty() ? diamond_codegen.
orig_cond_false_ : array_loops.top());
1430 cgen_state_->ir_builder_.SetInsertPoint(array_loop_body);
1431 cgen_state_->ir_builder_.CreateStore(
1432 cgen_state_->ir_builder_.CreateAdd(array_idx, cgen_state_->llInt(int32_t(1))),
1435 if (array_ti.get_size() < 0) {
1436 if (array_ti.get_notnull()) {
1437 array_at_fname =
"notnull_" + array_at_fname;
1439 array_at_fname =
"varlen_" + array_at_fname;
1441 const auto ar_ret_ty =
1443 ? (elem_ti.get_type() ==
kDOUBLE
1444 ? llvm::Type::getDoubleTy(cgen_state_->context_)
1445 : llvm::Type::getFloatTy(cgen_state_->context_))
1446 :
get_int_type(elem_ti.get_logical_size() * 8, cgen_state_->context_);
1447 group_key = cgen_state_->emitExternalCall(
1450 {group_key, code_generator.
posArg(arr_expr), array_idx});
1452 elem_ti, isArchMaxwell(co.
device_type), thread_mem_shared)) {
1453 key_to_cache = spillDoubleElement(group_key, ar_ret_ty);
1455 key_to_cache = group_key;
1457 CHECK(array_loop_head);
1458 array_loops.push(array_loop_head);
1460 cgen_state_->group_by_expr_cache_.push_back(key_to_cache);
1461 llvm::Value* orig_group_key{
nullptr};
1462 if (translate_null_val) {
1463 const std::string translator_func_name(
1464 col_width ==
sizeof(int32_t) ?
"translate_null_key_i32_" :
"translate_null_key_");
1465 const auto& ti = group_by_col->get_type_info();
1466 const auto key_type =
get_int_type(ti.get_logical_size() * 8, cgen_state_->context_);
1467 orig_group_key = group_key;
1468 group_key = cgen_state_->emitCall(
1471 static_cast<llvm::Value*
>(
1473 static_cast<llvm::Value*>(llvm::ConstantInt::get(
1474 llvm::Type::getInt64Ty(cgen_state_->context_), translated_null_val))});
1476 group_key = cgen_state_->ir_builder_.CreateBitCast(
1477 cgen_state_->castToTypeIn(group_key, col_width * 8),
1479 if (orig_group_key) {
1480 orig_group_key = cgen_state_->ir_builder_.CreateBitCast(
1481 cgen_state_->castToTypeIn(orig_group_key, col_width * 8),
1484 return {group_key, orig_group_key};
1489 llvm::Value* nullable_lv,
1491 const std::string&
name)
1492 : cgen_state(cgen_state), name(name) {
1497 llvm::Value* is_null_lv{
nullptr};
1498 if (nullable_ti.
is_fp()) {
1500 llvm::FCmpInst::FCMP_OEQ, nullable_lv, cgen_state->
inlineFpNull(nullable_ti));
1502 nullable_lv->getType()->getIntegerBitWidth() == 1) {
1504 llvm::ICmpInst::ICMP_EQ, nullable_lv, cgen_state->
llBool(
true));
1507 llvm::ICmpInst::ICMP_EQ, nullable_lv, cgen_state->
inlineIntNull(nullable_ti));
1511 std::make_unique<DiamondCodegen>(is_null_lv,
executor,
false,
name,
nullptr,
false);
1525 llvm::Value* notnull_lv) {
1528 cgen_state->ir_builder_.CreateBr(nullcheck_bb);
1530 CHECK_EQ(null_lv->getType(), notnull_lv->getType());
1532 cgen_state->ir_builder_.SetInsertPoint(nullcheck_bb);
1534 cgen_state->ir_builder_.CreatePHI(null_lv->getType(), 2,
name +
"_value");
1535 nullcheck_value->addIncoming(notnull_lv, null_check->cond_false_);
1536 nullcheck_value->addIncoming(null_lv, null_check->cond_true_);
1538 null_check.reset(
nullptr);
1539 cgen_state->ir_builder_.SetInsertPoint(nullcheck_bb);
1540 return nullcheck_value;
bool g_enable_left_join_filter_hoisting
NullCheckCodegen(CgenState *cgen_state, Executor *executor, llvm::Value *nullable_lv, const SQLTypeInfo &nullable_ti, const std::string &name="")
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
const Expr * get_partition_count() const
std::vector< llvm::Value * > outer_join_match_found_per_level_
bool is_constant_expr() const
std::unordered_map< size_t, std::vector< std::shared_ptr< Analyzer::Expr > > > left_join_non_hashtable_quals_
llvm::Value * codegenConstantWidthBucketExpr(const Analyzer::WidthBucketExpr *, const CompilationOptions &)
llvm::BasicBlock * nullcheck_bb
llvm::Value * element_count
llvm::Value * values_buffer
#define IS_EQUIVALENCE(X)
llvm::Value * codegenArith(const Analyzer::BinOper *, const CompilationOptions &)
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
bool with_dynamic_watchdog
llvm::IRBuilder ir_builder_
std::function< llvm::BasicBlock *(llvm::BasicBlock *, llvm::BasicBlock *, const std::string &, llvm::Function *, CgenState *)> HoistedFiltersCallback
llvm::Value * posArg(const Analyzer::Expr *) const
std::vector< InputDescriptor > input_descs
bool need_patch_unnest_double(const SQLTypeInfo &ti, const bool is_maxwell, const bool mem_shared)
llvm::ConstantInt * llBool(const bool v) const
virtual std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &)
void set_constant_expr() const
unsigned g_trivial_loop_join_threshold
llvm::Value * codegenArrayAt(const Analyzer::BinOper *, const CompilationOptions &)
HOST DEVICE SQLTypes get_type() const
void setFalseTarget(llvm::BasicBlock *cond_false)
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::vector< llvm::Value * > codegenGeoBinOper(const Analyzer::GeoBinOper *, const CompilationOptions &)
std::shared_ptr< HashJoin > hash_table
double get_bound_val(const Analyzer::Expr *bound_expr) const
bool filter_on_deleted_column
llvm::Function * row_func_
llvm::Value * codegenIsNull(const Analyzer::UOper *, const CompilationOptions &)
SQLOps get_optype() const
std::vector< llvm::Value * > codegenGeoExpr(const Analyzer::GeoExpr *, const CompilationOptions &)
llvm::LLVMContext & context_
llvm::Function * current_func_
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
std::set< shared::TableKey > visitFunctionOper(const Analyzer::FunctionOper *func_expr) const final
#define INJECT_TIMER(DESC)
const JoinQualsPerNestingLevel join_quals
std::vector< llvm::Value * > codegenGeoUOper(const Analyzer::GeoUOper *, const CompilationOptions &)
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
TableIdToNodeMap table_id_to_node_map
llvm::Value * codegenWidthBucketExpr(const Analyzer::WidthBucketExpr *, const CompilationOptions &)
llvm::Value * codegenCastBetweenIntTypes(llvm::Value *operand_lv, const SQLTypeInfo &operand_ti, const SQLTypeInfo &ti, bool upscale=true)
llvm::Value * codegenFunctionOper(const Analyzer::FunctionOper *, const CompilationOptions &)
static llvm::BasicBlock * codegen(const std::vector< JoinLoop > &join_loops, const std::function< llvm::BasicBlock *(const std::vector< llvm::Value * > &)> &body_codegen, llvm::Value *outer_iter, llvm::BasicBlock *exit_bb, CgenState *cgen_state)
void add_qualifier_to_execution_unit(RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< Analyzer::Expr > &qual)
const std::vector< InputTableInfo > & query_infos_
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
std::vector< llvm::Value * > codegenArrayExpr(const Analyzer::ArrayExpr *, const CompilationOptions &)
llvm::BasicBlock * orig_cond_false_
const SQLTypeInfo & get_type_info() const
llvm::Value * codegenUMinus(const Analyzer::UOper *, const CompilationOptions &)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Value * slot_lookup_result
ExecutorDeviceType device_type
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
const std::vector< InputTableInfo > & query_infos_
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
std::set< shared::TableKey > aggregateResult(const std::set< shared::TableKey > &aggregate, const std::set< shared::TableKey > &next_result) const final
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
bool isHintRegistered(const QueryHint hint) const
const Expr * get_arg() const
std::set< shared::TableKey > visitBinOper(const Analyzer::BinOper *bin_oper) const final
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::set< shared::TableKey > visitUOper(const Analyzer::UOper *u_oper) const final
llvm::StructType * createStringViewStructType()
int32_t get_partition_count_val() const
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT
llvm::Value * toBool(llvm::Value *)
std::vector< llvm::Value * > codegenGeoColumnVar(const Analyzer::GeoColumnVar *, const bool fetch_columns, const CompilationOptions &co)
llvm::Value * codegenFunctionOperWithCustomTypeHandling(const Analyzer::FunctionOperWithCustomTypeHandling *, const CompilationOptions &)
llvm::Value * codegenCmp(const Analyzer::BinOper *, const CompilationOptions &)
std::list< std::shared_ptr< Analyzer::Expr > > getSimpleQuals() const
const Expr * get_target_value() const
std::list< std::shared_ptr< Analyzer::Expr > > quals
llvm::ConstantInt * llInt(const T v) const
llvm::Value * codegenUnnest(const Analyzer::UOper *, const CompilationOptions &)
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
llvm::Value * finalize(llvm::Value *null_lv, llvm::Value *notnull_lv)
bool can_skip_out_of_bound_check() const
llvm::Value * codegenLogical(const Analyzer::BinOper *, const CompilationOptions &)
void check_if_loop_join_is_allowed(RelAlgExecutionUnit &ra_exe_unit, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, const size_t level_idx, const std::string &fail_reason)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
llvm::ConstantInt * ll_bool(const bool v, llvm::LLVMContext &context)
size_t loop_join_inner_table_max_num_rows
llvm::Value * codegenCast(const Analyzer::UOper *, const CompilationOptions &)
uint32_t log2_bytes(const uint32_t bytes)
std::string numeric_type_name(const SQLTypeInfo &ti)
bool is_dict_encoded_string() const
void skip_out_of_bound_check() const
void redeclareFilterFunction()
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
const Expr * get_lower_bound() const
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
const Expr * get_upper_bound() const
bool allow_runtime_query_interrupt
llvm::ArrayType * get_int_array_type(int const width, int count, llvm::LLVMContext &context)
SQLOps get_optype() const
std::unique_ptr< DiamondCodegen > null_check
std::set< shared::TableKey > visitColumnVar(const Analyzer::ColumnVar *col_expr) const final
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
HashTableBuildDagMap hash_table_build_plan_dag
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
void check_valid_join_qual(std::shared_ptr< Analyzer::BinOper > &bin_oper)
Executor * executor() const
size_t get_loop_join_size(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)