21 #include <llvm/IR/Constants.h>
22 #include <llvm/IR/IRBuilder.h>
23 #include <llvm/IR/Instructions.h>
24 #include <llvm/IR/Verifier.h>
34 auto type = value->getType();
38 return pointer_type->getElementType();
41 template <
class Attributes>
45 std::vector<Type*> func_args;
46 FunctionType* func_type = FunctionType::get(
47 IntegerType::get(mod->getContext(), 32),
51 auto func_ptr = mod->getFunction(name);
53 func_ptr = Function::Create(
55 GlobalValue::ExternalLinkage,
58 func_ptr->setCallingConv(CallingConv::C);
63 SmallVector<Attributes, 4> Attrs;
67 PAS = Attributes::get(mod->getContext(), ~0U, B);
71 func_pal = Attributes::get(mod->getContext(), Attrs);
73 func_ptr->setAttributes(func_pal);
78 template <
class Attributes>
80 return default_func_builder<Attributes>(mod,
"pos_start");
83 template <
class Attributes>
85 return default_func_builder<Attributes>(mod,
"group_buff_idx");
88 template <
class Attributes>
92 std::vector<Type*> func_args;
93 FunctionType* func_type = FunctionType::get(
94 IntegerType::get(mod->getContext(), 32),
98 auto func_ptr = mod->getFunction(
"pos_step");
100 func_ptr = Function::Create(
102 GlobalValue::ExternalLinkage,
105 func_ptr->setCallingConv(CallingConv::C);
110 SmallVector<Attributes, 4> Attrs;
114 PAS = Attributes::get(mod->getContext(), ~0U, B);
117 Attrs.push_back(PAS);
118 func_pal = Attributes::get(mod->getContext(), Attrs);
120 func_ptr->setAttributes(func_pal);
125 template <
class Attributes>
127 const size_t aggr_col_count,
128 const bool hoist_literals) {
129 using namespace llvm;
131 std::vector<Type*> func_args;
132 auto i8_type = IntegerType::get(mod->getContext(), 8);
133 auto i32_type = IntegerType::get(mod->getContext(), 32);
134 auto i64_type = IntegerType::get(mod->getContext(), 64);
135 auto pi32_type = PointerType::get(i32_type, 0);
136 auto pi64_type = PointerType::get(i64_type, 0);
138 if (aggr_col_count) {
139 for (
size_t i = 0; i < aggr_col_count; ++i) {
140 func_args.push_back(pi64_type);
143 func_args.push_back(pi64_type);
144 func_args.push_back(pi32_type);
145 func_args.push_back(pi32_type);
146 func_args.push_back(pi32_type);
147 func_args.push_back(pi32_type);
150 func_args.push_back(pi64_type);
152 func_args.push_back(i64_type);
153 func_args.push_back(pi64_type);
154 func_args.push_back(pi64_type);
155 if (hoist_literals) {
156 func_args.push_back(PointerType::get(i8_type, 0));
158 FunctionType* func_type = FunctionType::get(
163 std::string func_name{
"row_process"};
164 auto func_ptr = mod->getFunction(func_name);
167 func_ptr = Function::Create(
169 GlobalValue::ExternalLinkage,
172 func_ptr->setCallingConv(CallingConv::C);
176 SmallVector<Attributes, 4> Attrs;
180 PAS = Attributes::get(mod->getContext(), ~0U, B);
183 Attrs.push_back(PAS);
184 func_pal = Attributes::get(mod->getContext(), Attrs);
186 func_ptr->setAttributes(func_pal);
194 template <
class Attributes>
197 const size_t aggr_col_count,
198 const bool hoist_literals,
199 const bool is_estimate_query,
201 using namespace llvm;
203 auto func_pos_start = pos_start<Attributes>(mod);
204 CHECK(func_pos_start);
205 auto func_pos_step = pos_step<Attributes>(mod);
206 CHECK(func_pos_step);
207 auto func_group_buff_idx = group_buff_idx<Attributes>(mod);
208 CHECK(func_group_buff_idx);
209 auto func_row_process = row_process<Attributes>(
210 mod, is_estimate_query ? 1 : aggr_col_count, hoist_literals);
211 CHECK(func_row_process);
213 auto i8_type = IntegerType::get(mod->getContext(), 8);
214 auto i32_type = IntegerType::get(mod->getContext(), 32);
215 auto i64_type = IntegerType::get(mod->getContext(), 64);
216 auto pi8_type = PointerType::get(i8_type, 0);
217 auto ppi8_type = PointerType::get(pi8_type, 0);
218 auto pi32_type = PointerType::get(i32_type, 0);
219 auto pi64_type = PointerType::get(i64_type, 0);
220 auto ppi64_type = PointerType::get(pi64_type, 0);
222 std::vector<Type*> query_args;
223 query_args.push_back(ppi8_type);
224 if (hoist_literals) {
225 query_args.push_back(pi8_type);
227 query_args.push_back(pi64_type);
228 query_args.push_back(pi64_type);
229 query_args.push_back(pi32_type);
231 query_args.push_back(pi64_type);
232 query_args.push_back(ppi64_type);
233 query_args.push_back(i32_type);
234 query_args.push_back(pi64_type);
235 query_args.push_back(pi32_type);
236 query_args.push_back(pi32_type);
238 FunctionType* query_func_type = FunctionType::get(
239 Type::getVoidTy(mod->getContext()),
243 std::string query_template_name{
"query_template"};
244 auto query_func_ptr = mod->getFunction(query_template_name);
245 CHECK(!query_func_ptr);
247 query_func_ptr = Function::Create(
249 GlobalValue::ExternalLinkage,
252 query_func_ptr->setCallingConv(CallingConv::C);
254 Attributes query_func_pal;
256 SmallVector<Attributes, 4> Attrs;
260 B.addAttribute(Attribute::NoCapture);
261 PAS = Attributes::get(mod->getContext(), 1U, B);
264 Attrs.push_back(PAS);
267 B.addAttribute(Attribute::NoCapture);
268 PAS = Attributes::get(mod->getContext(), 2U, B);
271 Attrs.push_back(PAS);
275 B.addAttribute(Attribute::NoCapture);
276 Attrs.push_back(Attributes::get(mod->getContext(), 3U, B));
281 B.addAttribute(Attribute::NoCapture);
282 Attrs.push_back(Attributes::get(mod->getContext(), 4U, B));
285 Attrs.push_back(PAS);
287 query_func_pal = Attributes::get(mod->getContext(), Attrs);
289 query_func_ptr->setAttributes(query_func_pal);
291 Function::arg_iterator query_arg_it = query_func_ptr->arg_begin();
292 Value* byte_stream = &*query_arg_it;
293 byte_stream->setName(
"byte_stream");
294 Value* literals{
nullptr};
295 if (hoist_literals) {
296 literals = &*(++query_arg_it);
297 literals->setName(
"literals");
299 Value* row_count_ptr = &*(++query_arg_it);
300 row_count_ptr->setName(
"row_count_ptr");
301 Value* frag_row_off_ptr = &*(++query_arg_it);
302 frag_row_off_ptr->setName(
"frag_row_off_ptr");
303 Value* max_matched_ptr = &*(++query_arg_it);
304 max_matched_ptr->setName(
"max_matched_ptr");
305 Value* agg_init_val = &*(++query_arg_it);
306 agg_init_val->setName(
"agg_init_val");
307 Value* out = &*(++query_arg_it);
309 Value* frag_idx = &*(++query_arg_it);
310 frag_idx->setName(
"frag_idx");
311 Value* join_hash_tables = &*(++query_arg_it);
312 join_hash_tables->setName(
"join_hash_tables");
313 Value* total_matched = &*(++query_arg_it);
314 total_matched->setName(
"total_matched");
315 Value* error_code = &*(++query_arg_it);
316 error_code->setName(
"error_code");
318 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
320 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
321 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".for.body", query_func_ptr, 0);
323 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
324 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
327 std::vector<Value*> result_ptr_vec;
328 llvm::CallInst* smem_output_buffer{
nullptr};
329 if (!is_estimate_query) {
330 for (
size_t i = 0; i < aggr_col_count; ++i) {
331 auto result_ptr =
new AllocaInst(i64_type, 0,
"result", bb_entry);
333 result_ptr_vec.push_back(result_ptr);
336 auto init_smem_func = mod->getFunction(
"init_shared_mem");
337 CHECK(init_smem_func);
340 smem_output_buffer = CallInst::Create(
342 std::vector<llvm::Value*>{
344 llvm::ConstantInt::get(i32_type, aggr_col_count *
sizeof(int64_t))},
356 row_count->setName(
"row_count");
357 std::vector<Value*> agg_init_val_vec;
358 if (!is_estimate_query) {
359 for (
size_t i = 0; i < aggr_col_count; ++i) {
360 auto idx_lv = ConstantInt::get(i32_type, i);
362 GetElementPtrInst::CreateInBounds(agg_init_val, idx_lv,
"", bb_entry);
363 auto agg_init_val =
new LoadInst(
366 agg_init_val_vec.push_back(agg_init_val);
367 auto init_val_st =
new StoreInst(agg_init_val, result_ptr_vec[i],
false, bb_entry);
372 CallInst*
pos_start = CallInst::Create(func_pos_start,
"pos_start", bb_entry);
373 pos_start->setCallingConv(CallingConv::C);
375 Attributes pos_start_pal;
378 CallInst*
pos_step = CallInst::Create(func_pos_step,
"pos_step", bb_entry);
379 pos_step->setCallingConv(CallingConv::C);
381 Attributes pos_step_pal;
382 pos_step->setAttributes(pos_step_pal);
385 if (!is_estimate_query) {
386 group_buff_idx = CallInst::Create(func_group_buff_idx,
"group_buff_idx", bb_entry);
389 Attributes group_buff_idx_pal;
393 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
394 ICmpInst* enter_or_not =
395 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
396 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
399 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
400 BranchInst::Create(bb_forbody, bb_preheader);
404 PHINode* pos = PHINode::Create(i64_type, 2,
"pos", bb_forbody);
405 pos->addIncoming(pos_start_i64, bb_preheader);
406 pos->addIncoming(pos_inc_pre, bb_forbody);
408 std::vector<Value*> row_process_params;
409 row_process_params.insert(
410 row_process_params.end(), result_ptr_vec.begin(), result_ptr_vec.end());
411 if (is_estimate_query) {
412 row_process_params.push_back(
415 row_process_params.push_back(agg_init_val);
416 row_process_params.push_back(pos);
417 row_process_params.push_back(frag_row_off_ptr);
418 row_process_params.push_back(row_count_ptr);
419 if (hoist_literals) {
421 row_process_params.push_back(literals);
424 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
427 Attributes row_process_pal;
431 BinaryOperator::CreateNSW(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
432 ICmpInst* loop_or_exit =
433 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
434 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
437 std::vector<Instruction*> result_vec_pre;
438 if (!is_estimate_query) {
439 for (
size_t i = 0; i < aggr_col_count; ++i) {
446 result_vec_pre.push_back(
result);
450 BranchInst::Create(bb_exit, bb_crit_edge);
464 if (!is_estimate_query) {
465 std::vector<PHINode*> result_vec;
466 for (int64_t i = aggr_col_count - 1; i >= 0; --i) {
468 PHINode::Create(IntegerType::get(mod->getContext(), 64), 2,
"", bb_exit);
469 result->addIncoming(result_vec_pre[i], bb_crit_edge);
470 result->addIncoming(agg_init_val_vec[i], bb_entry);
471 result_vec.insert(result_vec.begin(),
result);
474 for (
size_t i = 0; i < aggr_col_count; ++i) {
475 auto col_idx = ConstantInt::get(i32_type, i);
478 GetElementPtrInst::CreateInBounds(smem_output_buffer, col_idx,
"", bb_exit);
481 auto agg_func = mod->getFunction(
"agg_sum_shared");
484 agg_func, std::vector<llvm::Value*>{target_addr, result_vec[i]},
"", bb_exit);
486 auto out_gep = GetElementPtrInst::CreateInBounds(out, col_idx,
"", bb_exit);
490 auto slot_idx = BinaryOperator::CreateAdd(
492 BinaryOperator::CreateMul(frag_idx,
pos_step,
"", bb_exit),
496 GetElementPtrInst::CreateInBounds(col_buffer, slot_idx,
"", bb_exit);
497 StoreInst* result_st =
new StoreInst(result_vec[i], target_addr,
false, bb_exit);
503 auto sync_thread_func = mod->getFunction(
"sync_threadblock");
504 CHECK(sync_thread_func);
505 CallInst::Create(sync_thread_func, std::vector<llvm::Value*>{},
"", bb_exit);
506 auto reduce_smem_to_gmem_func = mod->getFunction(
"write_back_non_grouped_agg");
507 CHECK(reduce_smem_to_gmem_func);
511 for (
size_t i = 0; i < aggr_col_count; i++) {
512 auto out_gep = GetElementPtrInst::CreateInBounds(
513 out, ConstantInt::get(i32_type, i),
"", bb_exit);
520 reduce_smem_to_gmem_func,
521 std::vector<llvm::Value*>{
522 smem_output_buffer, gmem_output_buffer, ConstantInt::get(i32_type, i)},
529 ReturnInst::Create(mod->getContext(), bb_exit);
532 pos_inc_pre->replaceAllUsesWith(pos_inc);
535 if (verifyFunction(*query_func_ptr)) {
536 LOG(
FATAL) <<
"Generated invalid code. ";
542 template <
class Attributes>
545 const bool hoist_literals,
548 const bool check_scan_limit,
553 using namespace llvm;
555 auto func_pos_start = pos_start<Attributes>(mod);
556 CHECK(func_pos_start);
557 auto func_pos_step = pos_step<Attributes>(mod);
558 CHECK(func_pos_step);
559 auto func_group_buff_idx = group_buff_idx<Attributes>(mod);
560 CHECK(func_group_buff_idx);
561 auto func_row_process = row_process<Attributes>(mod, 0, hoist_literals);
562 CHECK(func_row_process);
564 ? mod->getFunction(
"init_shared_mem")
565 : mod->getFunction(
"init_shared_mem_nop");
566 CHECK(func_init_shared_mem);
568 auto func_write_back = mod->getFunction(
"write_back_nop");
569 CHECK(func_write_back);
571 auto i32_type = IntegerType::get(mod->getContext(), 32);
572 auto i64_type = IntegerType::get(mod->getContext(), 64);
573 auto pi8_type = PointerType::get(IntegerType::get(mod->getContext(), 8), 0);
574 auto pi32_type = PointerType::get(i32_type, 0);
575 auto pi64_type = PointerType::get(i64_type, 0);
576 auto ppi64_type = PointerType::get(pi64_type, 0);
577 auto ppi8_type = PointerType::get(pi8_type, 0);
579 std::vector<Type*> query_args;
580 query_args.push_back(ppi8_type);
581 if (hoist_literals) {
582 query_args.push_back(pi8_type);
584 query_args.push_back(pi64_type);
585 query_args.push_back(pi64_type);
586 query_args.push_back(pi32_type);
587 query_args.push_back(pi64_type);
589 query_args.push_back(ppi64_type);
590 query_args.push_back(i32_type);
591 query_args.push_back(pi64_type);
592 query_args.push_back(pi32_type);
593 query_args.push_back(pi32_type);
595 FunctionType* query_func_type = FunctionType::get(
596 Type::getVoidTy(mod->getContext()),
600 std::string query_name{
"query_group_by_template"};
601 auto query_func_ptr = mod->getFunction(query_name);
602 CHECK(!query_func_ptr);
604 query_func_ptr = Function::Create(
606 GlobalValue::ExternalLinkage,
607 "query_group_by_template",
610 query_func_ptr->setCallingConv(CallingConv::C);
612 Attributes query_func_pal;
614 SmallVector<Attributes, 4> Attrs;
618 B.addAttribute(Attribute::ReadNone);
619 B.addAttribute(Attribute::NoCapture);
620 PAS = Attributes::get(mod->getContext(), 1U, B);
623 Attrs.push_back(PAS);
626 B.addAttribute(Attribute::ReadOnly);
627 B.addAttribute(Attribute::NoCapture);
628 PAS = Attributes::get(mod->getContext(), 2U, B);
631 Attrs.push_back(PAS);
634 B.addAttribute(Attribute::ReadNone);
635 B.addAttribute(Attribute::NoCapture);
636 PAS = Attributes::get(mod->getContext(), 3U, B);
639 Attrs.push_back(PAS);
642 B.addAttribute(Attribute::ReadOnly);
643 B.addAttribute(Attribute::NoCapture);
644 PAS = Attributes::get(mod->getContext(), 4U, B);
647 Attrs.push_back(PAS);
650 B.addAttribute(Attribute::UWTable);
651 PAS = Attributes::get(mod->getContext(), ~0U, B);
654 Attrs.push_back(PAS);
656 query_func_pal = Attributes::get(mod->getContext(), Attrs);
658 query_func_ptr->setAttributes(query_func_pal);
660 Function::arg_iterator query_arg_it = query_func_ptr->arg_begin();
661 Value* byte_stream = &*query_arg_it;
662 byte_stream->setName(
"byte_stream");
663 Value* literals{
nullptr};
664 if (hoist_literals) {
665 literals = &*(++query_arg_it);
667 literals->setName(
"literals");
669 Value* row_count_ptr = &*(++query_arg_it);
670 row_count_ptr->setName(
"row_count_ptr");
671 Value* frag_row_off_ptr = &*(++query_arg_it);
672 frag_row_off_ptr->setName(
"frag_row_off_ptr");
673 Value* max_matched_ptr = &*(++query_arg_it);
674 max_matched_ptr->setName(
"max_matched_ptr");
675 Value* agg_init_val = &*(++query_arg_it);
676 agg_init_val->setName(
"agg_init_val");
677 Value* group_by_buffers = &*(++query_arg_it);
678 group_by_buffers->setName(
"group_by_buffers");
679 Value* frag_idx = &*(++query_arg_it);
680 frag_idx->setName(
"frag_idx");
681 Value* join_hash_tables = &*(++query_arg_it);
682 join_hash_tables->setName(
"join_hash_tables");
683 Value* total_matched = &*(++query_arg_it);
684 total_matched->setName(
"total_matched");
685 Value* error_code = &*(++query_arg_it);
686 error_code->setName(
"error_code");
688 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
690 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
691 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".forbody", query_func_ptr, 0);
693 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
694 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
697 LoadInst* row_count =
new LoadInst(
700 row_count->setName(
"row_count");
702 LoadInst* max_matched =
new LoadInst(
706 auto crt_matched_ptr =
new AllocaInst(i32_type, 0,
"crt_matched", bb_entry);
707 auto old_total_matched_ptr =
new AllocaInst(i32_type, 0,
"old_total_matched", bb_entry);
708 CallInst*
pos_start = CallInst::Create(func_pos_start,
"", bb_entry);
709 pos_start->setCallingConv(CallingConv::C);
711 Attributes pos_start_pal;
714 CallInst*
pos_step = CallInst::Create(func_pos_step,
"", bb_entry);
715 pos_step->setCallingConv(CallingConv::C);
717 Attributes pos_step_pal;
718 pos_step->setAttributes(pos_step_pal);
720 CallInst*
group_buff_idx = CallInst::Create(func_group_buff_idx,
"", bb_entry);
723 Attributes group_buff_idx_pal;
726 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
727 const PointerType* Ty = dyn_cast<PointerType>(group_by_buffers->getType());
729 GetElementPtrInst* group_by_buffers_gep = GetElementPtrInst::Create(
730 Ty->getElementType(), group_by_buffers,
group_buff_idx,
"", bb_entry);
732 group_by_buffers_gep,
736 col_buffer->setName(
"col_buffer");
739 llvm::ConstantInt* shared_mem_bytes_lv =
741 llvm::CallInst* result_buffer =
742 CallInst::Create(func_init_shared_mem,
743 std::vector<llvm::Value*>{col_buffer, shared_mem_bytes_lv},
748 ICmpInst* enter_or_not =
749 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
750 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
753 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
754 BranchInst::Create(bb_forbody, bb_preheader);
758 PHINode* pos = PHINode::Create(i64_type, check_scan_limit ? 3 : 2,
"pos", bb_forbody);
760 std::vector<Value*> row_process_params;
761 row_process_params.push_back(result_buffer);
762 row_process_params.push_back(crt_matched_ptr);
763 row_process_params.push_back(total_matched);
764 row_process_params.push_back(old_total_matched_ptr);
765 row_process_params.push_back(max_matched_ptr);
766 row_process_params.push_back(agg_init_val);
767 row_process_params.push_back(pos);
768 row_process_params.push_back(frag_row_off_ptr);
769 row_process_params.push_back(row_count_ptr);
770 if (hoist_literals) {
772 row_process_params.push_back(literals);
774 if (check_scan_limit) {
775 new StoreInst(ConstantInt::get(IntegerType::get(mod->getContext(), 32), 0),
780 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
783 Attributes row_process_pal;
788 auto func_sync_warp_protected = mod->getFunction(
"sync_warp_protected");
789 CHECK(func_sync_warp_protected);
790 CallInst::Create(func_sync_warp_protected,
791 std::vector<llvm::Value*>{pos, row_count},
797 BinaryOperator::Create(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
798 ICmpInst* loop_or_exit =
799 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
800 if (check_scan_limit) {
806 auto filter_match = BasicBlock::Create(
807 mod->getContext(),
"filter_match", query_func_ptr, bb_crit_edge);
808 llvm::Value* new_total_matched =
810 old_total_matched_ptr,
815 BinaryOperator::CreateAdd(new_total_matched, crt_matched,
"", filter_match);
816 CHECK(new_total_matched);
817 ICmpInst* limit_not_reached =
new ICmpInst(*filter_match,
821 "limit_not_reached");
825 BinaryOperator::Create(
826 BinaryOperator::And, loop_or_exit, limit_not_reached,
"", filter_match),
828 auto filter_nomatch = BasicBlock::Create(
829 mod->getContext(),
"filter_nomatch", query_func_ptr, bb_crit_edge);
830 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, filter_nomatch);
831 ICmpInst* crt_matched_nz =
new ICmpInst(
832 *bb_forbody, ICmpInst::ICMP_NE, crt_matched, ConstantInt::get(i32_type, 0),
"");
833 BranchInst::Create(filter_match, filter_nomatch, crt_matched_nz, bb_forbody);
834 pos->addIncoming(pos_start_i64, bb_preheader);
835 pos->addIncoming(pos_pre, filter_match);
836 pos->addIncoming(pos_pre, filter_nomatch);
838 pos->addIncoming(pos_start_i64, bb_preheader);
839 pos->addIncoming(pos_pre, bb_forbody);
840 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
844 BranchInst::Create(bb_exit, bb_crit_edge);
847 CallInst::Create(func_write_back,
848 std::vector<Value*>{col_buffer, result_buffer, shared_mem_bytes_lv},
852 ReturnInst::Create(mod->getContext(), bb_exit);
855 pos_pre->replaceAllUsesWith(pos_inc);
858 if (verifyFunction(*query_func_ptr, &llvm::errs())) {
859 LOG(
FATAL) <<
"Generated invalid code. ";
866 llvm::Module* module,
867 const size_t aggr_col_count,
868 const bool hoist_literals,
869 const bool is_estimate_query,
871 return query_template_impl<llvm::AttributeList>(
872 module, aggr_col_count, hoist_literals, is_estimate_query, gpu_smem_context);
875 llvm::Module* module,
876 const bool hoist_literals,
879 const bool check_scan_limit,
881 return query_group_by_template_impl<llvm::AttributeList>(module,
std::tuple< llvm::Function *, llvm::CallInst * > query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext &gpu_smem_context)
std::tuple< llvm::Function *, llvm::CallInst * > query_group_by_template_impl(llvm::Module *mod, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext &gpu_smem_context)
llvm::Function * pos_start(llvm::Module *mod)
size_t getSharedMemorySize() const
Type pointer_type(const Type pointee)
#define LLVM_ALIGN(alignment)
bool isSharedMemoryUsed() const
llvm::Function * group_buff_idx(llvm::Module *mod)
llvm::Function * pos_step(llvm::Module *mod)
llvm::Function * default_func_builder(llvm::Module *mod, const std::string &name)
std::tuple< llvm::Function *, llvm::CallInst * > query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext &gpu_smem_context)
bool isWarpSyncRequired(const ExecutorDeviceType) const
std::tuple< llvm::Function *, llvm::CallInst * > query_template_impl(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext &gpu_smem_context)
llvm::Type * get_pointer_element_type(llvm::Value *value)
llvm::Function * row_process(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals)