20 #include <llvm/IR/Constants.h>
21 #include <llvm/IR/Instructions.h>
22 #include <llvm/IR/Verifier.h>
// Function template parameterized on the LLVM attribute-list type
// (`Attributes`). It looks a function up by `name` in `mod`, creates it with
// external linkage, and sets its calling convention and attributes.
// NOTE(review): the signature line is missing from this excerpt (the embedded
// numbering jumps from 30 to 34). Presumably this is
// default_func_builder(mod, name), which the pos_start / group_buff_idx
// wrappers call -- confirm against the full file.
30 template <
class Attributes>
34 std::vector<Type*> func_args;
// The function type uses a 32-bit integer return type. The remaining
// FunctionType::get arguments are on lines not visible here.
35 FunctionType* func_type = FunctionType::get(
36 IntegerType::get(mod->getContext(), 32),
// Look for an existing function of this name in the module.
40 auto func_ptr = mod->getFunction(name);
// Create the function with external linkage. The condition guarding this
// assignment is not visible; presumably it runs only when the lookup above
// returned null -- TODO confirm.
42 func_ptr = Function::Create(
44 GlobalValue::ExternalLinkage,
47 func_ptr->setCallingConv(CallingConv::C);
// Build an attribute object at index ~0U from builder `B` and install the
// collected list on the function. `B` is declared on a missing line.
// NOTE(review): ~0U is presumably LLVM's function-level attribute index --
// confirm.
52 SmallVector<Attributes, 4> Attrs;
56 PAS = Attributes::get(mod->getContext(), ~0U, B);
60 func_pal = Attributes::get(mod->getContext(), Attrs);
62 func_ptr->setAttributes(func_pal);
// Returns the result of default_func_builder for the function named
// "pos_start" in `mod`.
// NOTE(review): the signature line is missing from this excerpt.
67 template <
class Attributes>
69 return default_func_builder<Attributes>(mod,
"pos_start");
// Returns the result of default_func_builder for the function named
// "group_buff_idx" in `mod`.
// NOTE(review): the signature line is missing from this excerpt.
72 template <
class Attributes>
74 return default_func_builder<Attributes>(mod,
"group_buff_idx");
// Looks up the function named "pos_step" in `mod`, creates it with external
// linkage, and sets its calling convention and attributes.
// NOTE(review): the signature line and several interior lines are missing from
// this excerpt. The visible steps mirror the generic name-based builder
// earlier in the file; whether this could simply delegate to it depends on
// the lines not shown -- verify before refactoring.
77 template <
class Attributes>
81 std::vector<Type*> func_args;
// 32-bit integer return type; the other FunctionType::get arguments are not
// visible here.
82 FunctionType* func_type = FunctionType::get(
83 IntegerType::get(mod->getContext(), 32),
// Look for an existing "pos_step" in the module.
87 auto func_ptr = mod->getFunction(
"pos_step");
// Create the function with external linkage. The guarding condition is not
// visible; presumably it runs only when the lookup returned null -- TODO
// confirm.
89 func_ptr = Function::Create(
91 GlobalValue::ExternalLinkage,
94 func_ptr->setCallingConv(CallingConv::C);
// Build an attribute object at index ~0U from builder `B` (declared on a
// missing line), append it to the list, and install the list on the function.
99 SmallVector<Attributes, 4> Attrs;
103 PAS = Attributes::get(mod->getContext(), ~0U, B);
106 Attrs.push_back(PAS);
107 func_pal = Attributes::get(mod->getContext(), Attrs);
109 func_ptr->setAttributes(func_pal);
// Looks up or creates the function named "row_process" in the module. Its
// parameter list depends on `aggr_col_count` and `hoist_literals`.
// NOTE(review): the first signature line, the closing braces and the
// FunctionType::get arguments are on lines missing from this excerpt.
//
// Parameters visible here:
//   aggr_col_count  number of i64* parameters pushed by the loop below when
//                   it is non-zero.
//   hoist_literals  when true, a trailing i8* parameter is appended.
114 template <
class Attributes>
116 const size_t aggr_col_count,
117 const bool hoist_literals) {
118 using namespace llvm;
120 std::vector<Type*> func_args;
// Integer and pointer types used for the parameter list. All pointers are
// created in address space 0.
121 auto i8_type = IntegerType::get(mod->getContext(), 8);
122 auto i32_type = IntegerType::get(mod->getContext(), 32);
123 auto i64_type = IntegerType::get(mod->getContext(), 64);
124 auto pi32_type = PointerType::get(i32_type, 0);
125 auto pi64_type = PointerType::get(i64_type, 0);
// With a non-zero column count, push one i64* per aggregate column.
127 if (aggr_col_count) {
128 for (
size_t i = 0; i < aggr_col_count; ++i) {
129 func_args.push_back(pi64_type);
// One i64* followed by four i32* parameters.
// NOTE(review): presumably this is the `else` branch for a zero column count
// (the group-by template calls row_process with 0). The `else` line itself is
// not visible -- confirm.
132 func_args.push_back(pi64_type);
133 func_args.push_back(pi32_type);
134 func_args.push_back(pi32_type);
135 func_args.push_back(pi32_type);
136 func_args.push_back(pi32_type);
// Further parameters in order: i64*, i64, i64*, i64*.
139 func_args.push_back(pi64_type);
141 func_args.push_back(i64_type);
142 func_args.push_back(pi64_type);
143 func_args.push_back(pi64_type);
// Optional trailing i8* parameter when literals are hoisted.
144 if (hoist_literals) {
145 func_args.push_back(PointerType::get(i8_type, 0));
// The return type and variadic flag are on lines not visible here.
147 FunctionType* func_type = FunctionType::get(
152 std::string func_name{
"row_process"};
// Look for an existing "row_process" in the module, then create it with
// external linkage. The guard around the creation is not visible; presumably
// it runs only when the lookup returned null -- TODO confirm.
153 auto func_ptr = mod->getFunction(func_name);
156 func_ptr = Function::Create(
158 GlobalValue::ExternalLinkage,
161 func_ptr->setCallingConv(CallingConv::C);
// Build an attribute object at index ~0U from builder `B` (declared on a
// missing line), append it to the list, and install the list on the function.
165 SmallVector<Attributes, 4> Attrs;
169 PAS = Attributes::get(mod->getContext(), ~0U, B);
172 Attrs.push_back(PAS);
173 func_pal = Attributes::get(mod->getContext(), Attrs);
175 func_ptr->setAttributes(func_pal);
// Builds the LLVM function named "query_template" in `mod` and returns it.
// The generated function has five basic blocks (.entry, .loop.preheader,
// .for.body, ._crit_edge, .exit). Its loop calls row_process once per position
// and stores the per-column results through `out` in the exit block.
// NOTE(review): the first signature line and many interior lines (closing
// braces, some declarations and call arguments) are missing from this
// excerpt. The comments below describe only what the visible lines show.
//
// Parameters visible here:
//   aggr_col_count     number of aggregate columns: result slots, loads and
//                      stores are emitted per column.
//   hoist_literals     when true, an extra i8* argument is added and passed
//                      on to row_process.
//   is_estimate_query  when true, the per-column result handling is skipped
//                      and row_process is declared with a column count of 1.
183 template <
class Attributes>
185 const size_t aggr_col_count,
186 const bool hoist_literals,
187 const bool is_estimate_query) {
188 using namespace llvm;
// Obtain the helper functions called by the generated code. Each one is
// CHECKed to be non-null.
190 auto func_pos_start = pos_start<Attributes>(mod);
191 CHECK(func_pos_start);
192 auto func_pos_step = pos_step<Attributes>(mod);
193 CHECK(func_pos_step);
194 auto func_group_buff_idx = group_buff_idx<Attributes>(mod);
195 CHECK(func_group_buff_idx);
196 auto func_row_process = row_process<Attributes>(
197 mod, is_estimate_query ? 1 : aggr_col_count, hoist_literals);
198 CHECK(func_row_process);
// Integer and pointer types (address space 0) used for the signature and for
// the emitted instructions.
200 auto i8_type = IntegerType::get(mod->getContext(), 8);
201 auto i32_type = IntegerType::get(mod->getContext(), 32);
202 auto i64_type = IntegerType::get(mod->getContext(), 64);
203 auto pi8_type = PointerType::get(i8_type, 0);
204 auto ppi8_type = PointerType::get(pi8_type, 0);
205 auto pi32_type = PointerType::get(i32_type, 0);
206 auto pi64_type = PointerType::get(i64_type, 0);
207 auto ppi64_type = PointerType::get(pi64_type, 0);
// Parameter types of the generated function, in push order: i8**, optional
// i8* (with hoist_literals), i64*, i64*, i32*, i64*, i64**, i32, i64*, i32*,
// i32*. Some lines of the original are missing between these pushes.
209 std::vector<Type*> query_args;
210 query_args.push_back(ppi8_type);
211 if (hoist_literals) {
212 query_args.push_back(pi8_type);
214 query_args.push_back(pi64_type);
215 query_args.push_back(pi64_type);
216 query_args.push_back(pi32_type);
218 query_args.push_back(pi64_type);
219 query_args.push_back(ppi64_type);
220 query_args.push_back(i32_type);
221 query_args.push_back(pi64_type);
222 query_args.push_back(pi32_type);
223 query_args.push_back(pi32_type);
// The generated function returns void.
225 FunctionType* query_func_type = FunctionType::get(
226 Type::getVoidTy(mod->getContext()),
// The module must not already contain "query_template". Create it with
// external linkage and the C calling convention.
230 std::string query_template_name{
"query_template"};
231 auto query_func_ptr = mod->getFunction(query_template_name);
232 CHECK(!query_func_ptr);
234 query_func_ptr = Function::Create(
236 GlobalValue::ExternalLinkage,
239 query_func_ptr->setCallingConv(CallingConv::C);
// NoCapture is added at attribute indices 1U..4U.
// NOTE(review): presumably these are the first four parameters in LLVM's
// 1-based attribute indexing -- confirm. The builder declarations (`B`, `PAS`)
// are on lines missing from this excerpt.
241 Attributes query_func_pal;
243 SmallVector<Attributes, 4> Attrs;
247 B.addAttribute(Attribute::NoCapture);
248 PAS = Attributes::get(mod->getContext(), 1U, B);
251 Attrs.push_back(PAS);
254 B.addAttribute(Attribute::NoCapture);
255 PAS = Attributes::get(mod->getContext(), 2U, B);
258 Attrs.push_back(PAS);
262 B.addAttribute(Attribute::NoCapture);
263 Attrs.push_back(Attributes::get(mod->getContext(), 3U, B));
268 B.addAttribute(Attribute::NoCapture);
269 Attrs.push_back(Attributes::get(mod->getContext(), 4U, B));
272 Attrs.push_back(PAS);
274 query_func_pal = Attributes::get(mod->getContext(), Attrs);
276 query_func_ptr->setAttributes(query_func_pal);
// Walk the argument list in order and give each argument a readable name.
// Some arguments (e.g. the hoisted `literals` pointer and `out`, both used
// further down) are named on lines missing from this excerpt.
278 Function::arg_iterator query_arg_it = query_func_ptr->arg_begin();
279 Value* byte_stream = &*query_arg_it;
280 byte_stream->setName(
"byte_stream");
282 if (hoist_literals) {
286 Value* row_count_ptr = &*(++query_arg_it);
287 row_count_ptr->setName(
"row_count_ptr");
288 Value* frag_row_off_ptr = &*(++query_arg_it);
289 frag_row_off_ptr->setName(
"frag_row_off_ptr");
290 Value* max_matched_ptr = &*(++query_arg_it);
291 max_matched_ptr->setName(
"max_matched_ptr");
292 Value* agg_init_val = &*(++query_arg_it);
293 agg_init_val->setName(
"agg_init_val");
300 Value* total_matched = &*(++query_arg_it);
301 total_matched->setName(
"total_matched");
// Create the five basic blocks of the generated function. The variables for
// the preheader and crit-edge blocks (`bb_preheader`, `bb_crit_edge`, used
// below) are declared on lines missing from this excerpt.
305 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
307 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
308 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".for.body", query_func_ptr, 0);
310 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
311 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
// .entry: unless this is an estimate query, allocate one 8-byte-aligned i64
// stack slot ("result") per aggregate column.
314 std::vector<Value*> result_ptr_vec;
315 if (!is_estimate_query) {
316 for (
size_t i = 0; i < aggr_col_count; ++i) {
317 auto result_ptr =
new AllocaInst(i64_type, 0,
"result", bb_entry);
318 result_ptr->setAlignment(8);
319 result_ptr_vec.push_back(result_ptr);
// .entry: load the row count through row_count_ptr.
323 LoadInst* row_count =
new LoadInst(row_count_ptr,
"row_count",
false, bb_entry);
324 row_count->setAlignment(8);
325 row_count->setName(
"row_count");
// .entry: for each column, load the initial aggregate value from
// agg_init_val[i] and store it into that column's result slot.
// NOTE(review): the loop-local `agg_init_val` below reuses the name of the
// function argument declared above. The GEP on the preceding visible line
// still refers to the argument, because the local is not yet in scope there.
// Renaming the local would make this easier to read.
326 std::vector<Value*> agg_init_val_vec;
327 if (!is_estimate_query) {
328 for (
size_t i = 0; i < aggr_col_count; ++i) {
329 auto idx_lv = ConstantInt::get(i32_type, i);
331 GetElementPtrInst::CreateInBounds(agg_init_val, idx_lv,
"", bb_entry);
332 auto agg_init_val =
new LoadInst(agg_init_gep,
"",
false, bb_entry);
333 agg_init_val->setAlignment(8);
334 agg_init_val_vec.push_back(agg_init_val);
335 auto init_val_st =
new StoreInst(agg_init_val, result_ptr_vec[i],
false, bb_entry);
336 init_val_st->setAlignment(8);
// .entry: emit calls to the pos_start and pos_step helpers.
340 CallInst*
pos_start = CallInst::Create(func_pos_start,
"pos_start", bb_entry);
341 pos_start->setCallingConv(CallingConv::C);
343 Attributes pos_start_pal;
346 CallInst*
pos_step = CallInst::Create(func_pos_step,
"pos_step", bb_entry);
347 pos_step->setCallingConv(CallingConv::C);
349 Attributes pos_step_pal;
350 pos_step->setAttributes(pos_step_pal);
// .entry: the group_buff_idx helper is called only for non-estimate queries.
353 if (!is_estimate_query) {
354 group_buff_idx = CallInst::Create(func_group_buff_idx,
"group_buff_idx", bb_entry);
357 Attributes group_buff_idx_pal;
// .entry: sign-extend pos_start to i64. Enter the loop preheader only when
// pos_start < row_count (signed compare); otherwise branch straight to .exit.
361 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
362 ICmpInst* enter_or_not =
363 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
364 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
// .loop.preheader: sign-extend pos_step to i64, then fall into the loop body.
367 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
368 BranchInst::Create(bb_forbody, bb_preheader);
// .for.body: `pos` is a PHI over the start position (from the preheader) and
// the incremented position (from the loop back-edge). `pos_inc_pre` is
// declared on a missing line; every use of it is replaced by `pos_inc` near
// the end of this function.
372 PHINode* pos = PHINode::Create(i64_type, 2,
"pos", bb_forbody);
373 pos->addIncoming(pos_start_i64, bb_preheader);
374 pos->addIncoming(pos_inc_pre, bb_forbody);
// Arguments for the row_process call: the per-column result slots first. For
// an estimate query a value loaded through `out` is pushed. Then come
// agg_init_val, pos, frag_row_off_ptr and row_count_ptr, plus `literals` when
// hoisting.
376 std::vector<Value*> row_process_params;
377 row_process_params.insert(
378 row_process_params.end(), result_ptr_vec.begin(), result_ptr_vec.end());
379 if (is_estimate_query) {
380 row_process_params.push_back(
new LoadInst(
out,
"",
false, bb_forbody));
382 row_process_params.push_back(agg_init_val);
383 row_process_params.push_back(pos);
384 row_process_params.push_back(frag_row_off_ptr);
385 row_process_params.push_back(row_count_ptr);
386 if (hoist_literals) {
388 row_process_params.push_back(
literals);
391 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
394 Attributes row_process_pal;
// .for.body: advance the position by pos_step (add with the no-signed-wrap
// flag). Loop again while the new position is < row_count; otherwise leave
// through ._crit_edge.
398 BinaryOperator::CreateNSW(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
399 ICmpInst* loop_or_exit =
400 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
401 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
// ._crit_edge: reload each column's result slot after the loop, then branch
// to .exit.
404 std::vector<Instruction*> result_vec_pre;
405 if (!is_estimate_query) {
406 for (
size_t i = 0; i < aggr_col_count; ++i) {
407 auto result =
new LoadInst(result_ptr_vec[i],
".pre.result",
false, bb_crit_edge);
409 result_vec_pre.push_back(
result);
413 BranchInst::Create(bb_exit, bb_crit_edge);
// .exit: one PHI per column. It takes the reloaded result when arriving from
// ._crit_edge and the initial value when arriving directly from .entry. The
// loop runs from the last column down and inserts at the front, so result_vec
// ends up in ascending column order.
416 std::vector<PHINode*> result_vec;
417 if (!is_estimate_query) {
418 for (int64_t i = aggr_col_count - 1; i >= 0; --i) {
420 PHINode::Create(IntegerType::get(mod->getContext(), 64), 2,
"", bb_exit);
421 result->addIncoming(result_vec_pre[i], bb_crit_edge);
422 result->addIncoming(agg_init_val_vec[i], bb_entry);
423 result_vec.insert(result_vec.begin(),
result);
// .exit: for each column, load the column buffer pointer from out[i], compute
// a slot index, and store the column's result at that slot. The operands of
// the add that forms slot_idx are on lines missing from this excerpt.
427 if (!is_estimate_query) {
428 for (
size_t i = 0; i < aggr_col_count; ++i) {
429 auto col_idx = ConstantInt::get(i32_type, i);
430 auto out_gep = GetElementPtrInst::CreateInBounds(
out, col_idx,
"", bb_exit);
431 auto col_buffer =
new LoadInst(out_gep,
"",
false, bb_exit);
432 col_buffer->setAlignment(8);
433 auto slot_idx = BinaryOperator::CreateAdd(
439 GetElementPtrInst::CreateInBounds(col_buffer, slot_idx,
"", bb_exit);
440 StoreInst* result_st =
new StoreInst(result_vec[i], target_addr,
false, bb_exit);
441 result_st->setAlignment(8);
445 ReturnInst::Create(mod->getContext(), bb_exit);
// Resolve the loop PHI's back-edge: replace every use of the placeholder with
// the real increment.
448 pos_inc_pre->replaceAllUsesWith(pos_inc);
// Abort with a fatal log message if the verifier reports a problem with the
// generated function.
451 if (verifyFunction(*query_func_ptr)) {
452 LOG(
FATAL) <<
"Generated invalid code. ";
455 return query_func_ptr;
// Builds the LLVM function named "query_group_by_template" in `mod` and
// returns it. The generated function initialises a result buffer through one
// of the init_shared_mem* helpers, loops over positions calling row_process,
// and copies results back in the exit block. It can optionally stop early
// when `check_scan_limit` is set.
// NOTE(review): the first signature line and many interior lines (closing
// braces, conditions, some declarations and call arguments) are missing from
// this excerpt. `query_mem_desc` and `device_type` are used below, but their
// declarations are not visible. The comments describe only what the visible
// lines show.
//
// Parameters visible here:
//   hoist_literals    when true, an extra i8* argument is added and passed on
//                     to row_process.
//   check_scan_limit  when true, the loop gets two extra blocks
//                     (filter_match / filter_nomatch) and an extra
//                     "limit_not_reached" condition on the back-edge.
458 template <
class Attributes>
460 const bool hoist_literals,
463 const bool check_scan_limit) {
464 using namespace llvm;
// Obtain the helper functions called by the generated code. Each one is
// CHECKed to be non-null. row_process is requested with a column count of 0.
466 auto func_pos_start = pos_start<Attributes>(mod);
467 CHECK(func_pos_start);
468 auto func_pos_step = pos_step<Attributes>(mod);
469 CHECK(func_pos_step);
470 auto func_group_buff_idx = group_buff_idx<Attributes>(mod);
471 CHECK(func_group_buff_idx);
472 auto func_row_process = row_process<Attributes>(mod, 0, hoist_literals);
473 CHECK(func_row_process);
// Pick the shared-memory init helper: "init_shared_mem" when
// sharedMemBytes(device_type) is non-zero, else "init_shared_mem_nop". It may
// then be overridden with "init_shared_mem_dynamic" under a condition that is
// not visible in this excerpt.
474 auto func_init_shared_mem = query_mem_desc.
sharedMemBytes(device_type)
475 ? mod->getFunction(
"init_shared_mem")
476 : mod->getFunction(
"init_shared_mem_nop");
479 func_init_shared_mem = mod->getFunction(
"init_shared_mem_dynamic");
481 CHECK(func_init_shared_mem);
// Pick the write-back helper in the same way: "write_back" or
// "write_back_nop", possibly overridden with "write_back_smem_nop". The
// conditions and the declaration of func_write_back are not visible.
484 ? mod->getFunction(
"write_back")
485 : mod->getFunction(
"write_back_nop");
488 func_write_back = mod->getFunction(
"write_back_smem_nop");
490 CHECK(func_write_back);
// Integer and pointer types (address space 0) used for the signature and for
// the emitted instructions.
492 auto i32_type = IntegerType::get(mod->getContext(), 32);
493 auto i64_type = IntegerType::get(mod->getContext(), 64);
494 auto pi8_type = PointerType::get(IntegerType::get(mod->getContext(), 8), 0);
495 auto pi32_type = PointerType::get(i32_type, 0);
496 auto pi64_type = PointerType::get(i64_type, 0);
497 auto ppi64_type = PointerType::get(pi64_type, 0);
498 auto ppi8_type = PointerType::get(pi8_type, 0);
// Parameter types of the generated function, in push order: i8**, optional
// i8* (with hoist_literals), i64*, i64*, i32*, i64*, i64**, i32, i64*, i32*,
// i32*.
500 std::vector<Type*> query_args;
501 query_args.push_back(ppi8_type);
502 if (hoist_literals) {
503 query_args.push_back(pi8_type);
505 query_args.push_back(pi64_type);
506 query_args.push_back(pi64_type);
507 query_args.push_back(pi32_type);
508 query_args.push_back(pi64_type);
510 query_args.push_back(ppi64_type);
511 query_args.push_back(i32_type);
512 query_args.push_back(pi64_type);
513 query_args.push_back(pi32_type);
514 query_args.push_back(pi32_type);
// The generated function returns void.
516 FunctionType* query_func_type = FunctionType::get(
517 Type::getVoidTy(mod->getContext()),
// The module must not already contain "query_group_by_template". Create it
// with external linkage and the C calling convention.
521 std::string query_name{
"query_group_by_template"};
522 auto query_func_ptr = mod->getFunction(query_name);
523 CHECK(!query_func_ptr);
525 query_func_ptr = Function::Create(
527 GlobalValue::ExternalLinkage,
528 "query_group_by_template",
531 query_func_ptr->setCallingConv(CallingConv::C);
// Attributes: ReadNone+NoCapture at indices 1U and 3U, ReadOnly+NoCapture at
// 2U and 4U, and UWTable at ~0U.
// NOTE(review): presumably 1U..4U address the first four parameters and ~0U
// the function itself in LLVM's attribute indexing -- confirm. The builder
// declarations (`B`, `PAS`) are on lines missing from this excerpt.
533 Attributes query_func_pal;
535 SmallVector<Attributes, 4> Attrs;
539 B.addAttribute(Attribute::ReadNone);
540 B.addAttribute(Attribute::NoCapture);
541 PAS = Attributes::get(mod->getContext(), 1U, B);
544 Attrs.push_back(PAS);
547 B.addAttribute(Attribute::ReadOnly);
548 B.addAttribute(Attribute::NoCapture);
549 PAS = Attributes::get(mod->getContext(), 2U, B);
552 Attrs.push_back(PAS);
555 B.addAttribute(Attribute::ReadNone);
556 B.addAttribute(Attribute::NoCapture);
557 PAS = Attributes::get(mod->getContext(), 3U, B);
560 Attrs.push_back(PAS);
563 B.addAttribute(Attribute::ReadOnly);
564 B.addAttribute(Attribute::NoCapture);
565 PAS = Attributes::get(mod->getContext(), 4U, B);
568 Attrs.push_back(PAS);
571 B.addAttribute(Attribute::UWTable);
572 PAS = Attributes::get(mod->getContext(), ~0U, B);
575 Attrs.push_back(PAS);
577 query_func_pal = Attributes::get(mod->getContext(), Attrs);
579 query_func_ptr->setAttributes(query_func_pal);
// Walk the argument list in order and give each argument a readable name.
// Some arguments (e.g. the hoisted `literals` pointer used further down) are
// named on lines missing from this excerpt.
581 Function::arg_iterator query_arg_it = query_func_ptr->arg_begin();
582 Value* byte_stream = &*query_arg_it;
583 byte_stream->setName(
"byte_stream");
585 if (hoist_literals) {
590 Value* row_count_ptr = &*(++query_arg_it);
591 row_count_ptr->setName(
"row_count_ptr");
592 Value* frag_row_off_ptr = &*(++query_arg_it);
593 frag_row_off_ptr->setName(
"frag_row_off_ptr");
594 Value* max_matched_ptr = &*(++query_arg_it);
595 max_matched_ptr->setName(
"max_matched_ptr");
596 Value* agg_init_val = &*(++query_arg_it);
597 agg_init_val->setName(
"agg_init_val");
598 Value* group_by_buffers = &*(++query_arg_it);
599 group_by_buffers->setName(
"group_by_buffers");
604 Value* total_matched = &*(++query_arg_it);
605 total_matched->setName(
"total_matched");
// Create the five basic blocks of the generated function. The variables for
// the preheader and crit-edge blocks (`bb_preheader`, `bb_crit_edge`, used
// below) are declared on lines missing from this excerpt.
609 auto bb_entry = BasicBlock::Create(mod->getContext(),
".entry", query_func_ptr, 0);
611 BasicBlock::Create(mod->getContext(),
".loop.preheader", query_func_ptr, 0);
612 auto bb_forbody = BasicBlock::Create(mod->getContext(),
".forbody", query_func_ptr, 0);
614 BasicBlock::Create(mod->getContext(),
"._crit_edge", query_func_ptr, 0);
615 auto bb_exit = BasicBlock::Create(mod->getContext(),
".exit", query_func_ptr, 0);
// .entry: load the row count through row_count_ptr.
618 LoadInst* row_count =
new LoadInst(row_count_ptr,
"",
false, bb_entry);
619 row_count->setAlignment(8);
620 row_count->setName(
"row_count");
// .entry: load the value behind max_matched_ptr.
622 LoadInst*
max_matched =
new LoadInst(max_matched_ptr,
"",
false, bb_entry);
// .entry: i32 stack slots later passed to row_process as the
// "crt_matched" / "old_total_matched" outputs.
625 auto crt_matched_ptr =
new AllocaInst(i32_type, 0,
"crt_matched", bb_entry);
626 auto old_total_matched_ptr =
new AllocaInst(i32_type, 0,
"old_total_matched", bb_entry);
// .entry: emit calls to the pos_start, pos_step and group_buff_idx helpers.
627 CallInst*
pos_start = CallInst::Create(func_pos_start,
"", bb_entry);
628 pos_start->setCallingConv(CallingConv::C);
630 Attributes pos_start_pal;
633 CallInst*
pos_step = CallInst::Create(func_pos_step,
"", bb_entry);
634 pos_step->setCallingConv(CallingConv::C);
636 Attributes pos_step_pal;
637 pos_step->setAttributes(pos_step_pal);
639 CallInst*
group_buff_idx = CallInst::Create(func_group_buff_idx,
"", bb_entry);
642 Attributes group_buff_idx_pal;
645 CastInst* pos_start_i64 =
new SExtInst(
pos_start, i64_type,
"", bb_entry);
// .entry: index group_by_buffers with the group_buff_idx result and load the
// selected buffer pointer ("col_buffer").
// NOTE(review): the dyn_cast result `Ty` is dereferenced on the next visible
// statement without a null check. If the argument is always a pointer,
// cast<> would express that and assert on violation -- confirm and consider
// changing.
646 const PointerType* Ty = dyn_cast<PointerType>(group_by_buffers->getType());
648 GetElementPtrInst* group_by_buffers_gep = GetElementPtrInst::Create(
649 Ty->getElementType(), group_by_buffers,
group_buff_idx,
"", bb_entry);
650 LoadInst* col_buffer =
new LoadInst(group_by_buffers_gep,
"",
false, bb_entry);
651 col_buffer->setName(
"col_buffer");
652 col_buffer->setAlignment(8);
// .entry: call the selected init_shared_mem* helper to obtain the result
// buffer. Two variants are visible. One passes an element count of
// getEntryCount() + 1. The other passes the byte size from
// sharedMemBytes(device_type). The condition choosing between them is on a
// line missing from this excerpt.
654 llvm::ConstantInt* shared_mem_num_elements_lv =
nullptr;
655 llvm::ConstantInt* shared_mem_bytes_lv =
nullptr;
656 llvm::CallInst* result_buffer =
nullptr;
659 int32_t num_shared_mem_buckets = query_mem_desc.
getEntryCount() + 1;
660 shared_mem_bytes_lv =
661 ConstantInt::get(i32_type, query_mem_desc.
sharedMemBytes(device_type));
662 shared_mem_num_elements_lv = ConstantInt::get(i32_type, num_shared_mem_buckets);
663 result_buffer = CallInst::Create(
664 func_init_shared_mem,
665 std::vector<llvm::Value*>{col_buffer, shared_mem_num_elements_lv},
669 shared_mem_bytes_lv =
670 ConstantInt::get(i32_type, query_mem_desc.
sharedMemBytes(device_type));
672 CallInst::Create(func_init_shared_mem,
673 std::vector<llvm::Value*>{col_buffer, shared_mem_bytes_lv},
677 result_buffer->setName(
"result_buffer");
// .entry: enter the loop preheader only when pos_start < row_count (signed
// compare); otherwise branch straight to .exit.
679 ICmpInst* enter_or_not =
680 new ICmpInst(*bb_entry, ICmpInst::ICMP_SLT, pos_start_i64, row_count,
"");
681 BranchInst::Create(bb_preheader, bb_exit, enter_or_not, bb_entry);
// .loop.preheader: sign-extend pos_step to i64, then fall into the loop body.
684 CastInst* pos_step_i64 =
new SExtInst(
pos_step, i64_type,
"", bb_preheader);
685 BranchInst::Create(bb_forbody, bb_preheader);
// .forbody: position PHI. It reserves three incoming edges when the scan
// limit check adds the filter_match / filter_nomatch blocks, else two.
689 PHINode* pos = PHINode::Create(i64_type, check_scan_limit ? 3 : 2,
"pos", bb_forbody);
// Arguments for the row_process call, in order, plus `literals` when
// hoisting.
691 std::vector<Value*> row_process_params;
692 row_process_params.push_back(result_buffer);
693 row_process_params.push_back(crt_matched_ptr);
694 row_process_params.push_back(total_matched);
695 row_process_params.push_back(old_total_matched_ptr);
696 row_process_params.push_back(max_matched_ptr);
697 row_process_params.push_back(agg_init_val);
698 row_process_params.push_back(pos);
699 row_process_params.push_back(frag_row_off_ptr);
700 row_process_params.push_back(row_count_ptr);
701 if (hoist_literals) {
703 row_process_params.push_back(
literals);
// With the scan limit check, an i32 zero is stored before the call. The
// store's destination is on a line missing from this excerpt.
705 if (check_scan_limit) {
706 new StoreInst(ConstantInt::get(IntegerType::get(mod->getContext(), 32), 0),
711 CallInst::Create(func_row_process, row_process_params,
"", bb_forbody);
714 Attributes row_process_pal;
// Call the "sync_warp_protected" runtime function with (pos, row_count). The
// condition guarding this call is not visible.
// NOTE(review): presumably gated on isWarpSyncRequired(device_type) --
// confirm.
719 auto func_sync_warp_protected = mod->getFunction(
"sync_warp_protected");
720 CHECK(func_sync_warp_protected);
721 CallInst::Create(func_sync_warp_protected,
722 std::vector<llvm::Value*>{pos, row_count},
// .forbody: advance the position by pos_step and compute the loop condition
// (new position < row_count, signed).
// NOTE(review): this is a plain add, whereas the non-group-by template uses an
// NSW add for the same step -- confirm whether the difference is intentional.
728 BinaryOperator::Create(Instruction::Add, pos, pos_step_i64,
"", bb_forbody);
729 ICmpInst* loop_or_exit =
730 new ICmpInst(*bb_forbody, ICmpInst::ICMP_SLT, pos_inc, row_count,
"");
// Scan-limit variant of the back-edge:
//   .forbody       branches to filter_match when crt_matched != 0, else to
//                  filter_nomatch.
//   filter_match   adds crt_matched to the loaded old_total_matched and
//                  continues the loop only when both loop_or_exit and
//                  limit_not_reached hold.
//   filter_nomatch continues the loop on loop_or_exit alone.
// The operands of the limit_not_reached compare and the branch that consumes
// the And are on lines missing from this excerpt.
731 if (check_scan_limit) {
732 auto crt_matched =
new LoadInst(crt_matched_ptr,
"crt_matched",
false, bb_forbody);
733 auto filter_match = BasicBlock::Create(
734 mod->getContext(),
"filter_match", query_func_ptr, bb_crit_edge);
735 llvm::Value* new_total_matched =
736 new LoadInst(old_total_matched_ptr,
"",
false, filter_match);
738 BinaryOperator::CreateAdd(new_total_matched, crt_matched,
"", filter_match);
// NOTE(review): this CHECK comes after new_total_matched has already been
// used as an operand on the preceding visible line.
739 CHECK(new_total_matched);
740 ICmpInst* limit_not_reached =
new ICmpInst(*filter_match,
744 "limit_not_reached");
748 BinaryOperator::Create(
749 BinaryOperator::And, loop_or_exit, limit_not_reached,
"", filter_match),
751 auto filter_nomatch = BasicBlock::Create(
752 mod->getContext(),
"filter_nomatch", query_func_ptr, bb_crit_edge);
753 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, filter_nomatch);
754 ICmpInst* crt_matched_nz =
new ICmpInst(
755 *bb_forbody, ICmpInst::ICMP_NE, crt_matched, ConstantInt::get(i32_type, 0),
"");
756 BranchInst::Create(filter_match, filter_nomatch, crt_matched_nz, bb_forbody);
// PHI incoming edges for the scan-limit variant. `pos_pre` is declared on a
// missing line; every use of it is replaced by pos_inc near the end of this
// function.
757 pos->addIncoming(pos_start_i64, bb_preheader);
758 pos->addIncoming(pos_pre, filter_match);
759 pos->addIncoming(pos_pre, filter_nomatch);
// Variant without the scan limit check (the `else` line is not visible): the
// loop branches back to itself directly.
761 pos->addIncoming(pos_start_i64, bb_preheader);
762 pos->addIncoming(pos_pre, bb_forbody);
763 BranchInst::Create(bb_forbody, bb_crit_edge, loop_or_exit, bb_forbody);
// ._crit_edge: branch to .exit.
767 BranchInst::Create(bb_exit, bb_crit_edge);
// .exit: rename the buffers, then copy results back with either one of the
// agg_from_smem_to_gmem_* helpers (passing the element count) or the selected
// write_back* helper (passing the byte size). The conditions selecting between
// these paths are on lines missing from this excerpt.
772 result_buffer->setName(
"shared_mem_result");
773 col_buffer->setName(
"col_buffer_global");
778 auto func_agg_from_smem_to_gmem =
780 ? mod->getFunction(
"agg_from_smem_to_gmem_count_binId")
781 : mod->getFunction(
"agg_from_smem_to_gmem_binId_count");
782 CHECK(func_agg_from_smem_to_gmem);
784 func_agg_from_smem_to_gmem,
785 std::vector<Value*>{col_buffer, result_buffer, (shared_mem_num_elements_lv)},
790 CallInst::Create(func_write_back,
791 std::vector<Value*>{col_buffer, result_buffer, shared_mem_bytes_lv},
794 ReturnInst::Create(mod->getContext(), bb_exit);
// Resolve the loop PHI's back-edge: replace every use of the placeholder with
// the real increment.
797 pos_pre->replaceAllUsesWith(pos_inc);
// Abort with a fatal log message if the verifier reports a problem. Verifier
// diagnostics are written to llvm::errs().
800 if (verifyFunction(*query_func_ptr, &llvm::errs())) {
801 LOG(
FATAL) <<
"Generated invalid code. ";
804 return query_func_ptr;
// Entry points that forward to the *_impl templates, choosing the attribute
// list type by LLVM version: llvm::AttributeList when LLVM_VERSION_MAJOR >= 6,
// llvm::AttributeSet in the second pair of definitions.
// NOTE(review): the signature lines and the `#else` / `#endif` lines are
// missing from this excerpt. Presumably these are query_template(...) and
// query_group_by_template(...), with the second pair under `#else` -- confirm
// against the full file.
807 #if LLVM_VERSION_MAJOR >= 6
// query_template, AttributeList variant.
809 const size_t aggr_col_count,
810 const bool hoist_literals,
811 const bool is_estimate_query) {
812 return query_template_impl<llvm::AttributeList>(
813 module, aggr_col_count, hoist_literals, is_estimate_query);
// query_group_by_template, AttributeList variant. The call arguments are not
// visible here.
816 const bool hoist_literals,
819 const bool check_scan_limit) {
820 return query_group_by_template_impl<llvm::AttributeList>(
// query_template, AttributeSet variant.
825 const size_t aggr_col_count,
826 const bool hoist_literals,
827 const bool is_estimate_query) {
828 return query_template_impl<llvm::AttributeSet>(
829 module, aggr_col_count, hoist_literals, is_estimate_query);
// query_group_by_template, AttributeSet variant. The call arguments are not
// visible here.
832 const bool hoist_literals,
835 const bool check_scan_limit) {
836 return query_group_by_template_impl<llvm::AttributeSet>(
llvm::Function * query_template_impl(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t * join_hash_tables
std::unique_ptr< llvm::Module > module(runtime_module_shallow_copy(cgen_state))
size_t getEntryCount() const
llvm::Function * pos_start(llvm::Module *mod)
llvm::Function * query_template(llvm::Module *module, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query)
llvm::Function * group_buff_idx(llvm::Module *mod)
llvm::Function * pos_step(llvm::Module *mod)
llvm::Function * default_func_builder(llvm::Module *mod, const std::string &name)
llvm::Function * query_group_by_template_impl(llvm::Module *mod, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t const int64_t int32_t * error_code
llvm::Function * query_group_by_template(llvm::Module *module, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit)
const int8_t const int64_t const uint64_t const int32_t * max_matched
size_t sharedMemBytes(const ExecutorDeviceType) const
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t uint32_t frag_idx
bool isWarpSyncRequired(const ExecutorDeviceType) const
GroupByMemSharing getGpuMemSharing() const
const int8_t const int64_t const uint64_t const int32_t const int64_t int64_t ** out
llvm::Function * row_process(llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals)
int32_t getTargetIdxForKey() const