OmniSciDB  21ac014ffc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TargetExprBuilder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "TargetExprBuilder.h"
24 
25 #include "CodeGenerator.h"
26 #include "Execute.h"
27 #include "GroupByAndAggregate.h"
28 #include "Logger/Logger.h"
29 #include "MaxwellCodegenPatch.h"
31 
32 #define LL_CONTEXT executor->cgen_state_->context_
33 #define LL_BUILDER executor->cgen_state_->ir_builder_
34 #define LL_BOOL(v) executor->ll_bool(v)
35 #define LL_INT(v) executor->cgen_state_->llInt(v)
36 #define LL_FP(v) executor->cgen_state_->llFp(v)
37 #define ROW_FUNC executor->cgen_state_->row_func_
38 
39 namespace {
40 
41 inline bool is_varlen_projection(const Analyzer::Expr* target_expr,
42  const SQLTypeInfo& ti) {
43  return dynamic_cast<const Analyzer::GeoExpr*>(target_expr) && ti.get_type() == kPOINT;
44 }
45 
// Maps a target/aggregate descriptor to the base runtime-function name(s) used
// to update its output slot(s). Multi-slot targets (AVG, varlen, geo) return
// several names; callers append suffixes (e.g. "_shared", "_double") later.
std::vector<std::string> agg_fn_base_names(const TargetInfo& target_info,
                                           const bool is_varlen_projection) {
  const auto& chosen_type = get_compact_type(target_info);
  if (is_varlen_projection) {
    // TODO: support other types here
    CHECK(chosen_type.is_geometry());
    return {"agg_id_varlen"};
  }
  if (!target_info.is_agg || target_info.agg_kind == kSAMPLE) {
    if (chosen_type.is_geometry()) {
      // one (pointer, size) slot pair per physical coordinate column
      return std::vector<std::string>(2 * chosen_type.get_physical_coord_cols(),
                                      "agg_id");
    }
    if (chosen_type.is_varlen()) {
      // not a varlen projection (not creating new varlen outputs). Just store the pointer
      // and offset into the input buffer in the output slots.
      return {"agg_id", "agg_id"};
    }
    return {"agg_id"};
  }
  switch (target_info.agg_kind) {
    case kAVG:
      // AVG is materialized as a (sum, count) pair and divided at reduction
      return {"agg_sum", "agg_count"};
    case kCOUNT:
      return {target_info.is_distinct ? "agg_count_distinct" : "agg_count"};
    case kMAX:
      return {"agg_max"};
    case kMIN:
      return {"agg_min"};
    case kSUM:
      return {"agg_sum"};
    // NOTE(review): the extracted listing dropped a `case` label here (doxygen
    // line 77); as written the next return is unreachable. Presumably
    // `case kAPPROX_COUNT_DISTINCT:` -- confirm against the original source.
      return {"agg_approximate_count_distinct"};
    case kAPPROX_QUANTILE:
      return {"agg_approx_quantile"};
    case kSINGLE_VALUE:
      return {"checked_single_agg_id"};
    case kSAMPLE:
      return {"agg_id"};
    default:
      UNREACHABLE() << "Unrecognized agg kind: " << std::to_string(target_info.agg_kind);
  }
  return {};
}
90 
// NOTE(review): the extracted listing dropped this function's signature
// (doxygen line 91); presumably
// `bool is_columnar_projection(const QueryMemoryDescriptor& query_mem_desc) {`
// -- confirm against the original source.
// True when the query is a projection with columnar output layout.
  return query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection &&
         query_mem_desc.didOutputColumnar();
}
95 
96 bool is_simple_count(const TargetInfo& target_info) {
97  return target_info.is_agg && target_info.agg_kind == kCOUNT && !target_info.is_distinct;
98 }
99 
100 bool target_has_geo(const TargetInfo& target_info) {
101  return target_info.is_agg ? target_info.agg_arg_type.is_geometry()
102  : target_info.sql_type.is_geometry();
103 }
104 
105 } // namespace
106 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 107, presumably `void TargetExprCodegen::codegen(`) and the
// `const QueryMemoryDescriptor& query_mem_desc,` parameter (doxygen line 110)
// -- confirm against the original source.
//
// Generates IR that materializes this target expression into its output
// slot(s). A GPU shared-memory fast path for a not-null simple COUNT emits an
// atomic add inline; all other targets are delegated to codegenAggregate().
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const CompilationOptions& co,
    const GpuSharedMemoryContext& gpu_smem_context,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx_in,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    DiamondCodegen& diamond_codegen,
    DiamondCodegen* sample_cfg) const {
  CHECK(group_by_and_agg);
  CHECK(executor);
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());

  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
  const auto arg_expr = agg_arg(target_expr);

  const bool varlen_projection = is_varlen_projection(target_expr, target_info.sql_type);
  const auto agg_fn_names = agg_fn_base_names(target_info, varlen_projection);
  const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
  // Window functions go through a dedicated codegen path; everything else uses
  // the generic aggregate-argument codegen.
  auto target_lvs =
      window_func
          ? std::vector<llvm::Value*>{executor->codegenWindowFunction(target_idx, co)}
          : group_by_and_agg->codegenAggArg(target_expr, co);
  const auto window_row_ptr = window_func
                                  ? group_by_and_agg->codegenWindowRowPointer(
                                        window_func, query_mem_desc, co, diamond_codegen)
                                  : nullptr;
  if (window_row_ptr) {
    // Redirect output to the window function's row buffer.
    agg_out_ptr_w_idx =
        std::make_tuple(window_row_ptr, std::get<1>(agg_out_ptr_w_idx_in));
    if (window_function_is_aggregate(window_func->getKind())) {
      out_row_idx = window_row_ptr;
    }
  }

  llvm::Value* str_target_lv{nullptr};
  if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
    // none encoding string, pop the packed pointer + length since
    // it's only useful for IS NULL checks and assumed to be only
    // two components (pointer and length) for the purpose of projection
    str_target_lv = target_lvs.front();
    target_lvs.erase(target_lvs.begin());
  }
  if (target_info.sql_type.is_geometry() && !varlen_projection) {
    // Geo cols are expanded to the physical coord cols. Each physical coord col is an
    // array. Ensure that the target values generated match the number of agg
    // functions before continuing
    if (target_lvs.size() < agg_fn_names.size()) {
      CHECK_EQ(target_lvs.size(), agg_fn_names.size() / 2);
      std::vector<llvm::Value*> new_target_lvs;
      new_target_lvs.reserve(agg_fn_names.size());
      for (const auto& target_lv : target_lvs) {
        // duplicate each coord value into a (pointer, size) slot pair
        new_target_lvs.push_back(target_lv);
        new_target_lvs.push_back(target_lv);
      }
      target_lvs = new_target_lvs;
    }
  }
  if (target_lvs.size() < agg_fn_names.size()) {
    CHECK_EQ(size_t(1), target_lvs.size());
    CHECK_EQ(size_t(2), agg_fn_names.size());
    for (size_t i = 1; i < agg_fn_names.size(); ++i) {
      target_lvs.push_back(target_lvs.front());
    }
  } else {
    // NOTE(review): the extracted listing dropped a line here (doxygen 177);
    // the dangling `} else {` further down implies it opened a nested
    // conditional, presumably
    // `if (target_info.sql_type.is_geometry() && !varlen_projection) {` --
    // confirm against the original source.
    if (!target_info.is_agg && !varlen_projection) {
      CHECK_EQ(static_cast<size_t>(2 * target_info.sql_type.get_physical_coord_cols()),
               target_lvs.size());
      CHECK_EQ(agg_fn_names.size(), target_lvs.size());
    }
    } else {
      CHECK(str_target_lv || (agg_fn_names.size() == target_lvs.size()));
      CHECK(target_lvs.size() == 1 || target_lvs.size() == 2);
    }
  }

  int32_t slot_index = base_slot_index;
  CHECK_GE(slot_index, 0);
  CHECK(is_group_by || static_cast<size_t>(slot_index) < agg_out_vec.size());

  uint32_t col_off{0};
  // GPU shared-memory fast path: a simple COUNT over a not-null argument can
  // be a single atomic add, bypassing the generic aggregate runtime.
  // NOTE(review): the extracted listing dropped a conjunct of this condition
  // (doxygen line 195); presumably `is_simple_count(target_info) &&` --
  // confirm against the original source.
  if (co.device_type == ExecutorDeviceType::GPU && query_mem_desc.threadsShareMemory() &&
      (!arg_expr || arg_expr->get_type_info().get_notnull())) {
    CHECK_EQ(size_t(1), agg_fn_names.size());
    const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(slot_index);
    llvm::Value* agg_col_ptr{nullptr};
    if (is_group_by) {
      if (query_mem_desc.didOutputColumnar()) {
        // columnar layout: column base offset plus the per-row index
        col_off = query_mem_desc.getColOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off % chosen_bytes);
        col_off /= chosen_bytes;
        CHECK(std::get<1>(agg_out_ptr_w_idx));
        auto offset =
            LL_BUILDER.CreateAdd(std::get<1>(agg_out_ptr_w_idx), LL_INT(col_off));
        agg_col_ptr = LL_BUILDER.CreateGEP(
            LL_BUILDER.CreateBitCast(
                std::get<0>(agg_out_ptr_w_idx),
                llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
            offset);
      } else {
        // row-wise layout: fixed offset within the row
        col_off = query_mem_desc.getColOnlyOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off % chosen_bytes);
        col_off /= chosen_bytes;
        agg_col_ptr = LL_BUILDER.CreateGEP(
            LL_BUILDER.CreateBitCast(
                std::get<0>(agg_out_ptr_w_idx),
                llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0)),
            LL_INT(col_off));
      }
    }

    if (chosen_bytes != sizeof(int32_t)) {
      CHECK_EQ(8, chosen_bytes);
      if (g_bigint_count) {
        // 64-bit count: atomic add on the full slot
        const auto acc_i64 = LL_BUILDER.CreateBitCast(
            is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
            llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
        if (gpu_smem_context.isSharedMemoryUsed()) {
          group_by_and_agg->emitCall(
              "agg_count_shared", std::vector<llvm::Value*>{acc_i64, LL_INT(int64_t(1))});
        } else {
          LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                     acc_i64,
                                     LL_INT(int64_t(1)),
                                     llvm::AtomicOrdering::Monotonic);
        }
      } else {
        // 64-bit slot but 32-bit count semantics: bump the low 32 bits
        auto acc_i32 = LL_BUILDER.CreateBitCast(
            is_group_by ? agg_col_ptr : agg_out_vec[slot_index],
            llvm::PointerType::get(get_int_type(32, LL_CONTEXT), 0));
        if (gpu_smem_context.isSharedMemoryUsed()) {
          // address space 3 is GPU shared memory
          acc_i32 = LL_BUILDER.CreatePointerCast(
              acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
        }
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      }
    } else {
      const auto acc_i32 = (is_group_by ? agg_col_ptr : agg_out_vec[slot_index]);
      if (gpu_smem_context.isSharedMemoryUsed()) {
        // Atomic operation on address space level 3 (Shared):
        const auto shared_acc_i32 = LL_BUILDER.CreatePointerCast(
            acc_i32, llvm::Type::getInt32PtrTy(LL_CONTEXT, 3));
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   shared_acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      } else {
        LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
                                   acc_i32,
                                   LL_INT(1),
                                   llvm::AtomicOrdering::Monotonic);
      }
    }
    return;
  }

  // Generic path: delegate to the per-slot aggregate codegen.
  codegenAggregate(group_by_and_agg,
                   executor,
                   query_mem_desc,
                   co,
                   target_lvs,
                   agg_out_ptr_w_idx,
                   agg_out_vec,
                   output_buffer_byte_stream,
                   out_row_idx,
                   varlen_output_buffer,
                   slot_index);
}
285 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 286, presumably `void TargetExprCodegen::codegenAggregate(`) and the
// `const QueryMemoryDescriptor& query_mem_desc,` parameter (doxygen line 289)
// -- confirm against the original source. Several other lines inside the body
// were also dropped; each gap is flagged below.
//
// Emits the per-slot aggregate update calls for one target expression:
// resolves the output slot pointer, applies null-skipping / FP casting, picks
// the width- and type-suffixed runtime function, and emits the call. Special
// cases: COUNT DISTINCT over arrays, varlen projections, APPROX_QUANTILE and
// window-function peer handling.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const CompilationOptions& co,
    const std::vector<llvm::Value*>& target_lvs,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    int32_t slot_index) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  size_t target_lv_idx = 0;
  const bool lazy_fetched{executor->plan_state_->isLazyFetchColumn(target_expr)};

  CodeGenerator code_generator(executor);

  // NOTE(review): the argument list of this call was dropped (doxygen line
  // 305); presumably `target_info, is_varlen_projection(target_expr,
  // target_info.sql_type));` -- confirm against the original source.
  const auto agg_fn_names = agg_fn_base_names(
  auto arg_expr = agg_arg(target_expr);

  for (const auto& agg_base_name : agg_fn_names) {
    if (target_info.is_distinct && arg_expr->get_type_info().is_array()) {
      // COUNT(DISTINCT array_col): handled by a dedicated external call.
      CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(slot_index)),
               sizeof(int64_t));
      // TODO(miyu): check if buffer may be columnar here
      CHECK(!query_mem_desc.didOutputColumnar());
      const auto& elem_ti = arg_expr->get_type_info().get_elem_type();
      uint32_t col_off{0};
      if (is_group_by) {
        // NOTE(review): `col_off` is divided below but never assigned from
        // `col_off_in_bytes` in this extraction -- looks like a dropped/garbled
        // assignment; confirm against the original source.
        const auto col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(slot_index);
        CHECK_EQ(size_t(0), col_off_in_bytes % sizeof(int64_t));
        col_off /= sizeof(int64_t);
      }
      executor->cgen_state_->emitExternalCall(
          "agg_count_distinct_array_" + numeric_type_name(elem_ti),
          llvm::Type::getVoidTy(LL_CONTEXT),
          {is_group_by
               ? LL_BUILDER.CreateGEP(std::get<0>(agg_out_ptr_w_idx), LL_INT(col_off))
               : agg_out_vec[slot_index],
           target_lvs[target_lv_idx],
           code_generator.posArg(arg_expr),
           elem_ti.is_fp()
               ? static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(elem_ti))
               : static_cast<llvm::Value*>(
                     executor->cgen_state_->inlineIntNull(elem_ti))});
      ++slot_index;
      ++target_lv_idx;
      continue;
    }

    llvm::Value* agg_col_ptr{nullptr};
    const auto chosen_bytes =
        static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index));
    const auto& chosen_type = get_compact_type(target_info);
    // NOTE(review): the tail of this ternary was dropped (doxygen lines
    // 344-346); presumably it selects between `arg_expr->get_type_info()` and
    // the target's own type -- confirm against the original source.
    const auto& arg_type =
        ((arg_expr && arg_expr->get_type_info().get_type() != kNULLT) &&
    const bool is_fp_arg =
        !lazy_fetched && arg_type.get_type() != kNULLT && arg_type.is_fp();
    if (is_group_by) {
      agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
                                                          out_row_idx,
                                                          agg_out_ptr_w_idx,
                                                          query_mem_desc,
                                                          chosen_bytes,
                                                          slot_index,
                                                          target_idx);
      CHECK(agg_col_ptr);
      agg_col_ptr->setName("agg_col_ptr");
    }

    // NOTE(review): the guard opening this varlen-projection branch was
    // dropped (doxygen line 361, likely `if (is_varlen_projection) {` or
    // similar), as were doxygen lines 364 and 370 (the latter presumably
    // `if (co.device_type == ExecutorDeviceType::GPU &&`). Confirm against the
    // original source.
    CHECK(!query_mem_desc.didOutputColumnar());

    CHECK_LT(target_lv_idx, target_lvs.size());
    CHECK(varlen_output_buffer);
    auto target_lv = target_lvs[target_lv_idx];

    std::string agg_fname_suffix = "";
        query_mem_desc.threadsShareMemory()) {
      agg_fname_suffix += "_shared";
    }

    // first write the varlen data into the varlen buffer and get the pointer location
    // into the varlen buffer
    auto& builder = executor->cgen_state_->ir_builder_;
    auto orig_bb = builder.GetInsertBlock();
    auto target_ptr_type = llvm::dyn_cast<llvm::PointerType>(target_lv->getType());
    CHECK(target_ptr_type) << "Varlen projections expect a pointer input.";
    auto is_nullptr =
        builder.CreateICmp(llvm::CmpInst::ICMP_EQ,
                           target_lv,
                           llvm::ConstantPointerNull::get(llvm::PointerType::get(
                               target_ptr_type->getPointerElementType(), 0)));
    llvm::BasicBlock* true_bb{nullptr};
    {
      DiamondCodegen nullcheck_diamond(
          is_nullptr, executor, false, "varlen_null_check", nullptr, false);
      // maintain a reference to the true bb, overriding the diamond codegen destructor
      true_bb = nullcheck_diamond.cond_true_;
      // if not null, process the pointer and insert it into the varlen buffer
      builder.SetInsertPoint(nullcheck_diamond.cond_false_);
      auto arr_ptr_lv = executor->cgen_state_->ir_builder_.CreateBitCast(
          target_lv,
          llvm::PointerType::get(get_int_type(8, executor->cgen_state_->context_), 0));
      // NOTE(review): the initializer of `chosen_bytes` was dropped (doxygen
      // line 398); confirm against the original source.
      const int64_t chosen_bytes =
      const auto output_buffer_slot = LL_BUILDER.CreateZExt(
          LL_BUILDER.CreateLoad(get_arg_by_name(ROW_FUNC, "old_total_matched")),
          llvm::Type::getInt64Ty(LL_CONTEXT));
      const auto varlen_buffer_row_sz = query_mem_desc.varlenOutputBufferElemSize();
      CHECK(varlen_buffer_row_sz);
      const auto output_buffer_slot_bytes = LL_BUILDER.CreateAdd(
          LL_BUILDER.CreateMul(output_buffer_slot,
                               executor->cgen_state_->llInt(
                                   static_cast<int64_t>(*varlen_buffer_row_sz))),
          executor->cgen_state_->llInt(static_cast<int64_t>(
              query_mem_desc.varlenOutputRowSizeToSlot(slot_index))));

      std::vector<llvm::Value*> varlen_agg_args{
          executor->castToIntPtrTyIn(varlen_output_buffer, 8),
          output_buffer_slot_bytes,
          arr_ptr_lv,
          executor->cgen_state_->llInt(chosen_bytes)};
      auto varlen_offset_ptr =
          group_by_and_agg->emitCall(agg_base_name + agg_fname_suffix, varlen_agg_args);

      // then write that pointer location into the 64 bit slot in the output buffer
      auto varlen_offset_int = LL_BUILDER.CreatePtrToInt(
          varlen_offset_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
      builder.CreateBr(nullcheck_diamond.cond_true_);

      // use the true block to do the output buffer insertion regardless of nullness
      builder.SetInsertPoint(nullcheck_diamond.cond_true_);
      auto output_phi =
          builder.CreatePHI(llvm::Type::getInt64Ty(executor->cgen_state_->context_), 2);
      output_phi->addIncoming(varlen_offset_int, nullcheck_diamond.cond_false_);
      output_phi->addIncoming(executor->cgen_state_->llInt(static_cast<int64_t>(0)),
                              orig_bb);

      std::vector<llvm::Value*> agg_args{agg_col_ptr, output_phi};
      group_by_and_agg->emitCall("agg_id" + agg_fname_suffix, agg_args);
    }
    CHECK(true_bb);
    builder.SetInsertPoint(true_bb);

    ++slot_index;
    ++target_lv_idx;
    continue;
    }

    const bool float_argument_input = takes_float_argument(target_info);
    const bool is_count_in_avg = target_info.agg_kind == kAVG && target_lv_idx == 1;
    // The count component of an average should never be compacted.
    const auto agg_chosen_bytes =
        float_argument_input && !is_count_in_avg ? sizeof(float) : chosen_bytes;
    if (float_argument_input) {
      CHECK_GE(chosen_bytes, sizeof(float));
    }

    auto target_lv = target_lvs[target_lv_idx];
    const auto needs_unnest_double_patch = group_by_and_agg->needsUnnestDoublePatch(
        target_lv, agg_base_name, query_mem_desc.threadsShareMemory(), co);
    const auto need_skip_null = !needs_unnest_double_patch && target_info.skip_null_val;
    if (!needs_unnest_double_patch) {
      if (need_skip_null && !is_agg_domain_range_equivalent(target_info.agg_kind)) {
        // convert the null sentinel so the skip_val variant can recognize it
        target_lv = group_by_and_agg->convertNullIfAny(arg_type, target_info, target_lv);
      } else if (is_fp_arg) {
        target_lv = executor->castToFP(target_lv, arg_type, target_info.sql_type);
      }
      if (!dynamic_cast<const Analyzer::AggExpr*>(target_expr) || arg_expr) {
        target_lv =
            executor->cgen_state_->castToTypeIn(target_lv, (agg_chosen_bytes << 3));
      }
    }

    const bool is_simple_count_target = is_simple_count(target_info);
    llvm::Value* str_target_lv{nullptr};
    if (target_lvs.size() == 3 && !target_has_geo(target_info)) {
      // none encoding string
      str_target_lv = target_lvs.front();
    }
    // agg_args[0] = destination slot pointer, agg_args[1] = value to fold in
    std::vector<llvm::Value*> agg_args{
        executor->castToIntPtrTyIn((is_group_by ? agg_col_ptr : agg_out_vec[slot_index]),
                                   (agg_chosen_bytes << 3)),
        (is_simple_count_target && !arg_expr)
            ? (agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0))
                                                   : LL_INT(int64_t(0)))
            : (is_simple_count_target && arg_expr && str_target_lv ? str_target_lv
                                                                   : target_lv)};
    if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
      if (is_simple_count_target && arg_expr && str_target_lv) {
        agg_args[1] =
            agg_chosen_bytes == sizeof(int32_t) ? LL_INT(int32_t(0)) : LL_INT(int64_t(0));
      }
    }
    // pick the width/type-suffixed runtime function variant
    std::string agg_fname{agg_base_name};
    if (is_fp_arg) {
      if (!lazy_fetched) {
        if (agg_chosen_bytes == sizeof(float)) {
          CHECK_EQ(arg_type.get_type(), kFLOAT);
          agg_fname += "_float";
        } else {
          CHECK_EQ(agg_chosen_bytes, sizeof(double));
          agg_fname += "_double";
        }
      }
    } else if (agg_chosen_bytes == sizeof(int32_t)) {
      agg_fname += "_int32";
    } else if (agg_chosen_bytes == sizeof(int16_t) &&
               query_mem_desc.didOutputColumnar()) {
      agg_fname += "_int16";
    } else if (agg_chosen_bytes == sizeof(int8_t) && query_mem_desc.didOutputColumnar()) {
      agg_fname += "_int8";
    }

    // NOTE(review): the condition opening this branch was dropped (doxygen
    // line 508); presumably `if (is_distinct_target(target_info)) {` --
    // confirm against the original source.
      CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
      CHECK(!chosen_type.is_fp());
      group_by_and_agg->codegenCountDistinct(
          target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
    } else if (target_info.agg_kind == kAPPROX_QUANTILE) {
      CHECK_EQ(agg_chosen_bytes, sizeof(int64_t));
      group_by_and_agg->codegenApproxQuantile(
          target_idx, target_expr, agg_args, query_mem_desc, co.device_type);
    } else {
      const auto& arg_ti = target_info.agg_arg_type;
      if (need_skip_null && !arg_ti.is_geometry()) {
        agg_fname += "_skip_val";
      }

      // NOTE(review): the first conjunct of this condition was dropped
      // (doxygen line 523); confirm against the original source.
          (need_skip_null && !arg_ti.is_geometry())) {
        llvm::Value* null_in_lv{nullptr};
        if (arg_ti.is_fp()) {
          null_in_lv =
              static_cast<llvm::Value*>(executor->cgen_state_->inlineFpNull(arg_ti));
        } else {
          // NOTE(review): the ternary condition was dropped (doxygen line 531);
          // confirm against the original source.
          null_in_lv = static_cast<llvm::Value*>(executor->cgen_state_->inlineIntNull(
                  ? arg_ti
                  : target_info.sql_type));
        }
        CHECK(null_in_lv);
        auto null_lv =
            executor->cgen_state_->castToTypeIn(null_in_lv, (agg_chosen_bytes << 3));
        agg_args.push_back(null_lv);
      }
      if (!target_info.is_distinct) {
        // NOTE(review): the guard opening this shared-memory branch was
        // dropped (doxygen line 541, presumably a GPU device-type check);
        // confirm against the original source.
            query_mem_desc.threadsShareMemory()) {
          agg_fname += "_shared";
          if (needs_unnest_double_patch) {
            agg_fname = patch_agg_fname(agg_fname);
          }
        }
        auto agg_fname_call_ret_lv = group_by_and_agg->emitCall(agg_fname, agg_args);

        if (agg_fname.find("checked") != std::string::npos) {
          // "checked_" variants report overflow/error via their return value
          group_by_and_agg->checkErrorCode(agg_fname_call_ret_lv);
        }
      }
    }
    const auto window_func = dynamic_cast<const Analyzer::WindowFunction*>(target_expr);
    if (window_func && window_function_requires_peer_handling(window_func)) {
      // NOTE(review): the initializer of `window_func_context` was dropped
      // (doxygen line 558); confirm against the original source.
      const auto window_func_context =
      const auto pending_outputs =
          LL_INT(window_func_context->aggregateStatePendingOutputs());
      executor->cgen_state_->emitExternalCall("add_window_pending_output",
                                              llvm::Type::getVoidTy(LL_CONTEXT),
                                              {agg_args.front(), pending_outputs});
      const auto& window_func_ti = window_func->get_type_info();
      std::string apply_window_pending_outputs_name = "apply_window_pending_outputs";
      switch (window_func_ti.get_type()) {
        case kFLOAT: {
          apply_window_pending_outputs_name += "_float";
          if (query_mem_desc.didOutputColumnar()) {
            apply_window_pending_outputs_name += "_columnar";
          }
          break;
        }
        case kDOUBLE: {
          apply_window_pending_outputs_name += "_double";
          break;
        }
        default: {
          apply_window_pending_outputs_name += "_int";
          if (query_mem_desc.didOutputColumnar()) {
            apply_window_pending_outputs_name +=
                std::to_string(window_func_ti.get_size() * 8);
          } else {
            apply_window_pending_outputs_name += "64";
          }
          break;
        }
      }
      const auto partition_end =
          LL_INT(reinterpret_cast<int64_t>(window_func_context->partitionEnd()));
      executor->cgen_state_->emitExternalCall(apply_window_pending_outputs_name,
                                              llvm::Type::getVoidTy(LL_CONTEXT),
                                              {pending_outputs,
                                               target_lvs.front(),
                                               partition_end,
                                               code_generator.posArg(nullptr)});
    }

    ++slot_index;
    ++target_lv_idx;
  }
}
603 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 604); presumably `void TargetExprCodegenBuilder::operator()(const
// Analyzer::Expr* target_expr,` -- confirm against the original source.
//
// Registers one target expression for codegen: computes its TargetInfo,
// adjusts null-skipping, and appends it to either the SAMPLE list or the
// regular list while advancing the slot/target counters.
    const Executor* executor,
    const CompilationOptions& co) {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  if (query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter) == 0) {
    // zero-width slot: nothing materialized, but the counters still advance
    CHECK(!dynamic_cast<const Analyzer::AggExpr*>(target_expr));
    ++slot_index_counter;
    ++target_index_counter;
    return;
  }
  if (dynamic_cast<const Analyzer::UOper*>(target_expr) &&
      static_cast<const Analyzer::UOper*>(target_expr)->get_optype() == kUNNEST) {
    throw std::runtime_error("UNNEST not supported in the projection list yet.");
  }
  if ((executor->plan_state_->isLazyFetchColumn(target_expr) || !is_group_by) &&
      (static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_index_counter)) <
       sizeof(int64_t)) &&
      !is_columnar_projection(query_mem_desc)) {
    // TODO(miyu): enable different byte width in the layout w/o padding
    // NOTE(review): the statement that was here (doxygen line 623, presumably
    // a `throw` forcing a retry without compaction) was dropped -- confirm
    // against the original source.
  }

  auto target_info = get_target_info(target_expr, g_bigint_count);
  auto arg_expr = agg_arg(target_expr);
  if (arg_expr) {
    // NOTE(review): the condition that was here (doxygen lines 629-630) was
    // dropped; as extracted, this assignment is unconditional. Confirm
    // against the original source.
    target_info.skip_null_val = false;
  } else if (query_mem_desc.getQueryDescriptionType() ==
      // NOTE(review): the right-hand side of this comparison (doxygen line
      // 633, presumably a QueryDescriptionType enumerator plus `&&`) was
      // dropped -- confirm against the original source.
             !arg_expr->get_type_info().is_varlen()) {
    // TODO: COUNT is currently not null-aware for varlen types. Need to add proper code
    // generation for handling varlen nulls.
    target_info.skip_null_val = true;
  } else if (constrained_not_null(arg_expr, ra_exe_unit.quals)) {
    target_info.skip_null_val = false;
  }
  }

  // NOTE(review): the rest of this condition (doxygen lines 644-646) was
  // dropped; it decides whether the target goes to the SAMPLE list -- confirm
  // against the original source.
  if (!(query_mem_desc.getQueryDescriptionType() ==
    sample_exprs_to_codegen.emplace_back(target_expr,
                                         target_info,
                                         slot_index_counter,
                                         target_index_counter++,
                                         is_group_by);
  } else {
    target_exprs_to_codegen.emplace_back(target_expr,
                                         target_info,
                                         slot_index_counter,
                                         target_index_counter++,
                                         is_group_by);
  }

  // NOTE(review): the argument list of this call was dropped (doxygen line
  // 661); presumably `target_info, is_varlen_projection(target_expr,
  // target_info.sql_type));` -- confirm against the original source.
  const auto agg_fn_names = agg_fn_base_names(
  slot_index_counter += agg_fn_names.size();
}
664 
665 namespace {
666 
// NOTE(review): the extracted listing dropped this function's signature head
// (doxygen line 667); presumably
// `int64_t get_initial_agg_val(const TargetInfo& target_info,` -- confirm
// against the original source.
//
// Initial slot value used by the SAMPLE empty-key CAS. Only dictionary-encoded
// string SAMPLE targets need a type-specific sentinel; everything else uses 0.
    const QueryMemoryDescriptor& query_mem_desc) {
  const bool is_group_by{query_mem_desc.isGroupBy()};
  if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_string() &&
      target_info.sql_type.get_compression() != kENCODING_NONE) {
    return get_agg_initial_val(target_info.agg_kind,
                               target_info.sql_type,
                               is_group_by,
                               query_mem_desc.getCompactByteWidth());
  }
  return 0;
}
679 
680 } // namespace
681 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 682); presumably `void TargetExprCodegenBuilder::codegen(` -- confirm
// against the original source.
//
// Drives codegen for all registered targets: first the regular targets, then
// (if any) the SAMPLE expressions via codegenSampleExpressions().
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const GpuSharedMemoryContext& gpu_smem_context,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    llvm::Value* varlen_output_buffer,
    DiamondCodegen& diamond_codegen) const {
  CHECK(group_by_and_agg);
  CHECK(executor);
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());

  for (const auto& target_expr_codegen : target_exprs_to_codegen) {
    target_expr_codegen.codegen(group_by_and_agg,
                                executor,
                                query_mem_desc,
                                co,
                                gpu_smem_context,
                                agg_out_ptr_w_idx,
                                agg_out_vec,
                                output_buffer_byte_stream,
                                out_row_idx,
                                varlen_output_buffer,
                                diamond_codegen);
  }
  if (!sample_exprs_to_codegen.empty()) {
    codegenSampleExpressions(group_by_and_agg,
                             executor,
                             query_mem_desc,
                             co,
                             agg_out_ptr_w_idx,
                             agg_out_vec,
                             output_buffer_byte_stream,
                             out_row_idx,
                             diamond_codegen);
  }
}
723 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 724); presumably `void TargetExprCodegenBuilder::codegenSampleExpressions(`
// -- confirm against the original source. Doxygen line 736 (between the CHECK
// and the `if`) was also dropped.
//
// Dispatches SAMPLE codegen: a single fixed-width SAMPLE uses the simple
// single-slot path; multiple SAMPLEs or varlen SAMPLEs need the multi-slot
// (CAS-guarded) path.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK(!sample_exprs_to_codegen.empty());
  if (sample_exprs_to_codegen.size() == 1 &&
      !sample_exprs_to_codegen.front().target_info.sql_type.is_varlen()) {
    codegenSingleSlotSampleExpression(group_by_and_agg,
                                      executor,
                                      query_mem_desc,
                                      co,
                                      agg_out_ptr_w_idx,
                                      agg_out_vec,
                                      output_buffer_byte_stream,
                                      out_row_idx,
                                      diamond_codegen);
  } else {
    codegenMultiSlotSampleExpressions(group_by_and_agg,
                                      executor,
                                      query_mem_desc,
                                      co,
                                      agg_out_ptr_w_idx,
                                      agg_out_vec,
                                      output_buffer_byte_stream,
                                      out_row_idx,
                                      diamond_codegen);
  }
}
760 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 761); presumably
// `void TargetExprCodegenBuilder::codegenSingleSlotSampleExpression(` --
// confirm against the original source. Doxygen line 774 was also dropped.
//
// Codegen for exactly one non-varlen SAMPLE target: delegates straight to the
// generic codegen path since no cross-slot atomicity is required.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK_EQ(size_t(1), sample_exprs_to_codegen.size());
  CHECK(!sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
  // no need for the atomic if we only have one SAMPLE target
  sample_exprs_to_codegen.front().codegen(group_by_and_agg,
                                          executor,
                                          query_mem_desc,
                                          co,
                                          {},
                                          agg_out_ptr_w_idx,
                                          agg_out_vec,
                                          output_buffer_byte_stream,
                                          out_row_idx,
                                          /*varlen_output_buffer=*/nullptr,
                                          diamond_codegen);
}
788 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 789); presumably
// `void TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(` --
// confirm against the original source. Doxygen line 802 was also dropped.
//
// Codegen for multiple (or varlen) SAMPLE targets: claims the first sample's
// slot via an empty-key CAS, then generates all sample writes under the
// resulting conditional so the set of slots is updated by a single winner.
    GroupByAndAggregate* group_by_and_agg,
    Executor* executor,
    const QueryMemoryDescriptor& query_mem_desc,
    const CompilationOptions& co,
    const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
    const std::vector<llvm::Value*>& agg_out_vec,
    llvm::Value* output_buffer_byte_stream,
    llvm::Value* out_row_idx,
    DiamondCodegen& diamond_codegen) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  CHECK(sample_exprs_to_codegen.size() > 1 ||
        sample_exprs_to_codegen.front().target_info.sql_type.is_varlen());
  const auto& first_sample_expr = sample_exprs_to_codegen.front();
  auto target_lvs = group_by_and_agg->codegenAggArg(first_sample_expr.target_expr, co);
  CHECK_GE(target_lvs.size(), size_t(1));

  const auto init_val =
      get_initial_agg_val(first_sample_expr.target_info, query_mem_desc);

  llvm::Value* agg_col_ptr{nullptr};
  if (is_group_by) {
    // varlen samples always use a full 64-bit slot (they store a pointer)
    const auto agg_column_size_bytes =
        query_mem_desc.isLogicalSizedColumnsAllowed() &&
                !first_sample_expr.target_info.sql_type.is_varlen()
            ? first_sample_expr.target_info.sql_type.get_size()
            : sizeof(int64_t);
    agg_col_ptr = group_by_and_agg->codegenAggColumnPtr(output_buffer_byte_stream,
                                                        out_row_idx,
                                                        agg_out_ptr_w_idx,
                                                        query_mem_desc,
                                                        agg_column_size_bytes,
                                                        first_sample_expr.base_slot_index,
                                                        first_sample_expr.target_idx);
  } else {
    CHECK_LT(static_cast<size_t>(first_sample_expr.base_slot_index), agg_out_vec.size());
    agg_col_ptr =
        executor->castToIntPtrTyIn(agg_out_vec[first_sample_expr.base_slot_index], 64);
  }

  auto sample_cas_lv = codegenSlotEmptyKey(agg_col_ptr, target_lvs, executor, init_val);

  // only the thread that won the CAS executes the sample writes
  DiamondCodegen sample_cfg(
      sample_cas_lv, executor, false, "sample_valcheck", &diamond_codegen, false);

  for (const auto& target_expr_codegen : sample_exprs_to_codegen) {
    target_expr_codegen.codegen(group_by_and_agg,
                                executor,
                                query_mem_desc,
                                co,
                                {},
                                agg_out_ptr_w_idx,
                                agg_out_vec,
                                output_buffer_byte_stream,
                                out_row_idx,
                                /*varlen_output_buffer=*/nullptr,
                                diamond_codegen,
                                &sample_cfg);
  }
}
850 
// NOTE(review): the extracted listing dropped the signature head (doxygen line
// 851); presumably
// `llvm::Value* TargetExprCodegenBuilder::codegenSlotEmptyKey(` -- confirm
// against the original source.
//
// Emits the compare-and-swap that claims the first SAMPLE slot: casts the
// sample value to the slot's integer width and calls the width-appropriate
// slotEmptyKeyCAS runtime function. Returns the i1 success flag.
    llvm::Value* agg_col_ptr,
    std::vector<llvm::Value*>& target_lvs,
    Executor* executor,
    const int64_t init_val) const {
  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
  const auto& first_sample_expr = sample_exprs_to_codegen.front();
  const auto first_sample_slot_bytes =
      first_sample_expr.target_info.sql_type.is_varlen()
          ? sizeof(int64_t)
          : first_sample_expr.target_info.sql_type.get_size();
  llvm::Value* target_lv_casted{nullptr};
  // deciding whether proper casting is required for the first sample's slot:
  if (first_sample_expr.target_info.sql_type.is_varlen()) {
    // varlen: the slot stores the pointer value itself
    target_lv_casted =
        LL_BUILDER.CreatePtrToInt(target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
  } else if (first_sample_expr.target_info.sql_type.is_fp()) {
    // Initialization value for SAMPLE on a float column should be 0
    CHECK_EQ(init_val, 0);
    if (query_mem_desc.isLogicalSizedColumnsAllowed()) {
      target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
          target_lvs.front(),
          first_sample_slot_bytes == sizeof(float) ? llvm::Type::getInt32Ty(LL_CONTEXT)
                                                   : llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      target_lv_casted = executor->cgen_state_->ir_builder_.CreateFPToSI(
          target_lvs.front(), llvm::Type::getInt64Ty(LL_CONTEXT));
    }
  } else if (first_sample_slot_bytes != sizeof(int64_t) &&
             !query_mem_desc.isLogicalSizedColumnsAllowed()) {
    // narrow integer written into a 64-bit slot: sign-extend
    target_lv_casted =
        executor->cgen_state_->ir_builder_.CreateCast(llvm::Instruction::CastOps::SExt,
                                                      target_lvs.front(),
                                                      llvm::Type::getInt64Ty(LL_CONTEXT));
  } else {
    target_lv_casted = target_lvs.front();
  }

  std::string slot_empty_cas_func_name("slotEmptyKeyCAS");
  llvm::Value* init_val_lv{LL_INT(init_val)};
  if (query_mem_desc.isLogicalSizedColumnsAllowed() &&
      !first_sample_expr.target_info.sql_type.is_varlen()) {
    // add proper suffix to the function name:
    switch (first_sample_slot_bytes) {
      case 1:
        slot_empty_cas_func_name += "_int8";
        break;
      case 2:
        slot_empty_cas_func_name += "_int16";
        break;
      case 4:
        slot_empty_cas_func_name += "_int32";
        break;
      case 8:
        break;
      default:
        UNREACHABLE() << "Invalid slot size for slotEmptyKeyCAS function.";
        break;
    }
    if (first_sample_slot_bytes != sizeof(int64_t)) {
      // shrink the init constant to match the narrow slot width
      init_val_lv = llvm::ConstantInt::get(
          get_int_type(first_sample_slot_bytes * 8, LL_CONTEXT), init_val);
    }
  }

  auto sample_cas_lv = executor->cgen_state_->emitExternalCall(
      slot_empty_cas_func_name,
      llvm::Type::getInt1Ty(executor->cgen_state_->context_),
      {agg_col_ptr, target_lv_casted, init_val_lv});
  return sample_cas_lv;
}
size_t varlenOutputRowSizeToSlot(const size_t slot_idx) const
#define LL_BUILDER
const Analyzer::Expr * agg_arg(const Analyzer::Expr *expr)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
bool target_has_geo(const TargetInfo &target_info)
bool constrained_not_null(const Analyzer::Expr *expr, const std::list< std::shared_ptr< Analyzer::Expr >> &quals)
llvm::BasicBlock * cond_false_
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
std::vector< std::string > agg_fn_base_names(const TargetInfo &target_info, const bool is_varlen_projection)
bool isLogicalSizedColumnsAllowed() const
SQLTypeInfo sql_type
Definition: TargetInfo.h:51
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen, DiamondCodegen *sample_cfg=nullptr) const
void codegenMultiSlotSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:512
#define UNREACHABLE()
Definition: Logger.h:250
#define CHECK_GE(x, y)
Definition: Logger.h:219
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
int64_t get_agg_initial_val(const SQLAgg agg, const SQLTypeInfo &ti, const bool enable_compaction, const unsigned min_byte_width_to_compact)
void codegenApproxQuantile(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
void checkErrorCode(llvm::Value *retCode)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:157
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
bool skip_null_val
Definition: TargetInfo.h:53
llvm::BasicBlock * cond_true_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:52
llvm::Value * codegenSlotEmptyKey(llvm::Value *agg_col_ptr, std::vector< llvm::Value * > &target_lvs, Executor *executor, const int64_t init_val) const
std::string patch_agg_fname(const std::string &agg_name)
Helpers for codegen of target expressions.
size_t getColOnlyOffInBytes(const size_t col_idx) const
Definition: sqldefs.h:73
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool is_varlen_projection(const Analyzer::Expr *target_expr, const SQLTypeInfo &ti)
bool is_agg
Definition: TargetInfo.h:49
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define LL_INT(v)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
void codegenSampleExpressions(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool g_bigint_count
Definition: sqldefs.h:75
void codegen(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, DiamondCodegen &diamond_codegen) const
#define LL_CONTEXT
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:153
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
SQLAgg agg_kind
Definition: TargetInfo.h:50
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
std::optional< size_t > varlenOutputBufferElemSize() const
bool window_function_is_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:42
#define CHECK_LT(x, y)
Definition: Logger.h:216
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
const Analyzer::Expr * target_expr
Definition: sqldefs.h:76
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
bool window_function_requires_peer_handling(const Analyzer::WindowFunction *window_func)
bool is_simple_count(const TargetInfo &target_info)
#define CHECK(condition)
Definition: Logger.h:206
bool is_geometry() const
Definition: sqltypes.h:510
static void resetWindowFunctionContext(Executor *executor)
int64_t get_initial_agg_val(const TargetInfo &target_info, const QueryMemoryDescriptor &query_mem_desc)
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:209
bool is_string() const
Definition: sqltypes.h:498
bool is_distinct
Definition: TargetInfo.h:54
void operator()(const Analyzer::Expr *target_expr, const Executor *executor, const CompilationOptions &co)
Definition: sqldefs.h:74
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int get_physical_coord_cols() const
Definition: sqltypes.h:359
Definition: sqldefs.h:72
size_t getColOffInBytes(const size_t col_idx) const
void codegenAggregate(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::vector< llvm::Value * > &target_lvs, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, llvm::Value *varlen_output_buffer, int32_t slot_index) const
bool is_columnar_projection(const QueryMemoryDescriptor &query_mem_desc)
#define ROW_FUNC
void codegenSingleSlotSampleExpression(GroupByAndAggregate *group_by_and_agg, Executor *executor, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const std::vector< llvm::Value * > &agg_out_vec, llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, DiamondCodegen &diamond_codegen) const
bool is_agg_domain_range_equivalent(const SQLAgg &agg_kind)
Definition: TargetInfo.h:78