47 #include <boost/algorithm/cxx11/any_of.hpp>
48 #include <boost/range/adaptor/reversed.hpp>
71 const auto compound =
dynamic_cast<const RelCompound*
>(ra);
72 const auto aggregate =
dynamic_cast<const RelAggregate*
>(ra);
73 return ((compound && compound->isAggregate()) || aggregate);
79 std::unordered_set<PhysicalInput> phys_inputs2;
80 for (
auto& phi : phys_inputs) {
84 catalog->getColumnIdBySpi(phi.table_id, phi.col_id), phi.
table_id, phi.db_id});
90 std::map<ChunkKey, std::set<foreign_storage::ForeignStorageMgr::ParallelismHint>>
91 parallelism_hints_per_table;
93 const auto table_id = physical_input.table_id;
97 const auto table = catalog->getMetadataForTable(table_id,
true);
99 !table->is_in_memory_system_table) {
100 const auto col_id = catalog->getColumnIdBySpi(table_id, physical_input.col_id);
101 const auto col_desc = catalog->getMetadataForColumn(table_id, col_id);
102 const auto foreign_table = catalog->getForeignTable(table_id);
103 for (
const auto& fragment :
104 foreign_table->fragmenter->getFragmentsForQuery().fragments) {
107 physical_input.db_id, table_id, col_id, fragment.fragmentId};
116 if (!chunk.isChunkOnDevice(
118 parallelism_hints_per_table[{physical_input.db_id, table_id}].insert(
120 fragment.fragmentId});
125 if (!parallelism_hints_per_table.empty()) {
130 CHECK(foreign_storage_mgr);
131 foreign_storage_mgr->setParallelismHints(parallelism_hints_per_table);
139 const auto table = catalog->getMetadataForTable(table_id,
false);
141 const auto spi_col_id = catalog->getColumnIdBySpi(table_id, col_id);
158 const auto info_schema_catalog =
160 CHECK(info_schema_catalog);
161 std::map<int32_t, std::vector<int32_t>> system_table_columns_by_table_id;
163 if (info_schema_catalog->getDatabaseId() != physical_input.db_id) {
166 const auto table_id = physical_input.table_id;
167 const auto table = info_schema_catalog->getMetadataForTable(table_id,
false);
169 if (table->is_in_memory_system_table) {
170 const auto column_id =
171 info_schema_catalog->getColumnIdBySpi(table_id, physical_input.col_id);
172 system_table_columns_by_table_id[table_id].emplace_back(column_id);
176 if (!system_table_columns_by_table_id.empty() &&
181 for (
const auto& [table_id, column_ids] : system_table_columns_by_table_id) {
184 info_schema_catalog->getDataMgr().deleteChunksWithPrefix(
185 ChunkKey{info_schema_catalog->getDatabaseId(), table_id},
194 const auto td = info_schema_catalog->getMetadataForTable(table_id);
196 CHECK(td->fragmenter);
197 auto fragment_count = td->fragmenter->getFragmentsForQuery().fragments.size();
198 CHECK_LE(fragment_count, static_cast<size_t>(1))
199 <<
"In-memory system tables are expected to have a single fragment.";
200 if (fragment_count > 0) {
201 for (
auto column_id : column_ids) {
204 const auto cd = info_schema_catalog->getMetadataForColumn(table_id, column_id);
206 info_schema_catalog->getDatabaseId(), table_id, column_id, 0};
208 &(info_schema_catalog->getDataMgr()),
226 const std::vector<Analyzer::Expr*>& work_unit_target_exprs,
227 const std::vector<TargetMetaInfo>& targets_meta) {
228 CHECK_EQ(work_unit_target_exprs.size(), targets_meta.size());
230 for (
size_t i = 0; i < targets_meta.size(); ++i) {
231 render_info.
targets.emplace_back(std::make_shared<Analyzer::TargetEntry>(
232 targets_meta[i].get_resname(),
233 work_unit_target_exprs[i]->get_shared_ptr(),
246 return {left_deep_join_tree->
getId()};
251 const std::vector<unsigned>& aggregate,
252 const std::vector<unsigned>& next_result)
const override {
254 std::copy(next_result.begin(), next_result.end(), std::back_inserter(
result));
265 : text_decoding_casts(text_decoding_casts)
266 , text_encoding_casts(text_encoding_casts) {}
273 default_disregard_casts_to_none_encoding) {}
287 if (!operand_ti.is_string() && casted_ti.is_dict_encoded_string()) {
290 if (!casted_ti.is_string()) {
297 if (operand_ti.is_none_encoded_string() && casted_ti.is_dict_encoded_string()) {
300 if (operand_ti.is_dict_encoded_string() && casted_ti.is_none_encoded_string()) {
301 if (!disregard_casts_to_none_encoding) {
335 const auto prev_disregard_casts_to_none_encoding_state =
337 const auto left_u_oper =
339 if (left_u_oper && left_u_oper->get_optype() ==
kCAST) {
347 const auto right_u_oper =
349 if (right_u_oper && right_u_oper->get_optype() ==
kCAST) {
367 const auto prev_disregard_casts_to_none_encoding_state =
369 if (u_oper && u_oper->get_optype() ==
kCAST) {
408 auto check_node_for_text_casts = [&cast_counts](
410 const bool disregard_casts_to_none_encoding) {
415 const auto this_node_cast_counts = visitor.
visit(expr);
420 for (
const auto& qual : ra_exe_unit.
quals) {
421 check_node_for_text_casts(qual.get(),
false);
423 for (
const auto& simple_qual : ra_exe_unit.
simple_quals) {
424 check_node_for_text_casts(simple_qual.get(),
false);
427 check_node_for_text_casts(groupby_expr.get(),
false);
429 for (
const auto& target_expr : ra_exe_unit.
target_exprs) {
430 check_node_for_text_casts(target_expr,
false);
432 for (
const auto& join_condition : ra_exe_unit.
join_quals) {
433 for (
const auto& join_qual : join_condition.quals) {
439 check_node_for_text_casts(join_qual.get(),
449 const std::vector<InputTableInfo>& query_infos,
454 auto const tuples_upper_bound =
458 [](
auto max,
auto const& query_info) {
459 return std::max(max, query_info.info.getNumTuples());
466 const bool has_text_casts =
467 text_cast_counts.text_decoding_casts + text_cast_counts.text_encoding_casts > 0UL;
469 if (!has_text_casts) {
472 std::ostringstream oss;
473 oss <<
"Query requires one or more casts between none-encoded and dictionary-encoded "
474 <<
"strings, and the estimated table size (" << tuples_upper_bound <<
" rows) "
475 <<
"exceeds the configured watchdog none-encoded string translation limit of "
477 throw std::runtime_error(oss.str());
488 !query_for_partial_outer_frag &&
489 (!render_info || (render_info && !render_info->
isInSitu()));
507 auto lock =
executor_->acquireExecuteMutex();
518 CHECK(!ed_seq.empty());
519 if (ed_seq.size() > 1) {
527 auto exec_desc_ptr = ed_seq.getDescriptor(0);
528 CHECK(exec_desc_ptr);
529 auto& exec_desc = *exec_desc_ptr;
530 const auto body = exec_desc.getBody();
535 const auto project =
dynamic_cast<const RelProject*
>(body);
542 const auto compound =
dynamic_cast<const RelCompound*
>(body);
544 if (compound->isDeleteViaSelect()) {
546 }
else if (compound->isUpdateViaSelect()) {
549 if (compound->isAggregate()) {
565 const bool just_explain_plan,
566 const bool explain_verbose,
570 << static_cast<int>(
query_dag_->getBuildState());
577 co_in, eo, just_explain_plan, explain_verbose, render_info);
579 constexpr
bool vlog_result_set_summary{
false};
580 if constexpr (vlog_result_set_summary) {
581 VLOG(1) << execution_result.getRows()->summaryToString();
585 VLOG(1) <<
"Running post execution callback.";
586 (*post_execution_callback_)();
588 return execution_result;
598 LOG(
INFO) <<
"Query unable to run in GPU mode, retrying on CPU";
609 const bool just_explain_plan,
610 const bool explain_verbose,
614 auto timer_setup =
DEBUG_TIMER(
"Query pre-execution steps");
624 std::string query_session{
""};
625 std::string query_str{
"N/A"};
626 std::string query_submitted_time{
""};
629 query_session =
query_state_->getConstSessionInfo()->get_session_id();
631 query_submitted_time =
query_state_->getQuerySubmittedTime();
634 auto validate_or_explain_query =
636 auto interruptable = !render_info && !query_session.empty() &&
643 std::tie(query_session, query_str) =
executor_->attachExecutorToQuerySession(
644 query_session, query_str, query_submitted_time);
650 query_submitted_time,
651 QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL);
656 [
this, &query_session, &interruptable, &query_submitted_time] {
660 executor_->clearQuerySessionStatus(query_session, query_submitted_time);
664 auto acquire_execute_mutex = [](Executor * executor) ->
auto{
665 auto ret = executor->acquireExecuteMutex();
670 auto lock = acquire_execute_mutex(
executor_);
679 executor_->checkPendingQueryStatus(query_session);
682 throw std::runtime_error(
"Query execution has been interrupted (pending query)");
686 throw std::runtime_error(
"Checking pending query status failed: unknown error");
689 int64_t queue_time_ms =
timer_stop(clock_begin);
702 if (just_explain_plan) {
705 std::vector<const RelAlgNode*> steps;
706 for (
size_t i = 0; i < ed_seq.size(); i++) {
707 steps.emplace_back(ed_seq.getDescriptor(i)->getBody());
708 steps.back()->setStepNumber(i + 1);
710 std::stringstream ss;
715 auto rs = std::make_shared<ResultSet>(ss.str());
723 ed_seq, co, eo, render_info, queue_time_ms);
730 const auto subquery_ra = subquery->getRelAlg();
732 if (subquery_ra->hasContextData()) {
739 if (global_hints || local_hints) {
740 const auto subquery_rel_alg_dag = subquery_executor.
getRelAlgDag();
745 subquery_rel_alg_dag->registerQueryHint(subquery_ra, *local_hints);
750 subquery->setExecutionResult(std::make_shared<ExecutionResult>(
result));
758 return executor_->computeColRangesCache(phys_inputs);
763 return executor_->computeStringDictionaryGenerations(phys_inputs);
768 return executor_->computeTableGenerations(phys_table_ids);
780 std::pair<std::vector<unsigned>, std::unordered_map<unsigned, JoinQualsPerNestingLevel>>
782 auto sort_node =
dynamic_cast<const RelSort*
>(root_node);
789 auto left_deep_tree_ids = visitor.
visit(root_node);
797 const auto source = sort->
getInput(0);
798 if (dynamic_cast<const RelSort*>(source)) {
799 throw std::runtime_error(
"Sort node not supported as input to another sort");
807 const size_t step_idx,
815 const auto sort =
dynamic_cast<const RelSort*
>(exe_desc_ptr->getBody());
817 size_t shard_count{0};
824 auto order_entries =
sort->getOrderEntries();
831 const auto source =
sort->getInput(0);
834 CHECK_EQ(temp_seq.size(), size_t(1));
847 merge_type(exe_desc_ptr->getBody()),
848 exe_desc_ptr->getBody()->getId(),
852 seq, std::make_pair(step_idx, step_idx + 1), co, eo, render_info,
queue_time_ms_);
857 LOG(
INFO) <<
"Retry the query via CPU mode";
859 std::make_pair(step_idx, step_idx + 1),
866 VLOG(1) <<
"Running post execution callback.";
867 (*post_execution_callback_)();
869 return query_step_result;
882 executor_->row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>(
884 executor_->row_set_mem_owner_->setDictionaryGenerations(string_dictionary_generations);
885 executor_->table_generations_ = table_generations;
886 executor_->agg_col_range_cache_ = agg_col_range;
893 const int64_t queue_time_ms,
894 const bool with_existing_temp_tables) {
897 if (!with_existing_temp_tables) {
907 auto get_descriptor_count = [&seq, &eo]() ->
size_t {
922 const auto exec_desc_count = get_descriptor_count();
936 const auto cached_res =
937 executor_->getResultSetRecyclerHolder().getCachedQueryResultSet(kv.second);
943 const auto num_steps = exec_desc_count - 1;
944 for (
size_t i = 0; i < exec_desc_count; i++) {
945 VLOG(1) <<
"Executing query step " << i <<
" / " << num_steps;
948 seq, i, co, eo_copied, (i == num_steps) ? render_info :
nullptr, queue_time_ms);
957 LOG(
INFO) <<
"Retrying current query step " << i <<
" / " << num_steps <<
" on CPU";
959 if (render_info && i == num_steps) {
967 (i == num_steps) ? render_info :
nullptr,
973 auto eo_extern = eo_copied;
974 eo_extern.executor_type = ::ExecutorType::Extern;
976 const auto body = exec_desc_ptr->
getBody();
977 const auto compound =
dynamic_cast<const RelCompound*
>(body);
978 if (compound && (compound->getGroupByCount() || compound->isAggregate())) {
979 LOG(
INFO) <<
"Also failed to run the query using interoperability";
983 seq, i, co, eo_extern, (i == num_steps) ? render_info :
nullptr, queue_time_ms);
992 const std::pair<size_t, size_t> interval,
996 const int64_t queue_time_ms) {
1001 for (
size_t i = interval.first; i < interval.second; i++) {
1008 (i == interval.second - 1) ? render_info :
nullptr,
1018 LOG(
INFO) <<
"Retrying current query step " << i <<
" on CPU";
1020 if (render_info && i == interval.second - 1) {
1027 (i == interval.second - 1) ? render_info :
nullptr,
1039 auto columnar_output_hint_enabled =
false;
1040 auto rowwise_output_hint_enabled =
false;
1042 VLOG(1) <<
"A user forces to run the query on the CPU execution mode";
1047 VLOG(1) <<
"A user enables keeping query resultset but is skipped since data "
1048 "recycler is disabled";
1051 VLOG(1) <<
"A user enables keeping query resultset but is skipped since query "
1052 "resultset recycler is disabled";
1054 VLOG(1) <<
"A user enables keeping query resultset";
1061 VLOG(1) <<
"A user enables keeping table function's resultset but is skipped "
1062 "since data recycler is disabled";
1065 VLOG(1) <<
"A user enables keeping table function's resultset but is skipped "
1066 "since query resultset recycler is disabled";
1068 VLOG(1) <<
"A user enables keeping table function's resultset";
1074 VLOG(1) <<
"A user enables watchdog for this query";
1080 VLOG(1) <<
"A user disables watchdog for this query";
1086 VLOG(1) <<
"A user enables dynamic watchdog for this query";
1092 VLOG(1) <<
"A user disables dynamic watchdog for this query";
1097 std::ostringstream oss;
1098 oss <<
"A user sets query time limit to " << query_hints.
query_time_limit <<
" ms";
1102 oss <<
" (and system automatically enables dynamic watchdog to activate the "
1103 "given \"query_time_limit\" hint)";
1105 VLOG(1) << oss.str();
1108 VLOG(1) <<
"A user enables loop join";
1112 VLOG(1) <<
"A user disables loop join";
1117 VLOG(1) <<
"A user forces the maximum size of a join hash table as "
1123 VLOG(1) <<
"Skip query hint \"opt_cuda_grid_and_block_size\" when at least one "
1124 "of the following query hints are given simultaneously: "
1125 "\"cuda_block_size\" and \"cuda_grid_size_multiplier\"";
1127 VLOG(1) <<
"A user enables optimization of cuda block and grid sizes";
1132 VLOG(1) <<
"A user forces the query to run with columnar output";
1133 columnar_output_hint_enabled =
true;
1135 VLOG(1) <<
"A user forces the query to run with rowwise output";
1136 rowwise_output_hint_enabled =
true;
1139 : columnar_output_hint_enabled;
1140 if (
g_cluster && (columnar_output_hint_enabled || rowwise_output_hint_enabled)) {
1141 LOG(
INFO) <<
"Currently, we do not support applying query hint to change query "
1142 "output layout in distributed mode.";
1149 const size_t step_idx,
1153 const int64_t queue_time_ms) {
1157 CHECK(exec_desc_ptr);
1158 auto& exec_desc = *exec_desc_ptr;
1159 const auto body = exec_desc.getBody();
1160 if (body->isNop()) {
1171 auto target_node = body;
1172 auto query_plan_dag_hash = body->getQueryPlanDagHash();
1173 if (
auto sort_body = dynamic_cast<const RelSort*>(body)) {
1174 target_node = sort_body->getInput(0);
1179 target_node->getQueryPlanDagHash(),
SortInfo());
1188 if (
auto cached_resultset =
1189 executor_->getResultSetRecyclerHolder().getCachedQueryResultSet(
1190 query_plan_dag_hash)) {
1191 VLOG(1) <<
"recycle resultset of the root node " << body->getRelNodeDagId()
1192 <<
" from resultset cache";
1193 body->setOutputMetainfo(cached_resultset->getTargetMetaInfo());
1195 std::vector<std::shared_ptr<Analyzer::Expr>>& cached_target_exprs =
1196 executor_->getResultSetRecyclerHolder().getTargetExprs(query_plan_dag_hash);
1197 std::vector<Analyzer::Expr*> copied_target_exprs;
1198 for (
const auto& expr : cached_target_exprs) {
1199 copied_target_exprs.push_back(expr.get());
1202 *render_info, copied_target_exprs, cached_resultset->getTargetMetaInfo());
1204 exec_desc.setResult({cached_resultset, cached_resultset->getTargetMetaInfo()});
1210 const auto compound =
dynamic_cast<const RelCompound*
>(body);
1212 if (compound->isDeleteViaSelect()) {
1213 executeDelete(compound, co_copied, eo_copied, queue_time_ms);
1214 }
else if (compound->isUpdateViaSelect()) {
1215 executeUpdate(compound, co_copied, eo_copied, queue_time_ms);
1217 exec_desc.setResult(
1218 executeCompound(compound, co_copied, eo_copied, render_info, queue_time_ms));
1219 VLOG(3) <<
"Returned from executeCompound(), addTemporaryTable("
1220 <<
static_cast<int>(-compound->getId()) <<
", ...)"
1221 <<
" exec_desc.getResult().getDataPtr()->rowCount()="
1222 << exec_desc.getResult().getDataPtr()->rowCount();
1223 if (exec_desc.getResult().isFilterPushDownEnabled()) {
1230 const auto project =
dynamic_cast<const RelProject*
>(body);
1232 if (project->isDeleteViaSelect()) {
1233 executeDelete(project, co_copied, eo_copied, queue_time_ms);
1234 }
else if (project->isUpdateViaSelect()) {
1235 executeUpdate(project, co_copied, eo_copied, queue_time_ms);
1237 std::optional<size_t> prev_count;
1243 if (shared::dynamic_castable_to_any<RelCompound, RelLogicalValues>(prev_body)) {
1248 const auto& prev_exe_result = prev_exec_desc->getResult();
1249 const auto prev_result = prev_exe_result.getRows();
1251 prev_count = prev_result->rowCount();
1252 VLOG(3) <<
"Setting output row count for projection node to previous node ("
1253 << prev_exec_desc->getBody()->toString(
1255 <<
") to " << *prev_count;
1261 project, co_copied, eo_copied, render_info, queue_time_ms, prev_count));
1262 VLOG(3) <<
"Returned from executeProject(), addTemporaryTable("
1263 <<
static_cast<int>(-project->getId()) <<
", ...)"
1264 <<
" exec_desc.getResult().getDataPtr()->rowCount()="
1265 << exec_desc.getResult().getDataPtr()->rowCount();
1266 if (exec_desc.getResult().isFilterPushDownEnabled()) {
1273 const auto aggregate =
dynamic_cast<const RelAggregate*
>(body);
1275 exec_desc.setResult(
1276 executeAggregate(aggregate, co_copied, eo_copied, render_info, queue_time_ms));
1280 const auto filter =
dynamic_cast<const RelFilter*
>(body);
1282 exec_desc.setResult(
1283 executeFilter(filter, co_copied, eo_copied, render_info, queue_time_ms));
1287 const auto sort =
dynamic_cast<const RelSort*
>(body);
1289 exec_desc.setResult(
1291 if (exec_desc.getResult().isFilterPushDownEnabled()) {
1298 if (logical_values) {
1300 addTemporaryTable(-logical_values->getId(), exec_desc.getResult().getDataPtr());
1303 const auto modify =
dynamic_cast<const RelModify*
>(body);
1308 const auto logical_union =
dynamic_cast<const RelLogicalUnion*
>(body);
1309 if (logical_union) {
1311 logical_union, seq, co_copied, eo_copied, render_info, queue_time_ms));
1317 exec_desc.setResult(
1322 LOG(
FATAL) <<
"Unhandled body type: "
1329 CHECK(dynamic_cast<const RelAggregate*>(body));
1330 CHECK_EQ(
size_t(1), body->inputCount());
1331 const auto input = body->getInput(0);
1332 body->setOutputMetainfo(input->getOutputMetainfo());
1338 ed.setResult({it->second, input->getOutputMetainfo()});
1348 return synthesized_physical_inputs_owned;
1352 const RexInput* rex_input)
const override {
1355 const auto scan_ra =
dynamic_cast<const RelScan*
>(input_ra);
1359 const auto col_id = rex_input->
getIndex();
1361 scan_ra->getCatalog().getMetadataForColumnBySpi(td->tableId, col_id + 1);
1362 if (cd && cd->columnType.get_physical_cols() > 0) {
1364 std::unordered_set<const RexInput*> synthesized_physical_inputs;
1365 for (
auto i = 0; i < cd->columnType.get_physical_cols(); i++) {
1366 auto physical_input =
1368 synthesized_physical_inputs_owned.emplace_back(physical_input);
1369 synthesized_physical_inputs.insert(physical_input);
1371 return synthesized_physical_inputs;
1380 const std::unordered_set<const RexInput*>& aggregate,
1381 const std::unordered_set<const RexInput*>& next_result)
const override {
1383 result.insert(next_result.begin(), next_result.end());
1392 if (
auto table_func = dynamic_cast<const RelTableFunction*>(ra_node)) {
1395 if (
auto join = dynamic_cast<const RelJoin*>(ra_node)) {
1399 if (!dynamic_cast<const RelLogicalUnion*>(ra_node)) {
1402 auto only_src = ra_node->
getInput(0);
1403 const bool is_join =
dynamic_cast<const RelJoin*
>(only_src) ||
1404 dynamic_cast<const RelLeftDeepInnerJoin*>(only_src);
1405 return is_join ? only_src : ra_node;
1408 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1412 std::unordered_set<const RexInput*> used_inputs =
1413 filter_expr ? visitor.visit(filter_expr) : std::unordered_set<const RexInput*>{};
1415 for (
size_t i = 0; i < sources_size; ++i) {
1416 const auto source_inputs = visitor.visit(compound->
getScalarSource(i));
1417 used_inputs.insert(source_inputs.begin(), source_inputs.end());
1419 std::vector<std::shared_ptr<RexInput>> used_inputs_owned(visitor.
get_inputs_owned());
1420 return std::make_pair(used_inputs, used_inputs_owned);
1423 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1426 std::unordered_set<const RexInput*> used_inputs;
1427 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
1428 const auto source = aggregate->
getInput(0);
1431 CHECK_GE(in_metainfo.size(), group_count);
1432 for (
size_t i = 0; i < group_count; ++i) {
1433 auto synthesized_used_input =
new RexInput(source, i);
1434 used_inputs_owned.emplace_back(synthesized_used_input);
1435 used_inputs.insert(synthesized_used_input);
1437 for (
const auto& agg_expr : aggregate->
getAggExprs()) {
1438 for (
size_t i = 0; i < agg_expr->size(); ++i) {
1439 const auto operand_idx = agg_expr->getOperand(i);
1440 CHECK_GE(in_metainfo.size(),
static_cast<size_t>(operand_idx));
1441 auto synthesized_used_input =
new RexInput(source, operand_idx);
1442 used_inputs_owned.emplace_back(synthesized_used_input);
1443 used_inputs.insert(synthesized_used_input);
1446 return std::make_pair(used_inputs, used_inputs_owned);
1449 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1452 std::unordered_set<const RexInput*> used_inputs;
1453 for (
size_t i = 0; i < project->
size(); ++i) {
1454 const auto proj_inputs = visitor.visit(project->
getProjectAt(i));
1455 used_inputs.insert(proj_inputs.begin(), proj_inputs.end());
1457 std::vector<std::shared_ptr<RexInput>> used_inputs_owned(visitor.
get_inputs_owned());
1458 return std::make_pair(used_inputs, used_inputs_owned);
1461 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1464 std::unordered_set<const RexInput*> used_inputs;
1467 used_inputs.insert(table_func_inputs.begin(), table_func_inputs.end());
1469 std::vector<std::shared_ptr<RexInput>> used_inputs_owned(visitor.
get_inputs_owned());
1470 return std::make_pair(used_inputs, used_inputs_owned);
1473 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1475 std::unordered_set<const RexInput*> used_inputs;
1476 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
1478 for (
size_t nest_level = 0; nest_level < data_sink_node->inputCount(); ++nest_level) {
1479 const auto source = data_sink_node->getInput(nest_level);
1480 const auto scan_source =
dynamic_cast<const RelScan*
>(source);
1482 CHECK(source->getOutputMetainfo().empty());
1483 for (
size_t i = 0; i < scan_source->size(); ++i) {
1484 auto synthesized_used_input =
new RexInput(scan_source, i);
1485 used_inputs_owned.emplace_back(synthesized_used_input);
1486 used_inputs.insert(synthesized_used_input);
1489 const auto& partial_in_metadata = source->getOutputMetainfo();
1490 for (
size_t i = 0; i < partial_in_metadata.size(); ++i) {
1491 auto synthesized_used_input =
new RexInput(source, i);
1492 used_inputs_owned.emplace_back(synthesized_used_input);
1493 used_inputs.insert(synthesized_used_input);
1497 return std::make_pair(used_inputs, used_inputs_owned);
1500 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1502 std::unordered_set<const RexInput*> used_inputs(logical_union->
inputCount());
1503 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
1504 used_inputs_owned.reserve(logical_union->
inputCount());
1505 VLOG(3) <<
"logical_union->inputCount()=" << logical_union->
inputCount();
1506 auto const n_inputs = logical_union->
inputCount();
1507 for (
size_t nest_level = 0; nest_level < n_inputs; ++nest_level) {
1508 auto input = logical_union->
getInput(nest_level);
1509 for (
size_t i = 0; i < input->size(); ++i) {
1510 used_inputs_owned.emplace_back(std::make_shared<RexInput>(input, i));
1511 used_inputs.insert(used_inputs_owned.back().get());
1514 return std::make_pair(std::move(used_inputs), std::move(used_inputs_owned));
1518 const auto scan_ra =
dynamic_cast<const RelScan*
>(ra_node);
1521 table_key.
db_id = scan_ra->getCatalog().getDatabaseId();
1522 const auto td = scan_ra->getTableDescriptor();
1524 table_key.table_id = td->tableId;
1531 const std::vector<size_t>& input_permutation) {
1533 std::unordered_map<const RelAlgNode*, int> input_to_nest_level;
1534 for (
size_t input_idx = 0; input_idx < data_sink_node->inputCount(); ++input_idx) {
1535 const auto input_node_idx =
1536 input_permutation.empty() ? input_idx : input_permutation[input_idx];
1537 const auto input_ra = data_sink_node->getInput(input_node_idx);
1541 size_t const idx =
dynamic_cast<const RelLogicalUnion*
>(ra_node) ? 0 : input_idx;
1542 const auto it_ok = input_to_nest_level.emplace(input_ra, idx);
1543 CHECK(it_ok.second);
1546 <<
" to nest level " << input_idx;
1548 return input_to_nest_level;
1551 std::pair<std::unordered_set<const RexInput*>, std::vector<std::shared_ptr<RexInput>>>
1554 if (
auto join = dynamic_cast<const RelJoin*>(data_sink_node)) {
1556 const auto condition =
join->getCondition();
1558 auto condition_inputs = visitor.visit(condition);
1559 std::vector<std::shared_ptr<RexInput>> condition_inputs_owned(
1561 return std::make_pair(condition_inputs, condition_inputs_owned);
1564 if (
auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(data_sink_node)) {
1565 CHECK_GE(left_deep_join->inputCount(), 2u);
1566 const auto condition = left_deep_join->getInnerCondition();
1568 auto result = visitor.visit(condition);
1569 for (
size_t nesting_level = 1; nesting_level <= left_deep_join->inputCount() - 1;
1571 const auto outer_condition = left_deep_join->getOuterCondition(nesting_level);
1572 if (outer_condition) {
1573 const auto outer_result = visitor.visit(outer_condition);
1574 result.insert(outer_result.begin(), outer_result.end());
1577 std::vector<std::shared_ptr<RexInput>> used_inputs_owned(visitor.
get_inputs_owned());
1578 return std::make_pair(
result, used_inputs_owned);
1581 if (dynamic_cast<const RelLogicalUnion*>(ra_node)) {
1584 }
else if (dynamic_cast<const RelTableFunction*>(ra_node)) {
1592 return std::make_pair(std::unordered_set<const RexInput*>{},
1593 std::vector<std::shared_ptr<RexInput>>{});
1597 std::vector<InputDescriptor>& input_descs,
1598 std::unordered_set<std::shared_ptr<const InputColDescriptor>>& input_col_descs_unique,
1600 const std::unordered_set<const RexInput*>& source_used_inputs,
1601 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level) {
1603 <<
" input_col_descs_unique.size()=" << input_col_descs_unique.size()
1604 <<
" source_used_inputs.size()=" << source_used_inputs.size();
1605 for (
const auto used_input : source_used_inputs) {
1606 const auto input_ra = used_input->getSourceNode();
1608 auto col_id = used_input->getIndex();
1609 auto it = input_to_nest_level.find(input_ra);
1610 if (it != input_to_nest_level.end()) {
1611 const int nest_level = it->second;
1612 if (
auto rel_scan = dynamic_cast<const RelScan*>(input_ra)) {
1613 const auto& catalog = rel_scan->getCatalog();
1614 col_id = catalog.getColumnIdBySpi(table_key.table_id, col_id + 1);
1616 input_col_descs_unique.insert(std::make_shared<const InputColDescriptor>(
1617 col_id, table_key.table_id, table_key.db_id, nest_level));
1618 }
else if (!dynamic_cast<const RelLogicalUnion*>(ra_node)) {
1619 throw std::runtime_error(
"Bushy joins not supported");
1625 std::pair<std::vector<InputDescriptor>,
1626 std::list<std::shared_ptr<const InputColDescriptor>>>
1628 const std::unordered_set<const RexInput*>& used_inputs,
1629 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level,
1630 const std::vector<size_t>& input_permutation) {
1631 std::vector<InputDescriptor> input_descs;
1633 for (
size_t input_idx = 0; input_idx < data_sink_node->inputCount(); ++input_idx) {
1634 const auto input_node_idx =
1635 input_permutation.empty() ? input_idx : input_permutation[input_idx];
1636 auto input_ra = data_sink_node->getInput(input_node_idx);
1638 input_descs.emplace_back(table_key.db_id, table_key.table_id, input_idx);
1640 std::unordered_set<std::shared_ptr<const InputColDescriptor>> input_col_descs_unique;
1642 input_col_descs_unique,
1645 input_to_nest_level);
1646 std::unordered_set<const RexInput*> join_source_used_inputs;
1647 std::vector<std::shared_ptr<RexInput>> join_source_used_inputs_owned;
1648 std::tie(join_source_used_inputs, join_source_used_inputs_owned) =
1651 input_col_descs_unique,
1653 join_source_used_inputs,
1654 input_to_nest_level);
1655 std::vector<std::shared_ptr<const InputColDescriptor>> input_col_descs(
1656 input_col_descs_unique.begin(), input_col_descs_unique.end());
1659 input_col_descs.end(),
1660 [](std::shared_ptr<const InputColDescriptor>
const& lhs,
1661 std::shared_ptr<const InputColDescriptor>
const& rhs) {
1662 return std::make_tuple(lhs->getScanDesc().getNestLevel(),
1664 lhs->getScanDesc().getTableKey()) <
1665 std::make_tuple(rhs->getScanDesc().getNestLevel(),
1667 rhs->getScanDesc().getTableKey());
1669 return {input_descs,
1670 std::list<std::shared_ptr<const InputColDescriptor>>(input_col_descs.begin(),
1671 input_col_descs.end())};
1675 std::tuple<std::vector<InputDescriptor>,
1676 std::list<std::shared_ptr<const InputColDescriptor>>,
1677 std::vector<std::shared_ptr<RexInput>>>
1679 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level,
1680 const std::vector<size_t>& input_permutation) {
1681 std::unordered_set<const RexInput*> used_inputs;
1682 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
1684 VLOG(3) <<
"used_inputs.size() = " << used_inputs.size();
1685 auto input_desc_pair =
1687 return std::make_tuple(
1688 input_desc_pair.first, input_desc_pair.second, used_inputs_owned);
1696 return project->
size();
1716 const std::shared_ptr<Analyzer::Expr> expr) {
1717 const auto& ti = expr->get_type_info();
1721 auto transient_dict_ti = ti;
1725 transient_dict_ti.set_fixed_size();
1726 return expr->add_cast(transient_dict_ti);
1730 std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources,
1731 const std::shared_ptr<Analyzer::Expr>& expr) {
1735 scalar_sources.push_back(
fold_expr(expr.get()));
1740 const std::shared_ptr<Analyzer::Expr>& input) {
1741 const auto& input_ti = input->get_type_info();
1742 if (input_ti.is_string() && input_ti.get_compression() ==
kENCODING_DICT) {
1753 std::vector<std::shared_ptr<Analyzer::Expr>> scalar_sources;
1755 VLOG(3) <<
"get_scalar_sources_size("
1757 <<
") = " << scalar_sources_size;
1758 for (
size_t i = 0; i < scalar_sources_size; ++i) {
1759 const auto scalar_rex =
scalar_at(i, ra_node);
1760 if (dynamic_cast<const RexRef*>(scalar_rex)) {
1766 const auto scalar_expr =
1768 const auto rewritten_expr =
rewrite_expr(scalar_expr.get());
1772 scalar_sources.push_back(
fold_expr(rewritten_expr.get()));
1778 return scalar_sources;
1788 size_t starting_projection_column_idx) {
1789 std::vector<std::shared_ptr<Analyzer::Expr>> scalar_sources;
1791 const auto scalar_rex =
scalar_at(i, ra_node);
1792 if (dynamic_cast<const RexRef*>(scalar_rex)) {
1798 std::shared_ptr<Analyzer::Expr> translated_expr;
1800 translated_expr = cast_to_column_type(translator.
translate(scalar_rex),
1803 colNames[i - starting_projection_column_idx]);
1805 translated_expr = translator.
translate(scalar_rex);
1808 const auto rewritten_expr =
rewrite_expr(scalar_expr.get());
1812 return scalar_sources;
1817 const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
1821 std::list<std::shared_ptr<Analyzer::Expr>> groupby_exprs;
1822 for (
size_t group_idx = 0; group_idx < compound->
getGroupByCount(); ++group_idx) {
1825 return groupby_exprs;
1830 const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
1831 std::list<std::shared_ptr<Analyzer::Expr>> groupby_exprs;
1832 for (
size_t group_idx = 0; group_idx < aggregate->
getGroupByCount(); ++group_idx) {
1835 return groupby_exprs;
1841 const auto filter_expr = filter_rex ? translator.
translate(filter_rex) :
nullptr;
1850 size_t target_expr_idx,
1851 std::shared_ptr<Analyzer::Expr>& target_expr,
1852 std::unordered_map<size_t, SQLTypeInfo>& target_exprs_type_infos) {
1855 if (agg_expr->get_is_distinct()) {
1856 SQLTypeInfo const& ti = agg_expr->get_arg()->get_type_info();
1858 target_exprs_type_infos.emplace(target_expr_idx, ti);
1859 target_expr = target_expr->deep_copy();
1865 target_exprs_type_infos.emplace(target_expr_idx, target_expr->get_type_info());
1870 std::vector<std::shared_ptr<Analyzer::Expr>>& target_exprs_owned,
1871 std::unordered_map<size_t, SQLTypeInfo>& target_exprs_type_infos,
1872 const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources,
1873 const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
1877 std::vector<Analyzer::Expr*> target_exprs;
1878 for (
size_t i = 0; i < compound->
size(); ++i) {
1880 const auto target_rex_agg =
dynamic_cast<const RexAgg*
>(target_rex);
1881 std::shared_ptr<Analyzer::Expr> target_expr;
1882 if (target_rex_agg) {
1887 const auto target_rex_scalar =
dynamic_cast<const RexScalar*
>(target_rex);
1888 const auto target_rex_ref =
dynamic_cast<const RexRef*
>(target_rex_scalar);
1889 if (target_rex_ref) {
1890 const auto ref_idx = target_rex_ref->
getIndex();
1892 CHECK_LE(ref_idx, groupby_exprs.size());
1893 const auto groupby_expr = *std::next(groupby_exprs.begin(), ref_idx - 1);
1899 target_expr =
fold_expr(rewritten_expr.get());
1910 target_exprs_type_infos.emplace(i, target_expr->get_type_info());
1913 target_exprs_owned.push_back(target_expr);
1914 target_exprs.push_back(target_expr.get());
1916 return target_exprs;
// Fragment: builds the list of target expressions for an aggregate node.
// The visible code emits targets in two passes: first one per group-by
// expression, then one per aggregate expression of the node.
// Returns raw pointers whose ownership is held by target_exprs_owned.
1920 std::vector<std::shared_ptr<Analyzer::Expr>>& target_exprs_owned,
1921 std::unordered_map<size_t, SQLTypeInfo>& target_exprs_type_infos,
1922 const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources,
1923 const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
1926 std::vector<Analyzer::Expr*> target_exprs;
// Starts at 1. NOTE(review): the use/increment of this counter is in elided
// lines; presumably it is the 1-based group key position — confirm.
1927 size_t group_key_idx = 1;
// Pass 1: one target per group-by expression.
1928 for (
const auto& groupby_expr : groupby_exprs) {
1931 target_exprs_owned.push_back(target_expr);
1932 target_exprs.push_back(target_expr.get());
// Pass 2: one target per aggregate expression, folded before being stored.
1935 for (
const auto& target_rex_agg : aggregate->
getAggExprs()) {
1939 target_expr =
fold_expr(target_expr.get());
1940 target_exprs_owned.push_back(target_expr);
1941 target_exprs.push_back(target_expr.get());
1943 return target_exprs;
1953 if (agg_expr && agg_expr->get_contains_agg()) {
1969 }
else if (
is_agg(expr)) {
// Fragment: produces one TargetMetaInfo per output field of ra_node.
// target_exprs must have exactly ra_node->size() non-null entries; both
// conditions are enforced with CHECK macros below.
1978 const std::vector<Analyzer::Expr*>& target_exprs) {
1979 std::vector<TargetMetaInfo> targets_meta;
1980 CHECK_EQ(ra_node->size(), target_exprs.size());
1981 for (
size_t i = 0; i < ra_node->size(); ++i) {
1982 CHECK(target_exprs[i]);
// Each entry pairs the node's field name with two type infos: `ti` (computed
// in an elided line) and the expression's own type info.
1985 targets_meta.emplace_back(
1986 ra_node->getFieldName(i), ti, target_exprs[i]->get_type_info());
1988 return targets_meta;
1994 const std::vector<Analyzer::Expr*>& target_exprs) {
1996 if (
auto const* input = dynamic_cast<RelCompound const*>(input0)) {
1998 }
else if (
auto const* input = dynamic_cast<RelProject const*>(input0)) {
2000 }
else if (
auto const* input = dynamic_cast<RelLogicalUnion const*>(input0)) {
2002 }
else if (
auto const* input = dynamic_cast<RelAggregate const*>(input0)) {
2004 }
else if (
auto const* input = dynamic_cast<RelScan const*>(input0)) {
2006 }
else if (
auto const* input = dynamic_cast<RelLogicalValues const*>(input0)) {
// Fragment of the UPDATE execution routine. Many original lines are elided in
// this excerpt; comments describe only what the visible statements do.
2019 const int64_t queue_time_ms) {
// Generic lambda that runs the update for a concrete node type. It captures
// `this`, `co` and `eo_in` by reference and is only invoked synchronously
// further down in this same function.
2027 auto execute_update_for_node = [
this, &co, &eo_in](
const auto node,
2029 const bool is_aggregate) {
2030 auto table_descriptor = node->getModifiedTableDescriptor();
2031 CHECK(table_descriptor);
// Variable-length updates require the table to have a deleted column.
2032 if (node->isVarlenUpdateRequired() && !table_descriptor->hasDeletedCol) {
2033 throw std::runtime_error(
2034 "UPDATE queries involving variable length columns are only supported on tables "
2035 "with the vacuum attribute set to 'delayed'");
2038 auto catalog = node->getModifiedTableCatalog();
// Transaction parameters are built from the node's target columns, output
// meta info and the varlen flag (the assignment target is in an elided line).
2043 std::make_unique<UpdateTransactionParameters>(table_descriptor,
2045 node->getTargetColumns(),
2046 node->getOutputMetainfo(),
2047 node->isVarlenUpdateRequired());
// Inner lambda that executes one execution unit. It captures the outer
// lambda's locals `table_descriptor` and `node` by reference; the visible call
// sites (below) are inside the outer lambda, so those references stay valid.
2051 auto execute_update_ra_exe_unit =
2052 [
this, &co, &eo_in, &table_infos, &table_descriptor, &node, catalog](
// Temporary tables force a columnar output hint.
2057 if (dml_transaction_parameters_->tableIsTemporary()) {
2058 eo.output_columnar_hint =
true;
2065 dml_transaction_parameters_.get());
2067 CHECK(update_transaction_parameters);
2070 auto table_update_metadata =
// After execution the transaction is finalized and the table optimizer is
// asked to vacuum fragments based on the collected update metadata.
2081 dml_transaction_parameters_->finalizeTransaction(*catalog);
2083 dml_transaction_parameters_->getTableDescriptor(),
executor_, *catalog};
2084 table_optimizer.vacuumFragmentsAboveMinSelectivity(table_update_metadata);
// Temporary-table path: the execution unit is rewritten into a columnar
// update of a single projected column.
2091 if (dml_transaction_parameters_->tableIsTemporary()) {
2093 auto query_rewrite = std::make_unique<QueryRewriter>(table_infos,
executor_);
2097 auto update_transaction_params =
2099 CHECK(update_transaction_params);
2100 const auto td = update_transaction_params->getTableDescriptor();
2102 const auto update_column_names = update_transaction_params->getUpdateColumnNames();
// Only a single updated column is supported on this path.
2103 if (update_column_names.size() > 1) {
2104 throw std::runtime_error(
2105 "Multi-column update is not yet supported for temporary tables.");
2109 catalog->getMetadataForColumn(td->tableId, update_column_names.front());
2111 auto projected_column_to_update = makeExpr<Analyzer::ColumnVar>(
2115 const auto rewritten_exe_unit = query_rewrite->rewriteColumnarUpdate(
2116 work_unit.exe_unit, projected_column_to_update);
// Variable-length results are rejected after the rewrite.
2117 if (rewritten_exe_unit.target_exprs.front()->get_type_info().is_varlen()) {
2118 throw std::runtime_error(
2119 "Variable length updates not yet supported on temporary tables.");
2121 execute_update_ra_exe_unit(rewritten_exe_unit, is_aggregate);
// Non-temporary path: the original execution unit is executed unchanged.
2123 execute_update_ra_exe_unit(work_unit.exe_unit, is_aggregate);
// Dispatch on the concrete node type: RelCompound or RelProject are handled;
// any other node type reaches the "Unsupported parent node" error at the end.
2127 if (
auto compound = dynamic_cast<const RelCompound*>(node)) {
2130 execute_update_for_node(compound, work_unit, compound->isAggregate());
2131 }
else if (
auto project = dynamic_cast<const RelProject*>(node)) {
2134 if (project->isSimple()) {
2135 CHECK_EQ(
size_t(1), project->inputCount());
2136 const auto input_ra = project->getInput(0);
// A simple projection directly over a sort: the scan limit is set to the row
// count of the sort's (already available) result.
2137 if (dynamic_cast<const RelSort*>(input_ra)) {
2138 const auto& input_table =
2141 work_unit.exe_unit.scan_limit = input_table->rowCount();
// Projections with window functions need the window computed first; the
// visible error message indicates this is rejected in distributed mode.
2144 if (project->hasWindowFunctionExpr() || project->hasPushedDownWindowExpr()) {
2159 throw std::runtime_error(
2160 "Update query having window function is not yet supported in distributed "
2165 computeWindow(work_unit, co, eo_in, column_cache, queue_time_ms);
// Projections are never treated as aggregates.
2167 execute_update_for_node(project, work_unit,
false);
2169 throw std::runtime_error(
"Unsupported parent node for update: " +
// Fragment of the DELETE execution routine. Many original lines are elided in
// this excerpt; comments describe only what the visible statements do.
2177 const int64_t queue_time_ms) {
// Generic lambda that runs the delete for a concrete node type; captures
// `this`, `co` and `eo_in` by reference and is invoked synchronously below.
2181 auto execute_delete_for_node = [
this, &co, &eo_in](
const auto node,
2183 const bool is_aggregate) {
2184 auto* table_descriptor = node->getModifiedTableDescriptor();
2185 CHECK(table_descriptor);
// Deletes are only possible on tables that have a deleted column.
2186 if (!table_descriptor->hasDeletedCol) {
2187 throw std::runtime_error(
2188 "DELETE queries are only supported on tables with the vacuum attribute set to "
2192 const auto catalog = node->getModifiedTableCatalog();
// Inner lambda executing one execution unit. `table_descriptor` is an outer
// lambda local captured by reference; visible call sites are inside the outer
// lambda, so the reference stays valid.
2198 auto execute_delete_ra_exe_unit =
2199 [
this, &table_infos, &table_descriptor, &eo_in, &co, catalog](
2200 const auto& exe_unit,
const bool is_aggregate) {
2202 std::make_unique<DeleteTransactionParameters>(table_descriptor, *catalog);
2205 CHECK(delete_params);
2211 eo.output_columnar_hint =
true;
// A delete execution unit is expected to have exactly one target expression.
2215 CHECK_EQ(exe_unit.target_exprs.size(), size_t(1));
2219 auto table_update_metadata =
2233 table_optimizer.vacuumFragmentsAboveMinSelectivity(table_update_metadata);
// Rewrite path: the execution unit is rewritten into a columnar delete that
// targets the table's deleted column (the guarding condition is elided).
2241 auto query_rewrite = std::make_unique<QueryRewriter>(table_infos,
executor_);
2242 const auto cd = catalog->getDeletedColumn(table_descriptor);
2244 auto delete_column_expr = makeExpr<Analyzer::ColumnVar>(
2247 catalog->getDatabaseId(), table_descriptor->tableId, cd->columnId},
2249 const auto rewritten_exe_unit =
2250 query_rewrite->rewriteColumnarDelete(work_unit.exe_unit, delete_column_expr);
2251 execute_delete_ra_exe_unit(rewritten_exe_unit, is_aggregate);
// Otherwise the original execution unit is executed unchanged.
2253 execute_delete_ra_exe_unit(work_unit.exe_unit, is_aggregate);
// Dispatch on the concrete node type: RelCompound or RelProject are handled;
// any other node type reaches the "Unsupported parent node" error at the end.
2257 if (
auto compound = dynamic_cast<const RelCompound*>(node)) {
2259 execute_delete_for_node(compound, work_unit, compound->isAggregate());
2260 }
else if (
auto project = dynamic_cast<const RelProject*>(node)) {
2262 if (project->isSimple()) {
2263 CHECK_EQ(
size_t(1), project->inputCount());
2264 const auto input_ra = project->getInput(0);
// A simple projection directly over a sort: the scan limit is set to the row
// count of the sort's result.
2265 if (dynamic_cast<const RelSort*>(input_ra)) {
2266 const auto& input_table =
2269 work_unit.exe_unit.scan_limit = input_table->rowCount();
// Projections are never treated as aggregates.
2272 execute_delete_for_node(project, work_unit,
false);
2274 throw std::runtime_error(
"Unsupported parent node for delete: " +
2283 const int64_t queue_time_ms) {
2300 const int64_t queue_time_ms) {
2330 const int64_t queue_time_ms,
2331 const std::optional<size_t> previous_count) {
2337 const auto input_ra = project->
getInput(0);
2338 if (dynamic_cast<const RelSort*>(input_ra)) {
2340 const auto& input_table =
2343 work_unit.exe_unit.scan_limit =
2344 std::min(input_table->getLimit(), input_table->rowCount());
2360 const int64_t queue_time_ms) {
2367 throw std::runtime_error(
"Table functions not supported in distributed mode yet");
2370 throw std::runtime_error(
"Table function support is disabled");
2376 const auto body = table_func_work_unit.body;
2379 const auto table_infos =
2393 auto cached_resultset =
2394 executor_->getResultSetRecyclerHolder().getCachedQueryResultSet(
2396 if (cached_resultset) {
2397 VLOG(1) <<
"recycle table function's resultset of the root node "
2399 result = {cached_resultset, cached_resultset->getTargetMetaInfo()};
2408 table_func_work_unit.exe_unit, table_infos, co, eo),
2409 body->getOutputMetainfo()};
2413 throw std::runtime_error(
"Table function ran out of memory during execution");
2415 auto query_exec_time =
timer_stop(query_exec_time_begin);
2416 result.setQueueTime(queue_time_ms);
2417 auto resultset_ptr =
result.getDataPtr();
2418 auto allow_auto_caching_resultset = resultset_ptr && resultset_ptr->hasValidBuffer() &&
2420 resultset_ptr->getBufferSizeBytes(co.device_type) <=
2423 if (use_resultset_recycler && (keep_result || allow_auto_caching_resultset) &&
2425 resultset_ptr->setExecTime(query_exec_time);
2426 resultset_ptr->setQueryPlanHash(table_func_work_unit.exe_unit.query_plan_dag_hash);
2427 resultset_ptr->setTargetMetaInfo(body->getOutputMetainfo());
2429 resultset_ptr->setInputTableKeys(std::move(input_table_keys));
2430 if (allow_auto_caching_resultset) {
2431 VLOG(1) <<
"Automatically keep table function's query resultset to recycler";
2433 executor_->getResultSetRecyclerHolder().putQueryResultSetToCache(
2434 table_func_work_unit.exe_unit.query_plan_dag_hash,
2435 resultset_ptr->getInputTableKeys(),
2437 resultset_ptr->getBufferSizeBytes(co.device_type),
2442 VLOG(1) <<
"Query hint \'keep_table_function_result\' is ignored since we do not "
2443 "support resultset recycling on distributed mode";
2445 VLOG(1) <<
"Query hint \'keep_table_function_result\' is ignored since a query "
2446 "has union-(all) operator";
2448 VLOG(1) <<
"Query hint \'keep_table_function_result\' is ignored since a query "
2449 "is either validate or explain query";
2451 VLOG(1) <<
"Query hint \'keep_table_function_result\' is ignored";
2468 std::vector<std::shared_ptr<Analyzer::Expr>> transformed_tuple;
2469 for (
const auto& element : tuple->getTuple()) {
2472 return makeExpr<Analyzer::ExpressionTuple>(transformed_tuple);
2476 throw std::runtime_error(
"Only columns supported in the window partition for now");
2478 return makeExpr<Analyzer::ColumnVar>(col->get_type_info(), col->getColumnKey(), 1);
// Fragment of the routine that prepares window function contexts for a work
// unit. Many original lines are elided; only visible statements are described.
2487 const int64_t queue_time_ms) {
// Window functions are limited to exactly one input table with one fragment.
2489 CHECK_EQ(query_infos.size(), size_t(1));
2490 if (query_infos.front().info.fragments.size() != 1) {
2491 throw std::runtime_error(
2492 "Only single fragment tables supported for window functions for now");
// The single table info is duplicated. NOTE(review): presumably so the
// partition hash table can be built as a self-join — confirm against the
// full source.
2497 query_infos.push_back(query_infos.front());
// Per-call caches shared across all window functions of this work unit, keyed
// by QueryPlanHash (or by target index for the context map).
2506 std::unordered_map<QueryPlanHash, std::shared_ptr<HashJoin>> partition_cache;
2507 std::unordered_map<QueryPlanHash, std::shared_ptr<std::vector<int64_t>>>
2508 sorted_partition_cache;
2509 std::unordered_map<QueryPlanHash, size_t> sorted_partition_key_ref_count_map;
2510 std::unordered_map<size_t, std::unique_ptr<WindowFunctionContext>>
2511 window_function_context_map;
2512 std::unordered_map<QueryPlanHash, AggregateTreeForWindowFraming> aggregate_tree_map;
// Build the partition condition: left null when there are no partition keys,
// a single expression for one key, an ExpressionTuple for several keys.
2522 std::shared_ptr<Analyzer::BinOper> partition_key_cond;
2523 if (partition_keys.size() >= 1) {
2524 std::shared_ptr<Analyzer::Expr> partition_key_tuple;
2525 if (partition_keys.size() > 1) {
2526 partition_key_tuple = makeExpr<Analyzer::ExpressionTuple>(partition_keys);
2528 CHECK_EQ(partition_keys.size(), size_t(1));
2529 partition_key_tuple = partition_keys.front();
2532 partition_key_cond =
2533 makeExpr<Analyzer::BinOper>(
kBOOLEAN,
2536 partition_key_tuple,
2541 partition_key_cond ,
2543 sorted_partition_key_ref_count_map,
// Each target index may own at most one context; a duplicate insert is fatal.
2549 CHECK(window_function_context_map.emplace(target_index, std::move(context)).second);
// Second pass: ownership of every context is moved out of the map into the
// window project node context, keyed by target index. The map's values are
// moved-from afterwards.
2552 for (
auto& kv : window_function_context_map) {
2554 sorted_partition_key_ref_count_map, sorted_partition_cache, aggregate_tree_map);
2555 window_project_node_context->addWindowFunctionContext(std::move(kv.second), kv.first);
// Fragment of the factory that builds a WindowFunctionContext for one window
// function. Many original lines are elided; only visible statements are
// described.
//   partition_key_cond                  - partition condition; null means "no partitioning"
//   partition_cache                     - cache of partition hash tables, keyed by plan hash
//   sorted_partition_key_ref_count_map  - usage counter per sorted-partition cache key
//   query_infos                         - input table infos; only the first fragment of the
//                                         first entry is read here
//   row_set_mem_owner                   - passed through to the context constructor
2561 const std::shared_ptr<Analyzer::BinOper>& partition_key_cond,
2562 std::unordered_map<
QueryPlanHash, std::shared_ptr<HashJoin>>& partition_cache,
2563 std::unordered_map<QueryPlanHash, size_t>& sorted_partition_key_ref_count_map,
2565 const std::vector<InputTableInfo>& query_infos,
2568 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
// Row count of the single input fragment; every fetched column is checked
// against it further down.
2569 const size_t elem_count = query_infos.front().info.fragments.front().getNumTuples();
2573 std::unique_ptr<WindowFunctionContext> context;
// Partitioned case: obtain (or build) the partition hash table.
2579 if (partition_key_cond) {
2580 auto partition_cond_str = partition_key_cond->toString();
// The cache key mixes the partition key hash with the partition type.
2582 boost::hash_combine(partition_cache_key, partition_key_hash);
2583 boost::hash_combine(partition_cache_key, static_cast<int>(window_partition_type));
2584 std::shared_ptr<HashJoin> partition_ptr;
2585 auto cached_hash_table_it = partition_cache.find(partition_cache_key);
// Cache hit: reuse the hash table built for an earlier window function.
2586 if (cached_hash_table_it != partition_cache.end()) {
2587 partition_ptr = cached_hash_table_it->second;
2588 VLOG(1) <<
"Reuse a hash table to compute window function context (key: "
2589 << partition_cache_key <<
", partition condition: " << partition_cond_str
// Cache miss: build the hash table; a non-empty fail_reason becomes an exception.
2592 const auto hash_table_or_err =
executor_->buildHashTableForQualifier(
2596 window_partition_type,
2602 if (!hash_table_or_err.fail_reason.empty()) {
2603 throw std::runtime_error(hash_table_or_err.fail_reason);
2606 partition_ptr = hash_table_or_err.hash_table;
// The key was just looked up and not found, so the insert is expected to succeed.
2607 CHECK(partition_cache.insert(std::make_pair(partition_cache_key, partition_ptr))
2609 VLOG(1) <<
"Put a generated hash table for computing window function context to "
2611 << partition_cache_key <<
", partition condition: " << partition_cond_str
2614 CHECK(partition_ptr);
2618 VLOG(1) <<
"Aggregate tree's fanout is set to " << aggregate_tree_fanout;
2620 context = std::make_unique<WindowFunctionContext>(window_func,
2621 partition_cache_key,
2626 aggregate_tree_fanout);
// Unpartitioned case: a different constructor overload is used, taking only the
// window function, row count, device type and memory owner.
2628 context = std::make_unique<WindowFunctionContext>(
2629 window_func, elem_count, co.
device_type, row_set_mem_owner);
// Ordered window: derive a cache key for the sorted partition by folding the
// textual form of every order key (and the collation) into the partition key.
2632 if (!order_keys.empty()) {
2633 auto sorted_partition_cache_key = partition_cache_key;
2634 for (
auto& order_key : order_keys) {
2635 boost::hash_combine(sorted_partition_cache_key, order_key->toString());
2638 boost::hash_combine(sorted_partition_cache_key, collation.toString());
2640 context->setSortedPartitionCacheKey(sorted_partition_cache_key);
// Reference counting of the sorted-partition key: insert 1 on first use,
// otherwise increment the existing count.
// NOTE(review): the increment re-looks the key up through operator[];
// `++cache_key_cnt_it.first->second` would have the same effect with one lookup.
2641 auto cache_key_cnt_it =
2642 sorted_partition_key_ref_count_map.try_emplace(sorted_partition_cache_key, 1);
2643 if (!cache_key_cnt_it.second) {
2644 sorted_partition_key_ref_count_map[sorted_partition_cache_key] =
2645 cache_key_cnt_it.first->second + 1;
// Fetch every ORDER BY column from the single fragment and register it with
// the context. chunks_owner is handed to the context together with each column.
2648 std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
2649 for (
const auto& order_key : order_keys) {
2650 const auto order_col =
2653 throw std::runtime_error(
"Only order by columns supported for now");
2655 auto const [column, col_elem_count] =
2658 query_infos.front().info.fragments.front(),
2666 CHECK_EQ(col_elem_count, elem_count);
2667 context->addOrderColumn(column, order_col->get_type_info(), chunks_owner);
// Framed windows and missing-value-filling functions additionally need the
// column buffers of the window function's own arguments.
2670 if (context->getWindowFunction()->hasFraming() ||
2671 context->getWindowFunction()->isMissingValueFillingFunction()) {
2675 auto& window_function_expression_args = window_func->
getArgs();
2676 std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
2677 for (
auto& expr : window_function_expression_args) {
// Only arguments that are plain column references are fetched here.
2678 if (
const auto arg_col_var =
2679 std::dynamic_pointer_cast<const Analyzer::ColumnVar>(expr)) {
2683 query_infos.front().info.fragments.front(),
2690 CHECK_EQ(col_elem_count, elem_count);
2691 context->addColumnBufferForWindowFunctionExpression(column, chunks_owner);
2702 const int64_t queue_time_ms) {
2706 work_unit, filter->
getOutputMetainfo(),
false, co, eo, render_info, queue_time_ms);
2710 std::vector<TargetMetaInfo>
const& rhs) {
2711 if (lhs.size() == rhs.size()) {
2712 for (
size_t i = 0; i < lhs.size(); ++i) {
2713 if (lhs[i].get_type_info() != rhs[i].get_type_info()) {
2731 const int64_t queue_time_ms) {
2733 if (!logical_union->
isAll()) {
2734 throw std::runtime_error(
"UNION without ALL is not supported yet.");
2739 throw std::runtime_error(
"UNION does not support subqueries with geo-columns.");
2759 for (
size_t i = 0; i < tuple_type.size(); ++i) {
2760 auto& target_meta_info = tuple_type[i];
2761 if (target_meta_info.get_type_info().get_type() ==
kNULLT) {
2767 {std::make_tuple(tuple_type[i].get_type_info().get_size(), 8)});
2771 std::vector<TargetInfo> target_infos;
2772 for (
const auto& tuple_type_component : tuple_type) {
2775 tuple_type_component.get_type_info(),
2782 std::shared_ptr<ResultSet> rs{
2791 return {rs, tuple_type};
// Fragment of a templated helper (template parameter T is the storage integer
// type) that dictionary-encodes one string value for an insert.
// The function name and several lines are elided in this excerpt.
2798 const std::string& columnName,
// The dictionary descriptor and its string dictionary must both exist.
2809 CHECK(dd && dd->stringDict);
// Look the string up in the dictionary, adding it if it is not present.
2810 int32_t str_id = dd->stringDict->getOrAdd(str);
// Non-temporary dictionaries are checkpointed right away; a failed checkpoint
// is reported as an exception.
2811 if (!dd->dictIsTemp) {
2812 const auto checkpoint_ok = dd->stringDict->checkpoint();
2813 if (!checkpoint_ok) {
2814 throw std::runtime_error(
"Failed to checkpoint dictionary for column " +
// The id is unusable when it exceeds the largest value T can hold, or when it
// collides with the 32-bit inline NULL sentinel.
2818 const bool invalid = str_id > max_valid_int_value<T>();
2819 if (invalid || str_id == inline_int_null_value<int32_t>()) {
// The overflow is only logged; per the message, NULL is stored instead (the
// store itself is in elided lines).
2821 LOG(
ERROR) <<
"Could not encode string: " << str
2822 <<
", the encoded value doesn't fit in " <<
sizeof(
T) * 8
2823 <<
" bits. Will store NULL instead.";
2846 throw std::runtime_error(
"EXPLAIN not supported for ModifyTable");
2856 std::vector<TargetMetaInfo> empty_targets;
2857 return {rs, empty_targets};
// Fragment: buffer setup of the simple (VALUES-list) INSERT path. Many original
// lines are elided; only visible statements are described.
2872 size_t rows_number = values_lists.size();
// For unsharded tables the rows are split evenly across leaves (rounded up).
2877 size_t rows_per_leaf = rows_number;
2878 if (td->nShards == 0) {
2880 ceil(static_cast<double>(rows_number) / static_cast<double>(leaf_count));
// Rows are inserted in packages of at least 1 and at most 64 * 1024 rows.
2882 auto max_number_of_rows_per_package =
2883 std::max(
size_t(1), std::min(rows_per_leaf,
size_t(64 * 1024)));
// Per-column staging buffers, keyed by column id:
//   col_buffers     - fixed-width values as raw bytes
//   str_col_buffers - string-valued columns (also used for geometry below)
//   arr_col_buffers - array columns
//   sequential_ids  - column id -> position in col_ids / insert data
2885 std::vector<const ColumnDescriptor*> col_descriptors;
2886 std::vector<int> col_ids;
2887 std::unordered_map<int, std::unique_ptr<uint8_t[]>> col_buffers;
2888 std::unordered_map<int, std::vector<std::string>> str_col_buffers;
2889 std::unordered_map<int, std::vector<ArrayDatum>> arr_col_buffers;
2890 std::unordered_map<int, int> sequential_ids;
// Allocate one staging buffer per target column, chosen by column type. Each
// CHECK(it_ok.second) asserts that a column id is not registered twice.
2892 for (
const int col_id : col_id_list) {
2895 if (cd->columnType.is_string()) {
2899 str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2900 CHECK(it_ok.second);
2904 const auto dd = catalog.getMetadataForDict(cd->columnType.get_comp_param());
// Byte buffer sized as (column size in bytes) * (rows per package).
2906 const auto it_ok = col_buffers.emplace(
2908 std::make_unique<uint8_t[]>(cd->columnType.get_size() *
2909 max_number_of_rows_per_package));
2910 CHECK(it_ok.second);
2916 }
else if (cd->columnType.is_geometry()) {
2918 str_col_buffers.insert(std::make_pair(col_id, std::vector<std::string>{}));
2919 CHECK(it_ok.second);
2920 }
else if (cd->columnType.is_array()) {
2922 arr_col_buffers.insert(std::make_pair(col_id, std::vector<ArrayDatum>{}));
2923 CHECK(it_ok.second);
// All remaining types: value-initialized (zeroed) byte buffer sized as
// (logical size in bytes) * (rows per package).
2925 const auto it_ok = col_buffers.emplace(
2927 std::unique_ptr<uint8_t[]>(
new uint8_t[cd->columnType.get_logical_size() *
2928 max_number_of_rows_per_package]()));
2929 CHECK(it_ok.second);
2931 col_descriptors.push_back(cd);
2932 sequential_ids[col_id] = col_ids.size();
2933 col_ids.push_back(col_id);
// Package loop: consume the value lists in chunks until none are left.
2938 size_t start_row = 0;
2939 size_t rows_left = rows_number;
2940 while (rows_left != 0) {
// Reset the staging buffers before filling the next package.
// NOTE(review): memset clears only max_number_of_rows_per_package BYTES, while
// the buffers above are allocated as element_size * max_number_of_rows_per_package
// bytes. For columns wider than one byte the tail of the buffer is not zeroed
// here. Every visible fixed-width write below overwrites its slot, so this may
// be harmless in practice, but the byte count looks unintended — confirm.
2942 for (
const auto& kv : col_buffers) {
2943 memset(kv.second.get(), 0, max_number_of_rows_per_package);
2945 for (
auto& kv : str_col_buffers) {
2948 for (
auto& kv : arr_col_buffers) {
// The final package may be smaller than the maximum.
2952 auto package_size = std::min(rows_left, max_number_of_rows_per_package);
2957 for (
size_t row_idx = 0; row_idx < package_size; ++row_idx) {
2958 const auto& values_list = values_lists[row_idx + start_row];
2959 for (
size_t col_idx = 0; col_idx < col_descriptors.size(); ++col_idx) {
2960 CHECK(values_list.size() == col_descriptors.size());
2965 dynamic_cast<const Analyzer::UOper*
>(values_list[col_idx]->get_expr());
2971 const auto cd = col_descriptors[col_idx];
2972 auto col_datum = col_cv->get_constval();
2973 auto col_type = cd->columnType.get_type();
2974 uint8_t* col_data_bytes{
nullptr};
2975 if (!cd->columnType.is_array() && !cd->columnType.is_geometry() &&
2976 (!cd->columnType.is_string() ||
2978 const auto col_data_bytes_it = col_buffers.find(col_ids[col_idx]);
2979 CHECK(col_data_bytes_it != col_buffers.end());
2980 col_data_bytes = col_data_bytes_it->second.get();
2984 auto col_data =
reinterpret_cast<int8_t*
>(col_data_bytes);
2985 auto null_bool_val =
2987 col_data[row_idx] = col_cv->get_is_null() || null_bool_val
2989 : (col_datum.boolval ? 1 : 0);
2993 auto col_data =
reinterpret_cast<int8_t*
>(col_data_bytes);
2994 col_data[row_idx] = col_cv->get_is_null()
2996 : col_datum.tinyintval;
3000 auto col_data =
reinterpret_cast<int16_t*
>(col_data_bytes);
3001 col_data[row_idx] = col_cv->get_is_null()
3003 : col_datum.smallintval;
3007 auto col_data =
reinterpret_cast<int32_t*
>(col_data_bytes);
3008 col_data[row_idx] = col_cv->get_is_null()
3016 auto col_data =
reinterpret_cast<int64_t*
>(col_data_bytes);
3017 col_data[row_idx] = col_cv->get_is_null()
3019 : col_datum.bigintval;
3023 auto col_data =
reinterpret_cast<float*
>(col_data_bytes);
3024 col_data[row_idx] = col_datum.floatval;
3028 auto col_data =
reinterpret_cast<double*
>(col_data_bytes);
3029 col_data[row_idx] = col_datum.doubleval;
3035 switch (cd->columnType.get_compression()) {
3037 str_col_buffers[col_ids[col_idx]].push_back(
3038 col_datum.stringval ? *col_datum.stringval :
"");
3041 switch (cd->columnType.get_size()) {
3044 &reinterpret_cast<uint8_t*>(col_data_bytes)[row_idx],
3051 &reinterpret_cast<uint16_t*>(col_data_bytes)[row_idx],
3058 &reinterpret_cast<int32_t*>(col_data_bytes)[row_idx],
3076 auto col_data =
reinterpret_cast<int64_t*
>(col_data_bytes);
3077 col_data[row_idx] = col_cv->get_is_null()
3079 : col_datum.bigintval;
3083 const auto is_null = col_cv->get_is_null();
3084 const auto size = cd->columnType.get_size();
3087 const auto is_point_coords =
3089 if (
is_null && !is_point_coords) {
3093 for (int8_t* p = buf + elem_ti.
get_size(); (p - buf) < size;
3095 put_null(static_cast<void*>(p), elem_ti,
"");
3097 arr_col_buffers[col_ids[col_idx]].emplace_back(size, buf,
is_null);
3099 arr_col_buffers[col_ids[col_idx]].emplace_back(0,
nullptr,
is_null);
3103 const auto l = col_cv->get_value_list();
3104 size_t len = l.size() * elem_ti.
get_size();
3105 if (size > 0 && static_cast<size_t>(size) != len) {
3106 throw std::runtime_error(
"Array column " + cd->columnName +
" expects " +
3108 " values, " +
"received " +
3116 int32_t* p =
reinterpret_cast<int32_t*
>(buf);
3123 &p[elemIndex], cd->columnName, elem_ti, c.get(), catalog);
3147 if (col_datum.stringval && col_datum.stringval->empty()) {
3148 throw std::runtime_error(
3149 "Empty values are not allowed for geospatial column \"" +
3150 cd->columnName +
"\"");
3152 str_col_buffers[col_ids[col_idx]].push_back(
3153 col_datum.stringval ? *col_datum.stringval :
"");
3161 start_row += package_size;
3162 rows_left -= package_size;
3165 insert_data.
databaseId = catalog.getCurrentDB().dbId;
3166 insert_data.
tableId = table_id;
3167 insert_data.
data.resize(col_ids.size());
3169 for (
const auto& kv : col_buffers) {
3171 p.
numbersPtr =
reinterpret_cast<int8_t*
>(kv.second.get());
3172 insert_data.
data[sequential_ids[kv.first]] = p;
3174 for (
auto& kv : str_col_buffers) {
3177 insert_data.
data[sequential_ids[kv.first]] = p;
3179 for (
auto& kv : arr_col_buffers) {
3182 insert_data.
data[sequential_ids[kv.first]] = p;
3184 insert_data.
numRows = package_size;
3195 std::vector<TargetMetaInfo> empty_targets;
3196 return {rs, empty_targets};
// Fragments of three small helpers around LIMIT / ORDER BY handling; their
// signatures are elided in this excerpt.

// Unwraps an optional-like limit: an absent limit is reported as 0.
3202 return limit ? *limit : 0;
// Scan limit selection. NOTE(review): the RelAggregate branch body is elided;
// the visible RelCompound branch returns 0 for aggregating compounds and the
// plain limit value otherwise.
3206 const auto aggregate =
dynamic_cast<const RelAggregate*
>(ra);
3210 const auto compound =
dynamic_cast<const RelCompound*
>(ra);
3211 return (compound && compound->isAggregate()) ? 0 :
get_limit_value(limit);
// True only when there is at least one order entry and the first one is
// descending; the emptiness check guards the front() access.
3215 return !order_entries.empty() && order_entries.front().is_desc;
// Fragment of the SORT execution routine. Many original lines are elided in
// this excerpt; comments describe only what the visible statements do.
3228 const int64_t queue_time_ms) {
3231 const auto source = sort->
getInput(0);
3238 executor_->addTransientStringLiterals(source_work_unit.exe_unit,
// Path that works on an already aggregated result (`it` is looked up in an
// elided line): the offset is applied by dropping leading rows.
3241 auto& aggregated_result = it->second;
3242 auto& result_rows = aggregated_result.rs;
3244 const size_t offset = sort->
getOffset();
3245 if (limit || offset) {
3246 if (!order_entries.empty()) {
3250 result_rows->dropFirstN(offset);
3262 source_work_unit.exe_unit.target_exprs,
3263 aggregated_result.targets_meta);
// State shared between the lambda below and the code after it.
3272 std::list<std::shared_ptr<Analyzer::Expr>> groupby_exprs;
3273 bool is_desc{
false};
3274 bool use_speculative_top_n_sort{
false};
// The actual sort is wrapped in a lambda; it is invoked from two places at the
// end of this fragment.
3276 auto execute_sort_query = [
this,
3288 std::optional<size_t> limit = sort->
getLimit();
3289 const size_t offset = sort->
getOffset();
3291 auto source_node = sort->
getInput(0);
3295 SortInfo sort_info{order_entries, sort_algorithm, limit, offset};
3297 source_node->getQueryPlanDagHash(), sort_info);
// Result-set recycler lookup: on a hit, the cached result becomes the sort
// input and the source node's output meta info is taken from it.
3301 if (
auto cached_resultset =
3302 executor_->getResultSetRecyclerHolder().getCachedQueryResultSet(
3303 source_query_plan_dag)) {
// canUseSpeculativeTopNSort() is dereferenced below, so it must hold a value.
3304 CHECK(cached_resultset->canUseSpeculativeTopNSort());
3305 VLOG(1) <<
"recycle resultset of the root node " << source_node->getRelNodeDagId()
3306 <<
" from resultset cache";
3308 ExecutionResult{cached_resultset, cached_resultset->getTargetMetaInfo()};
3312 use_speculative_top_n_sort = *cached_resultset->canUseSpeculativeTopNSort() &&
3314 source_node->setOutputMetainfo(cached_resultset->getTargetMetaInfo());
// No usable cached data: the source query has to be executed.
3318 if (!source_result.getDataPtr()) {
3327 VLOG(1) <<
"Punt sort's input query to CPU: detect union(-all) of none-encoded "
3331 groupby_exprs = source_work_unit.exe_unit.groupby_exprs;
3333 source->getOutputMetainfo(),
// Speculative top-n is only considered when the source produced rows backed by
// a valid buffer (the remaining conditions are partly elided).
3339 use_speculative_top_n_sort =
3340 source_result.
getDataPtr() && source_result.getRows()->hasValidBuffer() &&
3342 source_result.getRows()->getQueryMemDesc());
// Early exits: in-situ rendering and filter push-down return the source result
// unsorted.
3344 if (render_info && render_info->isInSitu()) {
3345 return source_result;
3347 if (source_result.isFilterPushDownEnabled()) {
3348 return source_result;
3350 auto rows_to_sort = source_result.getRows();
// EXPLAIN: return the rows with empty target meta info.
3351 if (eo.just_explain) {
3352 return {rows_to_sort, {}};
// Explicit sort is skipped when there is no collation, when the result is known
// to be empty, or when the speculative top-n path is in use.
3355 if (sort->
collationCount() != 0 && !rows_to_sort->definitelyHasNoRows() &&
3356 !use_speculative_top_n_sort) {
// Only the first limit + offset rows need to be ordered.
3357 const size_t top_n = limit_val + offset;
3358 rows_to_sort->sort(order_entries, top_n, co.device_type,
executor_);
// Apply LIMIT / OFFSET. Two variants are visible; the condition choosing
// between them is elided. NOTE(review): in the first variant the offset is not
// dropped when rows remain (limit + offset rows are kept) — presumably the
// offset is applied elsewhere in that mode; confirm against the full source.
3360 if (limit || offset) {
3362 if (offset >= rows_to_sort->rowCount()) {
3363 rows_to_sort->dropFirstN(offset);
3365 rows_to_sort->keepFirstN(limit_val + offset);
3368 rows_to_sort->dropFirstN(offset);
3370 rows_to_sort->keepFirstN(limit_val);
3374 return {rows_to_sort, source_result.getTargetsMeta()};
// First attempt. NOTE(review): the surrounding try/catch is elided; the second
// call below appears to be a retry after a failed speculative top-n attempt,
// which requires exactly one non-null group-by expression — confirm.
3378 return execute_sort_query();
3380 CHECK_EQ(
size_t(1), groupby_exprs.size());
3381 CHECK(groupby_exprs.front());
3383 return execute_sort_query();
// Fragment of the routine that derives the work unit for a sort's input by
// copying the source work unit and attaching sort information. Many original
// lines are elided; only visible statements are described.
3389 std::list<Analyzer::OrderEntry>& order_entries,
3391 const auto source = sort->
getInput(0);
3393 const size_t offset = sort->
getOffset();
3395 const size_t scan_total_limit =
3397 size_t max_groups_buffer_entry_guess{
3400 SortInfo sort_info{order_entries, sort_algorithm, limit, offset};
3402 const auto& source_exe_unit = source_work_unit.exe_unit;
// Validate every ORDER BY entry: tle_no is 1-based into the source targets, and
// geometry / array typed targets cannot be ordered on.
3405 for (
auto order_entry : order_entries) {
3407 const auto& te = source_exe_unit.target_exprs[order_entry.tle_no - 1];
3409 if (ti.sql_type.is_geometry() || ti.sql_type.is_array()) {
3410 throw std::runtime_error(
3411 "Columns with geometry or array types cannot be used in an ORDER BY clause.");
// Special case: exactly one group-by entry that is null (handling is elided).
3415 if (source_exe_unit.groupby_exprs.size() == 1) {
3416 if (!source_exe_unit.groupby_exprs.front()) {
// Aggregate-initialize the resulting work unit from the source execution unit.
// NOTE(review): source_exe_unit is bound as `const auto&` above, so this
// std::move yields a const rvalue and the member is copied, not moved. The
// result is still correct; the std::move is just ineffective.
3430 std::move(source_exe_unit.input_col_descs),
3431 source_exe_unit.simple_quals,
3432 source_exe_unit.quals,
3433 source_exe_unit.join_quals,
3434 source_exe_unit.groupby_exprs,
3435 source_exe_unit.target_exprs,
3436 source_exe_unit.target_exprs_original_type_infos,
// The sort info of the new unit is rebuilt from sort_info's order entries plus
// the algorithm, limit and offset determined above.
3438 {sort_info.order_entries, sort_algorithm, limit, offset},
3440 source_exe_unit.query_hint,
3441 source_exe_unit.query_plan_dag_hash,
3442 source_exe_unit.hash_table_build_plan_dag,
3443 source_exe_unit.table_id_to_node_map,
3444 source_exe_unit.use_bump_allocator,
3445 source_exe_unit.union_all,
3446 source_exe_unit.query_state},
3448 max_groups_buffer_entry_guess,
// The query rewriter is moved out of the source work unit (source_work_unit
// itself is not const-qualified in the visible code, unlike source_exe_unit).
3449 std::move(source_work_unit.query_rewriter),
3450 source_work_unit.input_permutation,
3451 source_work_unit.left_deep_join_input_sizes};
// Fragment: finds the input table with the largest tuple-count upper bound.
// Returns a pair of (that upper bound, clamped to at least 1) and the table key
// of the table it came from. table_infos must not be empty.
3463 const std::vector<InputTableInfo>& table_infos) {
3464 CHECK(!table_infos.empty());
// Seed the running maximum with the first table.
3465 const auto& first_table = table_infos.front();
3466 size_t max_num_groups = first_table.info.getNumTuplesUpperBound();
3467 auto table_key = first_table.table_key;
// Strictly-greater comparison: on ties the earliest table keeps the key.
3468 for (
const auto& table_info : table_infos) {
3469 if (table_info.info.getNumTuplesUpperBound() > max_num_groups) {
3470 max_num_groups = table_info.info.getNumTuplesUpperBound();
3471 table_key = table_info.table_key;
// Never report zero groups, even if every table is empty.
3474 return std::make_pair(std::max(max_num_groups,
size_t(1)), table_key);
3487 if (render_info && render_info->
isInSitu()) {
3494 bool flatbuffer_is_used =
false;
3495 for (
const auto& target_expr : ra_exe_unit.
target_exprs) {
3496 const auto ti = target_expr->get_type_info();
3498 if (ti.usesFlatBuffer()) {
3499 flatbuffer_is_used =
true;
3505 if (ti.is_varlen()) {
3509 if (
auto top_project = dynamic_cast<const RelProject*>(body)) {
3510 if (top_project->isRowwiseOutputForced()) {
3511 if (flatbuffer_is_used) {
3512 throw std::runtime_error(
3513 "Cannot force rowwise output when FlatBuffer layout is used.");
3522 for (
const auto& target_expr : ra_exe_unit.
target_exprs) {
3523 if (target_expr->get_type_info().usesFlatBuffer()) {
3539 for (
const auto target_expr : ra_exe_unit.
target_exprs) {
3540 if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
3546 preflight_count_query_threshold =
3548 VLOG(1) <<
"Set the pre-flight count query's threshold as "
3549 << preflight_count_query_threshold <<
" by a query hint";
3553 ra_exe_unit.
scan_limit > preflight_count_query_threshold)) {
3560 return !(ra_exe_unit.
quals.empty() && ra_exe_unit.
join_quals.empty() &&
// Fragment of the routine that decides, per target, whether an approximate
// count-distinct aggregate should be replaced by a precise one. Many original
// lines are elided; only visible statements are described.
//   target_exprs_owned - receives ownership of any replacement expression
3566 const std::vector<InputTableInfo>& table_infos,
3567 const Executor* executor,
3569 std::vector<std::shared_ptr<Analyzer::Expr>>& target_exprs_owned) {
3571 for (
size_t i = 0; i < ra_exe_unit.
target_exprs.size(); ++i) {
// Targets that reach this point must be aggregate expressions (the filter that
// skips other targets is in elided lines).
3577 CHECK(dynamic_cast<const Analyzer::AggExpr*>(target_expr));
3580 const auto& arg_ti = arg->get_type_info();
// Only numeric, boolean, time and dictionary-encoded string arguments are
// considered; the body of this rejection branch is elided.
3586 if (!(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time() ||
3587 (arg_ti.is_string() && arg_ti.get_compression() ==
kENCODING_DICT))) {
// Size in bits of a precise bitmap covering the argument's full value range.
// NOTE(review): max - min + 1 could overflow for a range spanning the whole
// int64 domain; a guard may exist in elided lines — confirm.
3598 const auto bitmap_sz_bits = arg_range.getIntMax() - arg_range.getIntMin() + 1;
3599 const auto sub_bitmap_count =
3601 int64_t approx_bitmap_sz_bits{0};
// Optional second aggregate argument: when present it must be an INT expression
// whose constant value is at least 1 (checked below).
3602 const auto error_rate_expr =
static_cast<Analyzer::AggExpr*
>(target_expr)->get_arg1();
3603 if (error_rate_expr) {
3604 CHECK(error_rate_expr->get_type_info().get_type() ==
kINT);
3605 auto const error_rate =
3608 CHECK_GE(error_rate->get_constval().intval, 1);
// Two descriptors are built (constructor calls partly elided): one for the
// approximate layout and one for the precise bitmap.
3614 arg_range.getIntMin(),
3616 approx_bitmap_sz_bits,
3621 arg_range.getIntMin(),
// If the approximate structure is not smaller than the precise bitmap, switch
// this target to a precise aggregate. The new expression is owned by
// target_exprs_owned and the execution unit stores only the raw pointer.
3627 if (approx_count_distinct_desc.bitmapPaddedSizeBytes() >=
3628 precise_count_distinct_desc.bitmapPaddedSizeBytes()) {
3629 auto precise_count_distinct = makeExpr<Analyzer::AggExpr>(
3631 target_exprs_owned.push_back(precise_count_distinct);
3632 ra_exe_unit.
target_exprs[i] = precise_count_distinct.get();
3649 const std::vector<TargetMetaInfo>& targets_meta,
3654 const int64_t queue_time_ms,
3655 const std::optional<size_t> previous_count) {
3666 ScopeGuard clearWindowContextIfNecessary = [&]() {
3673 throw std::runtime_error(
"Window functions support is disabled");
3676 co.allow_lazy_fetch =
false;
3677 computeWindow(work_unit, co, eo, column_cache, queue_time_ms);
3679 if (!eo.just_explain && eo.find_push_down_candidates) {
3681 VLOG(1) <<
"Try to find filter predicate push-down candidate.";
3683 if (!selected_filters.empty() || eo.just_calcite_explain) {
3684 VLOG(1) <<
"Found " << selected_filters.size()
3685 <<
" filter(s) to be pushed down. Re-create a query plan based on pushed "
3686 "filter predicate(s).";
3687 return ExecutionResult(selected_filters, eo.find_push_down_candidates);
3689 VLOG(1) <<
"Continue with the current query plan";
3691 if (render_info && render_info->
isInSitu()) {
3692 co.allow_lazy_fetch =
false;
3694 const auto body = work_unit.
body;
3697 VLOG(3) <<
"body->getId()=" << body->getId()
3699 <<
" it==leaf_results_.end()=" << (it ==
leaf_results_.end());
3703 auto& aggregated_result = it->second;
3704 auto& result_rows = aggregated_result.rs;
3706 body->setOutputMetainfo(aggregated_result.targets_meta);
3720 auto candidate =
query_dag_->getQueryHint(body);
3722 ra_exe_unit.query_hint = *candidate;
3726 const auto& query_hints = ra_exe_unit.query_hint;
3728 orig_block_size =
executor_->blockSize(),
3729 orig_grid_size =
executor_->gridSize()]() {
3730 if (
executor_->getDataMgr()->getCudaMgr()) {
3732 if (orig_block_size) {
3733 executor_->setBlockSize(orig_block_size);
3739 if (orig_grid_size) {
3750 if (!
executor_->getDataMgr()->getCudaMgr()) {
3751 VLOG(1) <<
"Skip CUDA grid size query hint: cannot detect CUDA device";
3753 const auto num_sms =
executor_->cudaMgr()->getMinNumMPsForAllDevices();
3754 const auto new_grid_size =
static_cast<unsigned>(
3755 std::max(1.0, std::round(num_sms * query_hints.cuda_grid_size_multiplier)));
3756 const auto default_grid_size =
executor_->gridSize();
3757 if (new_grid_size != default_grid_size) {
3758 VLOG(1) <<
"Change CUDA grid size: " << default_grid_size
3759 <<
" (default_grid_size) -> " << new_grid_size <<
" (# SMs * "
3760 << query_hints.cuda_grid_size_multiplier <<
")";
3764 VLOG(1) <<
"Skip CUDA grid size query hint: invalid grid size";
3769 if (!
executor_->getDataMgr()->getCudaMgr()) {
3770 VLOG(1) <<
"Skip CUDA block size query hint: cannot detect CUDA device";
3772 int cuda_block_size = query_hints.cuda_block_size;
3774 if (cuda_block_size >= warp_size) {
3775 cuda_block_size = (cuda_block_size + warp_size - 1) / warp_size * warp_size;
3776 VLOG(1) <<
"Change CUDA block size w.r.t warp size (" << warp_size
3777 <<
"): " <<
executor_->blockSize() <<
" -> " << cuda_block_size;
3779 VLOG(1) <<
"Change CUDA block size: " <<
executor_->blockSize() <<
" -> "
3782 executor_->setBlockSize(cuda_block_size);
3789 CHECK_EQ(table_infos.size(), size_t(1));
3790 CHECK_EQ(table_infos.front().info.fragments.size(), size_t(1));
3791 max_groups_buffer_entry_guess =
3792 table_infos.front().info.fragments.front().getNumTuples();
3793 ra_exe_unit.scan_limit = max_groups_buffer_entry_guess;
3796 ra_exe_unit.scan_limit = *previous_count;
3800 ra_exe_unit.scan_limit = 0;
3801 ra_exe_unit.use_bump_allocator =
true;
3803 ra_exe_unit.scan_limit = 0;
3804 }
else if (!eo.just_explain) {
3806 if (filter_count_all) {
3807 ra_exe_unit.scan_limit = std::max(*filter_count_all,
size_t(1));
3808 VLOG(1) <<
"Set a new scan limit from filtered_count_all: "
3809 << ra_exe_unit.scan_limit;
3810 auto const has_limit_value = ra_exe_unit.sort_info.limit.has_value();
3811 auto const top_k_sort_query =
3812 has_limit_value && !ra_exe_unit.sort_info.order_entries.empty();
3815 if (has_limit_value && !top_k_sort_query &&
3816 ra_exe_unit.scan_limit > ra_exe_unit.sort_info.limit.value()) {
3817 ra_exe_unit.scan_limit = ra_exe_unit.sort_info.limit.value();
3818 VLOG(1) <<
"Override scan limit to LIMIT value: " << ra_exe_unit.scan_limit;
3830 VLOG(1) <<
"Using columnar layout for projection as output size of "
3831 << ra_exe_unit.scan_limit <<
" rows exceeds threshold of "
3833 <<
" or some target uses FlatBuffer memory layout.";
3834 eo.output_columnar_hint =
true;
3837 eo.output_columnar_hint =
false;
3848 auto execute_and_handle_errors = [&](
const auto max_groups_buffer_entry_guess_in,
3849 const bool has_cardinality_estimation,
3854 auto local_groups_buffer_entry_guess = max_groups_buffer_entry_guess_in;
3856 return {
executor_->executeWorkUnit(local_groups_buffer_entry_guess,
3863 has_cardinality_estimation,
3872 {ra_exe_unit, work_unit.
body, local_groups_buffer_entry_guess},
3884 for (
const auto& table_info : table_infos) {
3885 const auto db_id = table_info.table_key.db_id;
3888 if (td && (td->isTemporaryTable() || td->isView)) {
3889 use_resultset_cache =
false;
3890 if (eo.keep_result) {
3891 VLOG(1) <<
"Query hint \'keep_result\' is ignored since a query has either "
3892 "temporary table or view";
3900 auto cached_cardinality =
executor_->getCachedCardinality(cache_key);
3901 auto card = cached_cardinality.second;
3902 if (cached_cardinality.first && card >= 0) {
3903 VLOG(1) <<
"Use cached cardinality for max_groups_buffer_entry_guess: " << card;
3904 result = execute_and_handle_errors(
3907 VLOG(1) <<
"Use default cardinality for max_groups_buffer_entry_guess: "
3908 << max_groups_buffer_entry_guess;
3909 result = execute_and_handle_errors(
3910 max_groups_buffer_entry_guess,
3916 auto cached_cardinality =
executor_->getCachedCardinality(cache_key);
3917 auto card = cached_cardinality.second;
3918 if (cached_cardinality.first && card >= 0) {
3919 VLOG(1) <<
"CardinalityEstimationRequired, Use cached cardinality for "
3920 "max_groups_buffer_entry_guess: "
3922 result = execute_and_handle_errors(card,
true,
true);
3924 const auto ndv_groups_estimation =
3926 const auto estimated_groups_buffer_entry_guess =
3927 ndv_groups_estimation > 0
3928 ? 2 * ndv_groups_estimation
3931 CHECK_GT(estimated_groups_buffer_entry_guess,
size_t(0));
3932 VLOG(1) <<
"CardinalityEstimationRequired, Use ndv_estimation: "
3933 << ndv_groups_estimation
3934 <<
", cardinality for estimated_groups_buffer_entry_guess: "
3935 << estimated_groups_buffer_entry_guess;
3936 result = execute_and_handle_errors(
3937 estimated_groups_buffer_entry_guess,
true,
true);
3938 if (!(eo.just_validate || eo.just_explain)) {
3939 executor_->addToCardinalityCache(cache_key, estimated_groups_buffer_entry_guess);
3944 result.setQueueTime(queue_time_ms);
3949 return {std::make_shared<ResultSet>(
3953 ?
executor_->row_set_mem_owner_->cloneStrDictDataOnly()
3959 for (
auto& target_info :
result.getTargetsMeta()) {
3960 if (target_info.get_type_info().is_string() &&
3961 !target_info.get_type_info().is_dict_encoded_string()) {
3963 use_resultset_cache =
false;
3964 if (eo.keep_result) {
3965 VLOG(1) <<
"Query hint \'keep_result\' is ignored since a query has non-encoded "
3966 "string column projection";
3972 auto allow_auto_caching_resultset =
3975 if (use_resultset_cache && (eo.keep_result || allow_auto_caching_resultset)) {
3976 auto query_exec_time =
timer_stop(query_exec_time_begin);
3977 res->setExecTime(query_exec_time);
3978 res->setQueryPlanHash(ra_exe_unit.query_plan_dag_hash);
3979 res->setTargetMetaInfo(body->getOutputMetainfo());
3981 res->setInputTableKeys(std::move(input_table_keys));
3982 if (allow_auto_caching_resultset) {
3983 VLOG(1) <<
"Automatically keep query resultset to recycler";
3985 res->setUseSpeculativeTopNSort(
3987 executor_->getResultSetRecyclerHolder().putQueryResultSetToCache(
3988 ra_exe_unit.query_plan_dag_hash,
3989 res->getInputTableKeys(),
3991 res->getBufferSizeBytes(co.device_type),
3994 if (eo.keep_result) {
3996 VLOG(1) <<
"Query hint \'keep_result\' is ignored since we do not support "
3997 "resultset recycling on distributed mode";
3999 VLOG(1) <<
"Query hint \'keep_result\' is ignored since a query has union-(all) "
4001 }
else if (render_info && render_info->
isInSitu()) {
4002 VLOG(1) <<
"Query hint \'keep_result\' is ignored since a query is classified as "
4003 "a in-situ rendering query";
4005 VLOG(1) <<
"Query hint \'keep_result\' is ignored since a query is either "
4006 "validate or explain query";
4008 VLOG(1) <<
"Query hint \'keep_result\' is ignored";
4017 std::vector<InputTableInfo>
const& input_tables_info)
const {
4019 input_tables_info.begin(), input_tables_info.end(), [](
InputTableInfo const& info) {
4020 auto const& table_key = info.table_key;
4021 if (table_key.db_id > 0) {
4025 auto td = catalog->getMetadataForTable(table_key.table_id);
4027 if (catalog->getDeletedColumnIfRowsDeleted(td)) {
4037 std::string table_name{
""};
4038 if (table_key.
db_id > 0) {
4041 auto td = catalog->getMetadataForTable(table_key.
table_id);
4043 table_name = td->tableName;
4059 auto const num_rows = max_row_info.first;
4061 VLOG(1) <<
"Short-circuiting filtered count query for the projection query "
4062 "containing input table "
4063 << table_name <<
": return its table cardinality " << num_rows <<
" instead";
4064 return std::make_optional<size_t>(num_rows);
4076 VLOG(1) <<
"Try to execute pre-flight counts query";
4080 count_all_result =
executor_->executeWorkUnit(one,
4095 }
catch (
const std::exception& e) {
4096 LOG(
WARNING) <<
"Failed to run pre-flight filtered count with error " << e.what();
4097 return std::nullopt;
4099 const auto count_row = count_all_result->getNextRow(
false,
false);
4100 CHECK_EQ(
size_t(1), count_row.size());
4101 const auto& count_tv = count_row.front();
4102 const auto count_scalar_tv = boost::get<ScalarTargetValue>(&count_tv);
4103 CHECK(count_scalar_tv);
4104 const auto count_ptr = boost::get<int64_t>(count_scalar_tv);
4107 auto count_upper_bound =
static_cast<size_t>(*count_ptr);
4108 return std::max(count_upper_bound,
size_t(1));
4112 const auto& ra_exe_unit = work_unit.
exe_unit;
4113 if (ra_exe_unit.input_descs.size() != 1) {
4116 const auto& table_desc = ra_exe_unit.
input_descs.front();
4120 const auto& table_key = table_desc.getTableKey();
4121 for (
const auto& simple_qual : ra_exe_unit.simple_quals) {
4122 const auto comp_expr =
4124 if (!comp_expr || comp_expr->get_optype() !=
kEQ) {
4129 if (!lhs_col || !lhs_col->getTableKey().table_id || lhs_col->get_rte_idx()) {
4132 const auto rhs = comp_expr->get_right_operand();
4138 {table_key.db_id, table_key.table_id, lhs_col->getColumnKey().column_id});
4139 if (cd->isVirtualCol) {
4149 const std::vector<TargetMetaInfo>& targets_meta,
4154 const bool was_multifrag_kernel_launch,
4155 const int64_t queue_time_ms) {
4160 auto ra_exe_unit_in = work_unit.
exe_unit;
4173 auto eo_no_multifrag = eo;
4175 eo_no_multifrag.allow_multifrag =
false;
4176 eo_no_multifrag.find_push_down_candidates =
false;
4177 if (was_multifrag_kernel_launch) {
4181 LOG(
WARNING) <<
"Multifrag query ran out of memory, retrying with multifragment "
4182 "kernels disabled.";
4196 result.setQueueTime(queue_time_ms);
4199 LOG(
WARNING) <<
"Kernel per fragment query ran out of memory, retrying on CPU.";
4213 VLOG(1) <<
"Resetting max groups buffer entry guess.";
4214 max_groups_buffer_entry_guess = 0;
4216 int iteration_ctr = -1;
4239 CHECK(max_groups_buffer_entry_guess);
4243 throw std::runtime_error(
"Query ran out of output slots in the result");
4245 max_groups_buffer_entry_guess *= 2;
4246 LOG(
WARNING) <<
"Query ran out of slots in the output buffer, retrying with max "
4247 "groups buffer entry "
4249 << max_groups_buffer_entry_guess;
4255 result.setQueueTime(queue_time_ms);
4262 LOG(
ERROR) <<
"Query execution failed with error "
4268 LOG(
INFO) <<
"Query ran out of GPU memory, attempting punt to CPU";
4270 throw std::runtime_error(
4271 "Query ran out of GPU memory, unable to automatically retry on CPU");
4280 const char* code{
nullptr};
4281 const char* description{
nullptr};
4286 switch (error_code) {
4288 return {
"ERR_DIV_BY_ZERO",
"Division by zero"};
4290 return {
"ERR_OUT_OF_GPU_MEM",
4292 "Query couldn't keep the entire working set of columns in GPU memory"};
4294 return {
"ERR_UNSUPPORTED_SELF_JOIN",
"Self joins not supported yet"};
4296 return {
"ERR_OUT_OF_CPU_MEM",
"Not enough host memory to execute the query"};
4298 return {
"ERR_OVERFLOW_OR_UNDERFLOW",
"Overflow or underflow"};
4300 return {
"ERR_OUT_OF_TIME",
"Query execution has exceeded the time limit"};
4302 return {
"ERR_INTERRUPTED",
"Query execution has been interrupted"};
4304 return {
"ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED",
4305 "Columnar conversion not supported for variable length types"};
4307 return {
"ERR_TOO_MANY_LITERALS",
"Too many literals in the query"};
4309 return {
"ERR_STRING_CONST_IN_RESULTSET",
4311 "NONE ENCODED String types are not supported as input result set."};
4313 return {
"ERR_OUT_OF_RENDER_MEM",
4315 "Insufficient GPU memory for query results in render output buffer "
4316 "sized by render-mem-bytes"};
4318 return {
"ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY",
4319 "Streaming-Top-N not supported in Render Query"};
4321 return {
"ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES",
4322 "Multiple distinct values encountered"};
4324 return {
"ERR_GEOS",
"ERR_GEOS"};
4326 return {
"ERR_WIDTH_BUCKET_INVALID_ARGUMENT",
4328 "Arguments of WIDTH_BUCKET function does not satisfy the condition"};
4330 return {
nullptr,
nullptr};
4337 if (error_code < 0) {
4338 return "Ran out of slots in the query output buffer";
4342 if (errorInfo.code) {
4343 return errorInfo.code +
": "s + errorInfo.description;
4351 VLOG(1) <<
"Running post execution callback.";
4352 (*post_execution_callback_)();
4359 const auto compound =
dynamic_cast<const RelCompound*
>(node);
4363 const auto project =
dynamic_cast<const RelProject*
>(node);
4367 const auto aggregate =
dynamic_cast<const RelAggregate*
>(node);
4371 const auto filter =
dynamic_cast<const RelFilter*
>(node);
4375 LOG(
FATAL) <<
"Unhandled node type: "
4384 if (
auto join = dynamic_cast<const RelJoin*>(sink)) {
4385 return join->getJoinType();
4387 if (dynamic_cast<const RelLeftDeepInnerJoin*>(sink)) {
4395 const auto condition =
dynamic_cast<const RexOperator*
>(scalar);
4396 if (!condition || condition->getOperator() !=
kOR || condition->size() != 2) {
4399 const auto equi_join_condition =
4400 dynamic_cast<const RexOperator*
>(condition->getOperand(0));
4401 if (!equi_join_condition || equi_join_condition->getOperator() !=
kEQ) {
4404 const auto both_are_null_condition =
4405 dynamic_cast<const RexOperator*
>(condition->getOperand(1));
4406 if (!both_are_null_condition || both_are_null_condition->getOperator() !=
kAND ||
4407 both_are_null_condition->size() != 2) {
4410 const auto lhs_is_null =
4411 dynamic_cast<const RexOperator*
>(both_are_null_condition->getOperand(0));
4412 const auto rhs_is_null =
4413 dynamic_cast<const RexOperator*
>(both_are_null_condition->getOperand(1));
4414 if (!lhs_is_null || !rhs_is_null || lhs_is_null->getOperator() !=
kISNULL ||
4415 rhs_is_null->getOperator() !=
kISNULL) {
4418 CHECK_EQ(
size_t(1), lhs_is_null->size());
4419 CHECK_EQ(
size_t(1), rhs_is_null->size());
4420 CHECK_EQ(
size_t(2), equi_join_condition->size());
4421 const auto eq_lhs =
dynamic_cast<const RexInput*
>(equi_join_condition->getOperand(0));
4422 const auto eq_rhs =
dynamic_cast<const RexInput*
>(equi_join_condition->getOperand(1));
4423 const auto is_null_lhs =
dynamic_cast<const RexInput*
>(lhs_is_null->getOperand(0));
4424 const auto is_null_rhs =
dynamic_cast<const RexInput*
>(rhs_is_null->getOperand(0));
4425 if (!eq_lhs || !eq_rhs || !is_null_lhs || !is_null_rhs) {
4428 std::vector<std::unique_ptr<const RexScalar>> eq_operands;
4429 if (*eq_lhs == *is_null_lhs && *eq_rhs == *is_null_rhs) {
4431 auto lhs_op_copy = deep_copy_visitor.
visit(equi_join_condition->getOperand(0));
4432 auto rhs_op_copy = deep_copy_visitor.
visit(equi_join_condition->getOperand(1));
4433 eq_operands.emplace_back(lhs_op_copy.release());
4434 eq_operands.emplace_back(rhs_op_copy.release());
4435 return boost::make_unique<const RexOperator>(
4436 kBW_EQ, eq_operands, equi_join_condition->getType());
4443 const auto condition =
dynamic_cast<const RexOperator*
>(scalar);
4444 if (condition && condition->getOperator() ==
kAND) {
4445 CHECK_GE(condition->size(), size_t(2));
4450 for (
size_t i = 1; i < condition->size(); ++i) {
4451 std::vector<std::unique_ptr<const RexScalar>> and_operands;
4452 and_operands.emplace_back(std::move(acc));
4455 boost::make_unique<const RexOperator>(
kAND, and_operands, condition->getType());
4465 for (
size_t nesting_level = 1; nesting_level <= left_deep_join->
inputCount() - 1;
4470 auto cur_level_join_type = left_deep_join->
getJoinType(nesting_level);
4472 join_types[nesting_level - 1] = cur_level_join_type;
4480 std::vector<InputDescriptor>& input_descs,
4481 std::list<std::shared_ptr<const InputColDescriptor>>& input_col_descs,
4483 std::unordered_map<const RelAlgNode*, int>& input_to_nest_level,
4485 const std::vector<InputTableInfo>& query_infos,
4486 const Executor* executor) {
4492 if (node->isUpdateViaSelect() || node->isDeleteViaSelect()) {
4497 for (
const auto& table_info : query_infos) {
4498 if (table_info.table_key.table_id < 0) {
4507 const auto input_permutation =
4510 std::tie(input_descs, input_col_descs, std::ignore) =
4512 return input_permutation;
4517 std::vector<size_t> input_sizes;
4518 for (
size_t i = 0; i < left_deep_join->
inputCount(); ++i) {
4520 input_sizes.push_back(inputs.size());
4526 const std::list<std::shared_ptr<Analyzer::Expr>>& quals) {
4527 std::list<std::shared_ptr<Analyzer::Expr>> rewritten_quals;
4528 for (
const auto& qual : quals) {
4530 rewritten_quals.push_back(rewritten_qual ? rewritten_qual : qual);
4532 return rewritten_quals;
4541 std::vector<InputDescriptor> input_descs;
4542 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
4544 std::tie(input_descs, input_col_descs, std::ignore) =
4550 const auto left_deep_join =
4555 std::vector<size_t> input_permutation;
4556 std::vector<size_t> left_deep_join_input_sizes;
4557 std::optional<unsigned> left_deep_tree_id;
4558 if (left_deep_join) {
4559 left_deep_tree_id = left_deep_join->getId();
4562 left_deep_join, input_descs, input_to_nest_level, eo.
just_explain);
4564 std::find(join_types.begin(), join_types.end(),
JoinType::LEFT) ==
4568 left_deep_join_quals,
4569 input_to_nest_level,
4574 std::tie(input_descs, input_col_descs, std::ignore) =
4575 get_input_desc(compound, input_to_nest_level, input_permutation);
4577 left_deep_join, input_descs, input_to_nest_level, eo.
just_explain);
4582 input_to_nest_level,
4586 if (bbox_intersect_qual_info.is_reordered) {
4591 if (bbox_intersect_qual_info.has_bbox_intersect_join) {
4592 left_deep_join_quals = bbox_intersect_qual_info.join_quals;
4598 const auto scalar_sources =
4601 std::unordered_map<size_t, SQLTypeInfo> target_exprs_type_infos;
4603 target_exprs_type_infos,
4612 auto candidate =
query_dag_->getQueryHint(compound);
4614 query_hint = *candidate;
4622 left_deep_join_quals,
4625 target_exprs_type_infos,
4637 auto query_rewriter = std::make_unique<QueryRewriter>(query_infos,
executor_);
4638 auto rewritten_exe_unit = query_rewriter->rewrite(exe_unit);
4639 const auto targets_meta =
get_targets_meta(compound, rewritten_exe_unit.target_exprs);
4642 if (left_deep_tree_id && left_deep_tree_id.has_value()) {
4643 left_deep_trees_info.emplace(left_deep_tree_id.value(),
4644 rewritten_exe_unit.join_quals);
4648 compound, left_deep_tree_id, left_deep_trees_info,
executor_);
4649 rewritten_exe_unit.hash_table_build_plan_dag = join_info.hash_table_plan_dag;
4650 rewritten_exe_unit.table_id_to_node_map = join_info.table_id_to_node_map;
4652 return {rewritten_exe_unit,
4655 std::move(query_rewriter),
4657 left_deep_join_input_sizes};
4663 const auto left_deep_join =
4667 return std::make_shared<RelAlgTranslator>(
4675 const auto bin_oper =
dynamic_cast<const RexOperator*
>(qual_expr);
4676 if (!bin_oper || bin_oper->getOperator() !=
kAND) {
4679 CHECK_GE(bin_oper->size(), size_t(2));
4681 for (
size_t i = 1; i < bin_oper->size(); ++i) {
4683 lhs_cf.insert(lhs_cf.end(), rhs_cf.begin(), rhs_cf.end());
4689 const std::vector<std::shared_ptr<Analyzer::Expr>>& factors,
4691 CHECK(!factors.empty());
4692 auto acc = factors.front();
4693 for (
size_t i = 1; i < factors.size(); ++i) {
4699 template <
class QualsList>
4701 const std::shared_ptr<Analyzer::Expr>& needle) {
4702 for (
const auto& qual : haystack) {
4703 if (*qual == *needle) {
4714 const std::shared_ptr<Analyzer::Expr>& expr) {
4716 CHECK_GE(expr_terms.size(), size_t(1));
4717 const auto& first_term = expr_terms.front();
4719 std::vector<std::shared_ptr<Analyzer::Expr>> common_factors;
4722 for (
const auto& first_term_factor : first_term_factors.quals) {
4724 expr_terms.size() > 1;
4725 for (
size_t i = 1; i < expr_terms.size(); ++i) {
4733 common_factors.push_back(first_term_factor);
4736 if (common_factors.empty()) {
4740 std::vector<std::shared_ptr<Analyzer::Expr>> remaining_terms;
4741 for (
const auto& term : expr_terms) {
4743 std::vector<std::shared_ptr<Analyzer::Expr>> remaining_quals(
4744 term_cf.simple_quals.begin(), term_cf.simple_quals.end());
4745 for (
const auto& qual : term_cf.quals) {
4747 remaining_quals.push_back(qual);
4750 if (!remaining_quals.empty()) {
4756 if (remaining_terms.empty()) {
4767 const std::vector<JoinType>& join_types,
4768 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level,
4769 const bool just_explain)
const {
4773 std::list<std::shared_ptr<Analyzer::Expr>> join_condition_quals;
4774 for (
const auto rex_condition_component : rex_condition_cf) {
4777 translator.
translate(bw_equals ? bw_equals.get() : rex_condition_component));
4780 auto append_folded_cf_quals = [&join_condition_quals](
const auto& cf_quals) {
4781 for (
const auto& cf_qual : cf_quals) {
4782 join_condition_quals.emplace_back(
fold_expr(cf_qual.get()));
4786 append_folded_cf_quals(join_condition_cf.quals);
4787 append_folded_cf_quals(join_condition_cf.simple_quals);
4797 const std::vector<InputDescriptor>& input_descs,
4798 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level,
4799 const bool just_explain) {
4805 std::unordered_set<std::shared_ptr<Analyzer::Expr>> visited_quals;
4806 for (
size_t rte_idx = 1; rte_idx < input_descs.size(); ++rte_idx) {
4808 if (outer_condition) {
4809 result[rte_idx - 1].quals =
4810 makeJoinQuals(outer_condition, join_types, input_to_nest_level, just_explain);
4811 CHECK_LE(rte_idx, join_types.size());
4816 for (
const auto& qual : join_condition_quals) {
4817 if (visited_quals.count(qual)) {
4820 const auto qual_rte_idx = rte_idx_visitor.visit(qual.get());
4821 if (static_cast<size_t>(qual_rte_idx) <= rte_idx) {
4822 const auto it_ok = visited_quals.emplace(qual);
4823 CHECK(it_ok.second);
4824 result[rte_idx - 1].quals.push_back(qual);
4827 CHECK_LE(rte_idx, join_types.size());
4831 result[rte_idx - 1].type = join_types[rte_idx - 1];
4840 const size_t nest_level,
4841 const std::vector<TargetMetaInfo>& in_metainfo,
4842 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level) {
4845 const auto input = ra_node->
getInput(nest_level);
4846 const auto it_rte_idx = input_to_nest_level.find(input);
4847 CHECK(it_rte_idx != input_to_nest_level.end());
4848 const int rte_idx = it_rte_idx->second;
4850 std::vector<std::shared_ptr<Analyzer::Expr>> inputs;
4851 const auto scan_ra =
dynamic_cast<const RelScan*
>(input);
4853 for (
const auto& input_meta : in_metainfo) {
4854 inputs.push_back(std::make_shared<Analyzer::ColumnVar>(
4855 input_meta.get_type_info(),
4864 std::vector<std::shared_ptr<Analyzer::Expr>>
const& input) {
4865 std::vector<Analyzer::Expr*> output(input.size());
4866 auto const raw_ptr = [](
auto& shared_ptr) {
return shared_ptr.get(); };
4867 std::transform(input.cbegin(), input.cend(), output.begin(), raw_ptr);
4876 const bool just_explain) {
4877 std::vector<InputDescriptor> input_descs;
4878 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
4879 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
4881 std::tie(input_descs, input_col_descs, used_inputs_owned) =
4888 const auto source = aggregate->
getInput(0);
4890 const auto scalar_sources =
4893 std::unordered_map<size_t, SQLTypeInfo> target_exprs_type_infos;
4895 target_exprs_type_infos,
4907 auto candidate =
query_dag_->getQueryHint(aggregate);
4909 query_hint = *candidate;
4921 target_exprs_type_infos,
4928 join_info.hash_table_plan_dag,
4929 join_info.table_id_to_node_map,
4942 std::vector<InputDescriptor> input_descs;
4943 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
4945 std::tie(input_descs, input_col_descs, std::ignore) =
4950 const auto left_deep_join =
4955 std::vector<size_t> input_permutation;
4956 std::vector<size_t> left_deep_join_input_sizes;
4957 std::optional<unsigned> left_deep_tree_id;
4958 if (left_deep_join) {
4959 left_deep_tree_id = left_deep_join->getId();
4962 left_deep_join, input_descs, input_to_nest_level, eo.
just_explain);
4966 left_deep_join_quals,
4967 input_to_nest_level,
4972 std::tie(input_descs, input_col_descs, std::ignore) =
4975 left_deep_join, input_descs, input_to_nest_level, eo.
just_explain);
4980 input_to_nest_level,
4984 if (bbox_intersect_qual_info.is_reordered) {
4989 if (bbox_intersect_qual_info.has_bbox_intersect_join) {
4990 left_deep_join_quals = bbox_intersect_qual_info.join_quals;
4994 const auto target_exprs_owned =
5002 auto candidate =
query_dag_->getQueryHint(project);
5004 query_hint = *candidate;
5011 left_deep_join_quals,
5026 auto query_rewriter = std::make_unique<QueryRewriter>(query_infos,
executor_);
5027 auto rewritten_exe_unit = query_rewriter->rewrite(exe_unit);
5028 const auto targets_meta =
get_targets_meta(project, rewritten_exe_unit.target_exprs);
5031 if (left_deep_tree_id && left_deep_tree_id.has_value()) {
5032 left_deep_trees_info.emplace(left_deep_tree_id.value(),
5033 rewritten_exe_unit.join_quals);
5037 project, left_deep_tree_id, left_deep_trees_info,
executor_);
5038 rewritten_exe_unit.hash_table_build_plan_dag = join_info.hash_table_plan_dag;
5039 rewritten_exe_unit.table_id_to_node_map = join_info.table_id_to_node_map;
5041 return {rewritten_exe_unit,
5044 std::move(query_rewriter),
5046 left_deep_join_input_sizes};
5055 const int negative_node_id = -input_node->
getId();
5057 if (
auto rel_scan = dynamic_cast<const RelScan*>(input_node)) {
5058 db_id = rel_scan->getCatalog().getDatabaseId();
5060 std::vector<std::shared_ptr<Analyzer::Expr>> target_exprs;
5061 target_exprs.reserve(tmis.size());
5062 for (
size_t i = 0; i < tmis.size(); ++i) {
5063 target_exprs.push_back(std::make_shared<Analyzer::ColumnVar>(
5064 tmis[i].get_type_info(),
5068 return target_exprs;
5077 std::vector<InputDescriptor> input_descs;
5078 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
5081 std::tie(input_descs, input_col_descs, std::ignore) =
5084 auto const max_num_tuples =
5088 [](
auto max,
auto const& query_info) {
5089 return std::max(max, query_info.info.getNumTuples());
5092 VLOG(3) <<
"input_to_nest_level.size()=" << input_to_nest_level.size() <<
" Pairs are:";
5093 for (
auto& pair : input_to_nest_level) {
5095 << pair.second <<
')';
5100 std::vector<Analyzer::Expr*> target_exprs_pair[2];
5101 for (
unsigned i = 0; i < 2; ++i) {
5103 CHECK(!input_exprs_owned.empty())
5104 <<
"No metainfo found for input node(" << i <<
") "
5106 VLOG(3) <<
"i(" << i <<
") input_exprs_owned.size()=" << input_exprs_owned.size();
5107 for (
auto& input_expr : input_exprs_owned) {
5108 VLOG(3) <<
" " << input_expr->toString();
5116 <<
" target_exprs.size()=" << target_exprs_pair[0].size()
5117 <<
" max_num_tuples=" << max_num_tuples;
5125 target_exprs_pair[0],
5135 logical_union->
isAll(),
5137 target_exprs_pair[1]};
5138 auto query_rewriter = std::make_unique<QueryRewriter>(query_infos,
executor_);
5139 const auto rewritten_exe_unit = query_rewriter->rewrite(exe_unit);
5142 if (
auto const* node = dynamic_cast<const RelCompound*>(input0)) {
5145 }
else if (
auto const* node = dynamic_cast<const RelProject*>(input0)) {
5148 }
else if (
auto const* node = dynamic_cast<const RelLogicalUnion*>(input0)) {
5151 }
else if (
auto const* node = dynamic_cast<const RelAggregate*>(input0)) {
5154 }
else if (
auto const* node = dynamic_cast<const RelScan*>(input0)) {
5157 }
else if (
auto const* node = dynamic_cast<const RelFilter*>(input0)) {
5160 }
else if (
auto const* node = dynamic_cast<const RelLogicalValues*>(input0)) {
5163 }
else if (dynamic_cast<const RelSort*>(input0)) {
5164 throw QueryNotSupported(
"LIMIT and OFFSET are not currently supported with UNION.");
5169 VLOG(3) <<
"logical_union->getOutputMetainfo()="
5171 <<
" rewritten_exe_unit.input_col_descs.front()->getScanDesc().getTableKey()="
5172 << rewritten_exe_unit.input_col_descs.front()->getScanDesc().getTableKey();
5174 return {rewritten_exe_unit,
5177 std::move(query_rewriter)};
5182 const bool just_explain,
5183 const bool is_gpu) {
5184 std::vector<InputDescriptor> input_descs;
5185 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
5187 std::tie(input_descs, input_col_descs, std::ignore) =
5197 const auto table_function_impl_and_type_infos = [=]() {
5203 LOG(
WARNING) <<
"createTableFunctionWorkUnit[GPU]: " << e.what()
5205 <<
" step to run on CPU.";
5213 LOG(
WARNING) <<
"createTableFunctionWorkUnit[CPU]: " << e.what();
5218 const auto& table_function_impl = std::get<0>(table_function_impl_and_type_infos);
5219 const auto& table_function_type_infos = std::get<1>(table_function_impl_and_type_infos);
5220 size_t output_row_sizing_param = 0;
5221 if (table_function_impl
5222 .hasUserSpecifiedOutputSizeParameter()) {
5223 const auto parameter_index =
5224 table_function_impl.getOutputRowSizeParameter(table_function_type_infos);
5225 CHECK_GT(parameter_index,
size_t(0));
5227 const auto parameter_expr =
5229 const auto parameter_expr_literal =
dynamic_cast<const RexLiteral*
>(parameter_expr);
5230 if (!parameter_expr_literal) {
5231 throw std::runtime_error(
5232 "Provided output buffer sizing parameter is not a literal. Only literal "
5233 "values are supported with output buffer sizing configured table "
5236 int64_t literal_val = parameter_expr_literal->getVal<int64_t>();
5237 if (literal_val < 0) {
5238 throw std::runtime_error(
"Provided output sizing parameter " +
5240 " must be positive integer.");
5242 output_row_sizing_param =
static_cast<size_t>(literal_val);
5245 output_row_sizing_param = 1;
5248 makeExpr<Analyzer::Constant>(
kINT,
false, d);
5250 input_exprs_owned.insert(input_exprs_owned.begin() + parameter_index - 1,
5251 DEFAULT_ROW_MULTIPLIER_EXPR);
5253 }
else if (table_function_impl.hasNonUserSpecifiedOutputSize()) {
5254 output_row_sizing_param = table_function_impl.getOutputRowSizeParameter();
5259 std::vector<Analyzer::ColumnVar*> input_col_exprs;
5260 size_t input_index = 0;
5261 size_t arg_index = 0;
5262 const auto table_func_args = table_function_impl.getInputArgs();
5263 CHECK_EQ(table_func_args.size(), table_function_type_infos.size());
5264 for (
const auto& ti : table_function_type_infos) {
5265 if (ti.is_column_list()) {
5266 for (
int i = 0; i < ti.get_dimension(); i++) {
5267 auto& input_expr = input_exprs_owned[input_index];
5273 auto type_info = input_expr->get_type_info();
5274 if (ti.is_column_array()) {
5276 type_info.set_subtype(type_info.get_subtype());
5278 type_info.set_subtype(type_info.get_type());
5280 type_info.set_type(ti.get_type());
5281 type_info.set_dimension(ti.get_dimension());
5282 type_info.setUsesFlatBuffer(type_info.get_elem_type().supportsFlatBuffer());
5283 input_expr->set_type_info(type_info);
5285 input_col_exprs.push_back(col_var);
5288 }
else if (ti.is_column()) {
5289 auto& input_expr = input_exprs_owned[input_index];
5294 auto type_info = input_expr->get_type_info();
5295 if (ti.is_column_array()) {
5297 type_info.set_subtype(type_info.get_subtype());
5299 type_info.set_subtype(type_info.get_type());
5301 type_info.set_type(ti.get_type());
5302 type_info.setUsesFlatBuffer(type_info.get_elem_type().supportsFlatBuffer());
5303 input_expr->set_type_info(type_info);
5304 input_col_exprs.push_back(col_var);
5307 auto input_expr = input_exprs_owned[input_index];
5309 if (ext_func_arg_ti != input_expr->get_type_info()) {
5310 input_exprs_owned[input_index] = input_expr->add_cast(ext_func_arg_ti);
5317 std::vector<Analyzer::Expr*> table_func_outputs;
5318 constexpr int32_t transient_pos{-1};
5319 for (
size_t i = 0; i < table_function_impl.getOutputsSize(); i++) {
5320 auto ti = table_function_impl.getOutputSQLType(i);
5321 ti.setUsesFlatBuffer(ti.supportsFlatBuffer());
5322 if (ti.is_geometry()) {
5323 auto p = table_function_impl.getInputID(i);
5324 int32_t input_pos = p.first;
5325 if (input_pos != transient_pos) {
5326 CHECK(!ti.is_column_list());
5327 CHECK_LT(input_pos, input_exprs_owned.size());
5328 const auto& reference_ti = input_exprs_owned[input_pos]->get_type_info();
5330 ti.set_input_srid(reference_ti.get_input_srid());
5331 ti.set_output_srid(reference_ti.get_output_srid());
5332 ti.set_compression(reference_ti.get_compression());
5333 ti.set_comp_param(reference_ti.get_comp_param());
5335 ti.set_input_srid(0);
5336 ti.set_output_srid(0);
5338 ti.set_comp_param(0);
5340 }
else if (ti.is_dict_encoded_string() || ti.is_text_encoding_dict_array()) {
5341 auto p = table_function_impl.getInputID(i);
5343 int32_t input_pos = p.first;
5344 if (input_pos == transient_pos) {
5351 for (
int j = 0; j < input_pos; j++) {
5352 const auto ti = table_function_type_infos[j];
5353 offset += ti.is_column_list() ? ti.get_dimension() : 1;
5355 input_pos = offset + p.second;
5357 CHECK_LT(input_pos, input_exprs_owned.size());
5358 const auto& dict_key =
5359 input_exprs_owned[input_pos]->get_type_info().getStringDictKey();
5360 ti.set_comp_param(dict_key.dict_id);
5361 ti.setStringDictKey(dict_key);
5375 output_row_sizing_param,
5376 table_function_impl};
5379 return {exe_unit, rel_table_func};
5384 std::pair<std::vector<TargetMetaInfo>, std::vector<std::shared_ptr<Analyzer::Expr>>>
5387 const std::vector<std::shared_ptr<RexInput>>& inputs_owned,
5388 const std::unordered_map<const RelAlgNode*, int>& input_to_nest_level) {
5389 std::vector<TargetMetaInfo> in_metainfo;
5390 std::vector<std::shared_ptr<Analyzer::Expr>> exprs_owned;
5392 auto input_it = inputs_owned.begin();
5393 for (
size_t nest_level = 0; nest_level < data_sink_node->inputCount(); ++nest_level) {
5394 const auto source = data_sink_node->getInput(nest_level);
5395 const auto scan_source =
dynamic_cast<const RelScan*
>(source);
5397 CHECK(source->getOutputMetainfo().empty());
5398 std::vector<std::shared_ptr<Analyzer::Expr>> scalar_sources_owned;
5399 for (
size_t i = 0; i < scan_source->size(); ++i, ++input_it) {
5400 scalar_sources_owned.push_back(translator.
translate(input_it->get()));
5402 const auto source_metadata =
5405 in_metainfo.end(), source_metadata.begin(), source_metadata.end());
5407 exprs_owned.end(), scalar_sources_owned.begin(), scalar_sources_owned.end());
5409 const auto& source_metadata = source->getOutputMetainfo();
5410 input_it += source_metadata.size();
5412 in_metainfo.end(), source_metadata.begin(), source_metadata.end());
5414 data_sink_node, nest_level, source_metadata, input_to_nest_level);
5416 exprs_owned.end(), scalar_sources_owned.begin(), scalar_sources_owned.end());
5419 return std::make_pair(in_metainfo, exprs_owned);
5426 const bool just_explain) {
5428 std::vector<InputDescriptor> input_descs;
5429 std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
5430 std::vector<TargetMetaInfo> in_metainfo;
5431 std::vector<std::shared_ptr<RexInput>> used_inputs_owned;
5432 std::vector<std::shared_ptr<Analyzer::Expr>> target_exprs_owned;
5435 std::tie(input_descs, input_col_descs, used_inputs_owned) =
5440 std::tie(in_metainfo, target_exprs_owned) =
5441 get_inputs_meta(filter, translator, used_inputs_owned, input_to_nest_level);
5442 const auto filter_expr = translator.translate(filter->
getCondition());
5445 const auto qual =
fold_expr(filter_expr.get());
5454 auto candidate =
query_dag_->getQueryHint(filter);
5456 query_hint = *candidate;
5461 return {{input_descs,
5464 {rewritten_qual ? rewritten_qual : qual},
5475 join_info.hash_table_plan_dag,
5476 join_info.table_id_to_node_map},
5485 if (
auto foreign_storage_mgr =
5486 executor_->getDataMgr()->getPersistentStorageMgr()->getForeignStorageMgr()) {
5490 foreign_storage_mgr->setParallelismHints({});
5498 executor_->setupCaching(phys_inputs, phys_table_ids);
const size_t getGroupByCount() const
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
bool is_agg(const Analyzer::Expr *expr)
Analyzer::ExpressionPtr rewrite_array_elements(Analyzer::Expr const *expr)
std::vector< Analyzer::Expr * > target_exprs
void setOutputMetainfo(std::vector< TargetMetaInfo > targets_metainfo) const
size_t text_decoding_casts
std::vector< Analyzer::Expr * > get_raw_pointers(std::vector< std::shared_ptr< Analyzer::Expr >> const &input)
std::vector< int > ChunkKey
std::optional< std::function< void()> > post_execution_callback_
ExecutionResult executeAggregate(const RelAggregate *aggregate, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info, const int64_t queue_time_ms)
bool hasNoneEncodedTextArg() const
TextEncodingCastCounts visitUOper(const Analyzer::UOper *u_oper) const override
RaExecutionDesc * getDescriptor(size_t idx) const
TextEncodingCastCounts visitStringOper(const Analyzer::StringOper *string_oper) const override
std::unordered_map< const RelAlgNode *, int > get_input_nest_levels(const RelAlgNode *ra_node, const std::vector< size_t > &input_permutation)
std::vector< JoinType > left_deep_join_types(const RelLeftDeepInnerJoin *left_deep_join)
void visitBegin() const override
HOST DEVICE int get_size() const
int32_t getErrorCode() const
bool find_push_down_candidates
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
bool g_use_query_resultset_cache
bool can_use_bump_allocator(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo)
bool isFrameNavigateWindowFunction() const
bool contains(const std::shared_ptr< Analyzer::Expr > expr, const bool desc) const
const Rex * getTargetExpr(const size_t i) const
const Expr * get_escape_expr() const
size_t text_encoding_casts
bool has_valid_query_plan_dag(const RelAlgNode *node)
AggregatedColRange computeColRangesCache()
static const int32_t ERR_INTERRUPTED
std::vector< size_t > do_table_reordering(std::vector< InputDescriptor > &input_descs, std::list< std::shared_ptr< const InputColDescriptor >> &input_col_descs, const JoinQualsPerNestingLevel &left_deep_join_quals, std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, const RA *node, const std::vector< InputTableInfo > &query_infos, const Executor *executor)
class for a per-database catalog. also includes metadata for the current database and the current use...
WorkUnit createProjectWorkUnit(const RelProject *, const SortInfo &, const ExecutionOptions &eo)
size_t size() const override
int hll_size_for_rate(const int err_percent)
const std::vector< std::vector< std::shared_ptr< TargetEntry > > > & get_values_lists() const
std::vector< std::string > * stringsPtr
ExecutionResult executeRelAlgSeq(const RaExecutionSequence &seq, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info, const int64_t queue_time_ms, const bool with_existing_temp_tables=false)
const RexScalar * getFilterExpr() const
size_t get_limit_value(std::optional< size_t > limit)
TableFunctionWorkUnit createTableFunctionWorkUnit(const RelTableFunction *table_func, const bool just_explain, const bool is_gpu)
std::vector< std::shared_ptr< Analyzer::Expr > > synthesize_inputs(const RelAlgNode *ra_node, const size_t nest_level, const std::vector< TargetMetaInfo > &in_metainfo, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level)
std::vector< ArrayDatum > * arraysPtr
std::list< Analyzer::OrderEntry > getOrderEntries() const
bool is_validate_or_explain_query(const ExecutionOptions &eo)
WorkUnit createAggregateWorkUnit(const RelAggregate *, const SortInfo &, const bool just_explain)
StorageIOFacility::UpdateCallback yieldDeleteCallback(DeleteTransactionParameters &delete_parameters)
ErrorInfo getErrorDescription(const int32_t error_code)
std::tuple< std::vector< InputDescriptor >, std::list< std::shared_ptr< const InputColDescriptor > >, std::vector< std::shared_ptr< RexInput > > > get_input_desc(const RA *ra_node, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, const std::vector< size_t > &input_permutation)
const bool hasQueryStepForUnion() const
TextEncodingCastCounts get_text_cast_counts(const RelAlgExecutionUnit &ra_exe_unit)
std::pair< size_t, shared::TableKey > groups_approx_upper_bound(const std::vector< InputTableInfo > &table_infos)
#define SPIMAP_GEO_PHYSICAL_INPUT(c, i)
bool g_skip_intermediate_count
std::unique_ptr< const RexOperator > get_bitwise_equals_conjunction(const RexScalar *scalar)
bool with_dynamic_watchdog
const RexScalar * getOuterCondition(const size_t nesting_level) const
TableGenerations computeTableGenerations()
shared::TableKey table_key_from_ra(const RelAlgNode *ra_node)
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
std::pair< int, int > ParallelismHint
size_t size() const override
bool is_none_encoded_text(TargetMetaInfo const &target_meta_info)
std::vector< size_t > outer_fragment_indices
std::pair< std::unordered_set< const RexInput * >, std::vector< std::shared_ptr< RexInput > > > get_join_source_used_inputs(const RelAlgNode *ra_node)
static SpeculativeTopNBlacklist speculative_topn_blacklist_
bool g_allow_query_step_skipping
size_t get_scalar_sources_size(const RelCompound *compound)
RelAlgExecutionUnit exe_unit
std::pair< std::vector< TargetMetaInfo >, std::vector< std::shared_ptr< Analyzer::Expr > > > get_inputs_meta(const RelFilter *filter, const RelAlgTranslator &translator, const std::vector< std::shared_ptr< RexInput >> &inputs_owned, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level)
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::vector< std::shared_ptr< Analyzer::TargetEntry > > targets
const Expr * get_right_operand() const
const std::unordered_map< int, QueryPlanHash > getSkippedQueryStepCacheKeys() const
TemporaryTables temporary_tables_
size_t getNumRows() const
void prepare_string_dictionaries(const std::unordered_set< PhysicalInput > &phys_inputs)
PersistentStorageMgr * getPersistentStorageMgr() const
ExecutionResult executeRelAlgQueryWithFilterPushDown(const RaExecutionSequence &seq, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info, const int64_t queue_time_ms)
void setupCaching(const RelAlgNode *ra)
const RexScalar * getCondition() const
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
const bool default_disregard_casts_to_none_encoding_
const std::vector< TargetMetaInfo > getTupleType() const
std::vector< std::shared_ptr< Analyzer::Expr > > translate_scalar_sources_for_update(const RA *ra_node, const RelAlgTranslator &translator, int32_t tableId, const Catalog_Namespace::Catalog &cat, const ColumnNameList &colNames, size_t starting_projection_column_idx)
std::vector< InputDescriptor > input_descs
std::shared_ptr< Analyzer::Var > var_ref(const Analyzer::Expr *expr, const Analyzer::Var::WhichRow which_row, const int varno)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > & getLeftDeepJoinTreesInfo()
size_t g_preflight_count_query_threshold
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
DEVICE void sort(ARGS &&...args)
int64_t insert_one_dict_str(T *col_data, const std::string &columnName, const SQLTypeInfo &columnType, const Analyzer::Constant *col_cv, const Catalog_Namespace::Catalog &catalog)
std::vector< PushedDownFilterInfo > selectFiltersToBePushedDown(const RelAlgExecutor::WorkUnit &work_unit, const CompilationOptions &co, const ExecutionOptions &eo)
static WindowProjectNodeContext * create(Executor *executor)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
TypeR::rep timer_stop(Type clock_begin)
ExecutionResult executeModify(const RelModify *modify, const ExecutionOptions &eo)
void addTemporaryTable(const int table_id, const ResultSetPtr &result)
std::shared_ptr< Analyzer::Expr > ExpressionPtr
void check_sort_node_source_constraint(const RelSort *sort)
std::unordered_map< unsigned, AggregatedResult > leaf_results_
static const int32_t ERR_GEOS
const ResultSetPtr & getDataPtr() const
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::vector< TargetInfo > TargetInfoList
SQLTypeInfo get_logical_type_for_expr(const Analyzer::Expr *expr)
std::vector< JoinCondition > JoinQualsPerNestingLevel
std::shared_ptr< ResultSet > ResultSetPtr
static const int32_t ERR_TOO_MANY_LITERALS
ExecutionResult executeLogicalValues(const RelLogicalValues *, const ExecutionOptions &)
virtual void handleQueryEngineVector(std::vector< std::shared_ptr< RelAlgNode >> const nodes)
bool g_enable_dynamic_watchdog
std::shared_ptr< Analyzer::Expr > set_transient_dict(const std::shared_ptr< Analyzer::Expr > expr)
std::optional< RegisteredQueryHint > getParsedQueryHint(const RelAlgNode *node)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
TextEncodingCastCounts visit(const Analyzer::Expr *expr) const
void setNoExplainExecutionOptions(bool no_validation=false)
void set_parallelism_hints(const RelAlgNode &ra_node)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
static SortInfo createFromSortNode(const RelSort *sort_node)
HOST DEVICE SQLTypes get_type() const
const std::string kInfoSchemaDbName
ExecutionResult executeFilter(const RelFilter *, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms)
const std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries() const noexcept
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define TRANSIENT_DICT_ID
bool g_enable_data_recycler
const std::vector< std::shared_ptr< Analyzer::Expr > > & getOrderKeys() const
bool is_window_execution_unit(const RelAlgExecutionUnit &ra_exe_unit)
const std::vector< OrderEntry > & getCollation() const
void insertData(const Catalog_Namespace::SessionInfo &session_info, InsertData &insert_data)
size_t max_join_hash_table_size
static std::unordered_set< size_t > getScanNodeTableKey(RelAlgNode const *rel_alg_node)
const Expr * get_arg() const
std::vector< const RexScalar * > rex_to_conjunctive_form(const RexScalar *qual_expr)
bool is_count_distinct(const Analyzer::Expr *expr)
QueryStepExecutionResult executeRelAlgQuerySingleStep(const RaExecutionSequence &seq, const size_t step_idx, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info)
bool hasStepForUnion() const
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
std::shared_ptr< Analyzer::Expr > transform_to_inner(const Analyzer::Expr *expr)
std::vector< std::shared_ptr< RelAlgNode > > & getNodes()
bool filter_on_deleted_column
foreign_storage::ForeignStorageMgr * getForeignStorageMgr() const
void handleNop(RaExecutionDesc &ed)
void set_type_info(const SQLTypeInfo &ti)
#define LOG_IF(severity, condition)
size_t getQueryPlanDagHash() const
void computeWindow(const WorkUnit &work_unit, const CompilationOptions &co, const ExecutionOptions &eo, ColumnCacheMap &column_cache_map, const int64_t queue_time_ms)
std::shared_ptr< Analyzer::Expr > cast_dict_to_none(const std::shared_ptr< Analyzer::Expr > &input)
std::vector< node_t > get_node_input_permutation(const JoinQualsPerNestingLevel &left_deep_join_quals, const std::vector< InputTableInfo > &table_infos, const Executor *executor)
bool can_output_columnar(const RelAlgExecutionUnit &ra_exe_unit, const RenderInfo *render_info, const RelAlgNode *body)
int tableId
identifies the database into which the data is being inserted
bool list_contains_expression(const QualsList &haystack, const std::shared_ptr< Analyzer::Expr > &needle)
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
static const int32_t ERR_STRING_CONST_IN_RESULTSET
size_t g_watchdog_none_encoded_string_translation_limit
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
size_t getColInputsSize() const
SQLOps get_optype() const
bool just_calcite_explain
size_t numRows
a vector of column ids for the row(s) being inserted
virtual T visit(const RexScalar *rex_scalar) const
const size_t getScalarSourcesSize() const
bool output_columnar_hint
WorkUnit createCompoundWorkUnit(const RelCompound *, const SortInfo &, const ExecutionOptions &eo)
static const int32_t ERR_STREAMING_TOP_N_NOT_SUPPORTED_IN_RENDER_QUERY
Data_Namespace::DataMgr & getDataMgr() const
static const int32_t ERR_COLUMNAR_CONVERSION_NOT_SUPPORTED
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
WorkUnit createWorkUnit(const RelAlgNode *, const SortInfo &, const ExecutionOptions &eo)
size_t append_move(std::vector< T > &destination, std::vector< T > &&source)
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_owned_
std::unordered_set< shared::TableKey > getPhysicalTableIds() const
StorageIOFacility::UpdateCallback yieldUpdateCallback(UpdateTransactionParameters &update_parameters)
bool key_does_not_shard_to_leaf(const ChunkKey &key)
std::unique_ptr< WindowFunctionContext > createWindowFunctionContext(const Analyzer::WindowFunction *window_func, const std::shared_ptr< Analyzer::BinOper > &partition_key_cond, std::unordered_map< QueryPlanHash, std::shared_ptr< HashJoin >> &partition_cache, std::unordered_map< QueryPlanHash, size_t > &sorted_partition_key_ref_count_map, const WorkUnit &work_unit, const std::vector< InputTableInfo > &query_infos, const CompilationOptions &co, ColumnCacheMap &column_cache_map, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
static const int32_t ERR_DIV_BY_ZERO
void check_none_encoded_string_cast_tuple_limit(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
std::vector< std::pair< std::vector< size_t >, size_t > > per_device_cardinality
static SysCatalog & instance()
size_t getOuterFragmentCount(const CompilationOptions &co, const ExecutionOptions &eo)
static CompilationOptions makeCpuOnly(const CompilationOptions &in)
bool g_from_table_reordering
size_t g_window_function_aggregation_tree_fanout
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
const std::vector< std::shared_ptr< Analyzer::Expr > > & getArgs() const
bool disregard_casts_to_none_encoding_
bool isGeometry(TargetMetaInfo const &target_meta_info)
void conditionally_change_arg_to_int_type(size_t target_expr_idx, std::shared_ptr< Analyzer::Expr > &target_expr, std::unordered_map< size_t, SQLTypeInfo > &target_exprs_type_infos)
ExecutorType executor_type
bool g_enable_system_tables
std::vector< Analyzer::Expr * > translate_targets(std::vector< std::shared_ptr< Analyzer::Expr >> &target_exprs_owned, std::unordered_map< size_t, SQLTypeInfo > &target_exprs_type_infos, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources, const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs, const RelCompound *compound, const RelAlgTranslator &translator, const ExecutorType executor_type)
std::vector< unsigned > aggregateResult(const std::vector< unsigned > &aggregate, const std::vector< unsigned > &next_result) const override
void * checked_malloc(const size_t size)
DEVICE auto copy(ARGS &&...args)
#define INJECT_TIMER(DESC)
static const int32_t ERR_OUT_OF_RENDER_MEM
std::list< std::shared_ptr< Analyzer::Expr > > translate_groupby_exprs(const RelCompound *compound, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
const JoinQualsPerNestingLevel join_quals
static void reset(Executor *executor)
T visit(const RelAlgNode *rel_alg) const
void build_render_targets(RenderInfo &render_info, const std::vector< Analyzer::Expr * > &work_unit_target_exprs, const std::vector< TargetMetaInfo > &targets_meta)
void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id)
void executeUpdate(const RelAlgNode *node, const CompilationOptions &co, const ExecutionOptions &eo, const int64_t queue_time_ms)
A container for relational algebra descriptors defining the execution order for a relational algebra ...
void put_null_array(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
std::vector< unsigned > visitLeftDeepInnerJoin(const RelLeftDeepInnerJoin *left_deep_join_tree) const override
TableIdToNodeMap table_id_to_node_map
bool estimate_output_cardinality
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
size_t g_big_group_threshold
static const int32_t ERR_OVERFLOW_OR_UNDERFLOW
std::list< Analyzer::OrderEntry > order_entries
static const int32_t ERR_OUT_OF_TIME
ExecutionResult handleOutOfMemoryRetry(const RelAlgExecutor::WorkUnit &work_unit, const std::vector< TargetMetaInfo > &targets_meta, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info, const bool was_multifrag_kernel_launch, const int64_t queue_time_ms)
std::optional< size_t > getFilteredCountAll(const RelAlgExecutionUnit &ra_exe_unit, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo)
const RelAlgNode * getInput(const size_t idx) const
void executePostExecutionCallback()
DEVICE auto accumulate(ARGS &&...args)
RelAlgExecutionUnit createCountAllExecutionUnit(Analyzer::Expr *replacement_target) const
std::shared_ptr< Analyzer::Expr > build_logical_expression(const std::vector< std::shared_ptr< Analyzer::Expr >> &factors, const SQLOps sql_op)
size_t getNDVEstimation(const WorkUnit &work_unit, const int64_t range, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo)
static const int32_t ERR_UNSUPPORTED_SELF_JOIN
bool g_allow_auto_resultset_caching
ExecutionResult executeRelAlgSubSeq(const RaExecutionSequence &seq, const std::pair< size_t, size_t > interval, const CompilationOptions &co, const ExecutionOptions &eo, RenderInfo *render_info, const int64_t queue_time_ms)
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
specifies the content in-memory of a row in the column metadata table
OUTPUT transform(INPUT const &input, FUNC const &func)
bool get_is_distinct() const
WorkUnit createUnionWorkUnit(const RelLogicalUnion *, const SortInfo &, const ExecutionOptions &eo)
static const int32_t ERR_SINGLE_VALUE_FOUND_MULTIPLE_VALUES
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
TextEncodingCastCounts(const size_t text_decoding_casts, const size_t text_encoding_casts)
JoinType get_join_type(const RelAlgNode *ra)
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
void executeRelAlgStep(const RaExecutionSequence &seq, const size_t step_idx, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms)
static const int32_t ERR_OUT_OF_GPU_MEM
size_t getTableFuncInputsSize() const
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
std::shared_ptr< Analyzer::Expr > reverse_logical_distribution(const std::shared_ptr< Analyzer::Expr > &expr)
ExecutionResult executeSimpleInsert(const Analyzer::Query &insert_query, Fragmenter_Namespace::InsertDataLoader &inserter, const Catalog_Namespace::SessionInfo &session)
Argument type based extension function binding.
BoundingBoxIntersectJoinTranslationInfo convert_bbox_intersect_join(JoinQualsPerNestingLevel const &join_quals, std::vector< InputDescriptor > &input_descs, std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, std::vector< size_t > &input_permutation, std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, Executor const *executor)
const SQLTypeInfo & get_type_info() const
std::vector< std::shared_ptr< Analyzer::Expr > > target_exprs_for_union(RelAlgNode const *input_node)
virtual std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const =0
Executor * getExecutor() const
int get_result_table_id() const
const std::vector< std::unique_ptr< const RexAgg > > & getAggExprs() const
ExecutorDeviceType device_type
bool node_is_aggregate(const RelAlgNode *ra)
std::vector< TargetMetaInfo > get_targets_meta(const RA *ra_node, const std::vector< Analyzer::Expr * > &target_exprs)
void put_null(void *ndptr, const SQLTypeInfo &ntype, const std::string col_name)
std::optional< RegisteredQueryHint > getGlobalQueryHint()
bool g_enable_window_functions
TextEncodingCastCounts aggregateResult(const TextEncodingCastCounts &aggregate, const TextEncodingCastCounts &next_result) const override
bool isEmptyResult() const
ExecutionResult executeTableFunction(const RelTableFunction *, const CompilationOptions &, const ExecutionOptions &, const int64_t queue_time_ms)
std::unique_ptr< RelAlgDag > query_dag_
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_join_info_
size_t preflight_count_query_threshold
const RexScalar * getProjectAt(const size_t idx) const
ExecutionResult executeWorkUnit(const WorkUnit &work_unit, const std::vector< TargetMetaInfo > &targets_meta, const bool is_agg, const CompilationOptions &co_in, const ExecutionOptions &eo_in, RenderInfo *, const int64_t queue_time_ms, const std::optional< size_t > previous_count=std::nullopt)
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
TextEncodingCastCounts visitBinOper(const Analyzer::BinOper *bin_oper) const override
const RaExecutionDesc * getContextData() const
static RegisteredQueryHint defaults()
void prepare_foreign_table_for_execution(const RelAlgNode &ra_node)
int32_t countRexLiteralArgs() const
bool string_op_returns_string(const SqlStringOpKind kind)
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
size_t get_scan_limit(const RelAlgNode *ra, std::optional< size_t > limit)
HOST DEVICE EncodingType get_compression() const
static std::pair< const int8_t *, size_t > getOneColumnFragment(Executor *executor, const Analyzer::ColumnVar &hash_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ColumnCacheMap &column_cache)
Gets one chunk's pointer and element count on either CPU or GPU.
ExecutionResult executeUnion(const RelLogicalUnion *, const RaExecutionSequence &, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms)
bool table_is_temporary(const TableDescriptor *const td)
#define DEFAULT_ROW_MULTIPLIER_VALUE
SqlStringOpKind get_kind() const
std::vector< DataBlockPtr > data
the number of rows being inserted
std::shared_ptr< const query_state::QueryState > query_state_
const Expr * get_like_expr() const
bool table_is_replicated(const TableDescriptor *td)
Catalog & getCatalog() const
bool is_projection(const RelAlgExecutionUnit &ra_exe_unit)
size_t g_estimator_failure_max_groupby_size
size_t max_join_hash_table_size
static RelRexToStringConfig defaults()
const Expr * get_operand() const
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
bool isHintRegistered(const QueryHint hint) const
Basic constructors and methods of the row set interface.
Datum get_constval() const
SQLTypes get_int_type_by_size(size_t const nbytes)
const size_t getGroupByCount() const
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
bool is_none_encoded_string() const
const RelAlgNode & getRootRelAlgNode() const
size_t collationCount() const
void setInputSourceNode(const RelAlgNode *input_source_node)
bool canUseResultsetCache(const ExecutionOptions &eo, RenderInfo *render_info) const
ExecutionResult executeSort(const RelSort *, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms)
bool isRowidLookup(const WorkUnit &work_unit)
bool exe_unit_has_quals(const RelAlgExecutionUnit ra_exe_unit)
static const int32_t ERR_WIDTH_BUCKET_INVALID_ARGUMENT
ExecutionResult executeProject(const RelProject *, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms, const std::optional< size_t > previous_count)
bool hasDeletedRowInQuery(std::vector< InputTableInfo > const &) const
std::vector< std::shared_ptr< Analyzer::Expr > > translate_scalar_sources(const RA *ra_node, const RelAlgTranslator &translator, const ::ExecutorType executor_type)
static void handlePersistentError(const int32_t error_code)
const std::tuple< table_functions::TableFunction, std::vector< SQLTypeInfo > > bind_table_function(std::string name, Analyzer::ExpressionPtrVector input_args, const std::vector< table_functions::TableFunction > &table_funcs, const bool is_gpu)
HOST DEVICE int get_comp_param() const
const RexScalar * getTableFuncInputAt(const size_t idx) const
static const StringDictKey kTransientDictKey
bool compute_output_buffer_size(const RelAlgExecutionUnit &ra_exe_unit)
size_t g_default_max_groups_buffer_entry_guess
bool should_output_columnar(const RelAlgExecutionUnit &ra_exe_unit)
SQLAgg get_aggtype() const
const JoinType getJoinType(const size_t nesting_level) const
bool optimize_cuda_block_and_grid_sizes
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
const size_t max_groups_buffer_entry_guess
std::string getFunctionName() const
void prepareForeignTables()
std::list< std::shared_ptr< Analyzer::Expr > > quals
bool wasMultifragKernelLaunch() const
bool g_enable_bump_allocator
StringDictionaryGenerations computeStringDictionaryGenerations()
ExecutionResult executeRelAlgQueryNoRetry(const CompilationOptions &co, const ExecutionOptions &eo, const bool just_explain_plan, const bool explain_verbose, RenderInfo *render_info)
std::vector< std::string > ColumnNameList
const RexScalar * getInnerCondition() const
RegisteredQueryHint query_hint
void prepare_for_system_table_execution(const RelAlgNode &ra_node, const CompilationOptions &co)
#define DEBUG_TIMER(name)
std::unordered_set< PhysicalInput > get_physical_inputs_with_spi_col_id(const RelAlgNode *ra)
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
size_t getLeafCount() const
ExecutionResult executeCompound(const RelCompound *, const CompilationOptions &, const ExecutionOptions &, RenderInfo *, const int64_t queue_time_ms)
void collect_used_input_desc(std::vector< InputDescriptor > &input_descs, std::unordered_set< std::shared_ptr< const InputColDescriptor >> &input_col_descs_unique, const RelAlgNode *ra_node, const std::unordered_set< const RexInput * > &source_used_inputs, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level)
const RelAlgNode * getBody() const
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
TextEncodingCastCountVisitor(const bool default_disregard_casts_to_none_encoding)
std::unique_ptr< const RexOperator > get_bitwise_equals(const RexScalar *scalar)
Estimators to be used when precise cardinality isn't useful.
bool g_allow_query_step_cpu_retry
static std::string getErrorMessageFromCode(const int32_t error_code)
The data to be inserted using the fragment manager.
QualsConjunctiveForm translate_quals(const RelCompound *compound, const RelAlgTranslator &translator)
void add(const std::shared_ptr< Analyzer::Expr > expr, const bool desc)
const Expr * get_left_operand() const
void cleanupPostExecution()
void initializeParallelismHints()
std::list< std::shared_ptr< Analyzer::Expr > > makeJoinQuals(const RexScalar *join_condition, const std::vector< JoinType > &join_types, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, const bool just_explain) const
const RexScalar * scalar_at(const size_t i, const RelCompound *compound)
TextEncodingCastCounts visitLikeExpr(const Analyzer::LikeExpr *like) const override
std::vector< Analyzer::Expr * > target_exprs
unsigned dynamic_watchdog_time_limit
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
std::optional< size_t > getLimit() const
const size_t inputCount() const
size_t getRelNodeDagId() const
ExecutionResult executeRelAlgQuery(const CompilationOptions &co, const ExecutionOptions &eo, const bool just_explain_plan, const bool explain_verbose, RenderInfo *render_info)
bool g_columnar_large_projections
HOST DEVICE bool get_notnull() const
TextEncodingCastCounts defaultResult() const override
size_t aggregate_tree_fanout
virtual size_t get_num_column_vars(const bool include_agg) const
std::unique_ptr< TransactionParameters > dml_transaction_parameters_
static constexpr char const * FOREIGN_TABLE
RelAlgExecutionUnit decide_approx_count_distinct_implementation(const RelAlgExecutionUnit &ra_exe_unit_in, const std::vector< InputTableInfo > &table_infos, const Executor *executor, const ExecutorDeviceType device_type_in, std::vector< std::shared_ptr< Analyzer::Expr >> &target_exprs_owned)
const std::list< int > & get_result_col_list() const
bool sameTypeInfo(std::vector< TargetMetaInfo > const &lhs, std::vector< TargetMetaInfo > const &rhs)
size_t g_columnar_large_projections_threshold
const std::vector< std::shared_ptr< Analyzer::Expr > > & getPartitionKeys() const
const RelAlgNode * get_data_sink(const RelAlgNode *ra_node)
bool allow_runtime_query_interrupt
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
std::pair< std::vector< unsigned >, std::unordered_map< unsigned, JoinQualsPerNestingLevel > > getJoinInfo(const RelAlgNode *root_node)
static const int32_t ERR_OUT_OF_CPU_MEM
void executeDelete(const RelAlgNode *node, const CompilationOptions &co, const ExecutionOptions &eo_in, const int64_t queue_time_ms)
static ExecutionOptions defaults()
SQLTypeInfo get_elem_type() const
const TableDescriptor * getTableDescriptor() const
std::vector< int > columnIds
identifies the table into which the data is being inserted
std::shared_ptr< RelAlgTranslator > getRelAlgTranslator(const RelAlgNode *root_node)
RaExecutionDesc * getDescriptorByBodyId(unsigned const body_id, size_t const start_idx) const
static size_t getArenaBlockSize()
RelAlgDag * getRelAlgDag()
bool hasContextData() const
void prepareForSystemTableExecution(const CompilationOptions &co) const
std::list< std::shared_ptr< Analyzer::Expr > > combine_equi_join_conditions(const std::list< std::shared_ptr< Analyzer::Expr >> &join_quals)
DEVICE void swap(ARGS &&...args)
const Expr * getArg(const size_t i) const
std::pair< std::unordered_set< const RexInput * >, std::vector< std::shared_ptr< RexInput > > > get_used_inputs(const RelCompound *compound)
SQLOps get_optype() const
WorkUnit createFilterWorkUnit(const RelFilter *, const SortInfo &, const bool just_explain)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
RANodeOutput get_node_output(const RelAlgNode *ra_node)
std::pair< std::vector< InputDescriptor >, std::list< std::shared_ptr< const InputColDescriptor > > > get_input_desc_impl(const RA *ra_node, const std::unordered_set< const RexInput * > &used_inputs, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, const std::vector< size_t > &input_permutation)
size_t g_auto_resultset_caching_threshold
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
JoinQualsPerNestingLevel translateLeftDeepJoinFilter(const RelLeftDeepInnerJoin *join, const std::vector< InputDescriptor > &input_descs, const std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, const bool just_explain)
bool g_enable_table_functions
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
void setHasStepForUnion(bool flag)
std::string get_table_name_from_table_key(shared::TableKey const &table_key)
bool first_oe_is_desc(const std::list< Analyzer::OrderEntry > &order_entries)
void prepareLeafExecution(const AggregatedColRange &agg_col_range, const StringDictionaryGenerations &string_dictionary_generations, const TableGenerations &table_generations)
HashTableBuildDagMap hash_table_build_plan_dag
const RexScalar * getScalarSource(const size_t i) const
std::vector< size_t > get_left_deep_join_input_sizes(const RelLeftDeepInnerJoin *left_deep_join)
WorkUnit createSortInputWorkUnit(const RelSort *, std::list< Analyzer::OrderEntry > &order_entries, const ExecutionOptions &eo)
void set_transient_dict_maybe(std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources, const std::shared_ptr< Analyzer::Expr > &expr)
std::list< std::shared_ptr< Analyzer::Expr > > rewrite_quals(const std::list< std::shared_ptr< Analyzer::Expr >> &quals)