28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
31 #include "MapDRelease.h"
34 #include "gen-cpp/CalciteServer.h"
81 #include <aws/core/auth/AWSCredentialsProviderChain.h>
85 #include <sys/types.h>
87 #include <boost/algorithm/string.hpp>
88 #include <boost/filesystem.hpp>
89 #include <boost/make_shared.hpp>
90 #include <boost/process/search_path.hpp>
91 #include <boost/program_options.hpp>
92 #include <boost/tokenizer.hpp>
105 #include <arrow/api.h>
106 #include <arrow/io/api.h>
107 #include <arrow/ipc/api.h>
113 #ifdef ENABLE_IMPORT_PARQUET
// Declaration only (extern); seen by the compiler only when ENABLE_IMPORT_PARQUET is
// defined. NOTE(review): the flag's meaning is not established in this part of the
// file -- confirm against its definition before relying on the name.
114 extern bool g_enable_parquet_import_fsi;
// Expands to the empty string literal.
// NOTE(review): presumably the value used where no valid session id exists -- this is
// inferred from the name only; confirm at the use sites.
128 #define INVALID_SESSION_ID ""
// Selects the logger request id, given an optional parent request id.
//  - If g_uniform_request_ids_per_thrift_call is set and parent_request_id is truthy
//    (non-zero), the parent's id is passed to logger::set_request_id().
//  - Otherwise logger::set_new_request_id() is called (it is the init-statement of the
//    else-if, so it runs whenever the first condition is false) and, when
//    parent_request_id is truthy, the parent id is written to the INFO log.
// The expansion ends without a semicolon; the call site supplies it.
// NOTE(review): the expansion is an unbraced if / else-if chain that names
// parent_request_id several times, so
//   (a) use it only as a standalone statement -- an `else` written after it would bind
//       to the macro's own if chain, and
//   (b) an argument expression is evaluated more than once on every path.
130 #define SET_REQUEST_ID(parent_request_id) \
131 if (g_uniform_request_ids_per_thrift_call && parent_request_id) \
132 logger::set_request_id(parent_request_id); \
133 else if (logger::set_new_request_id(); parent_request_id) \
134 LOG(INFO) << "This request has parent request_id(" << parent_request_id << ')'
136 #define THROW_DB_EXCEPTION(errstr) \
139 ex.error_msg = errstr; \
140 LOG(ERROR) << ex.error_msg; \
150 const int32_t user_id,
151 const std::string& dashboard_name) {
// Declaration only (extern); the definition is not in this part of the file.
// Later in this file the pointer is reset to a heap-allocated copy of the handler's
// libgeos_so_filename_ member when that member is non-empty, and the new value is
// written to the INFO log.
162 extern std::unique_ptr<std::string> g_libgeos_so_filename;
166 const std::vector<LeafHostInfo>& string_leaves,
167 const std::string& base_data_path,
168 const bool allow_multifrag,
169 const bool jit_debug,
170 const bool intel_jit_profile,
171 const bool read_only,
172 const bool allow_loop_joins,
173 const bool enable_rendering,
174 const bool renderer_use_ppll_polys,
175 const bool renderer_prefer_igpu,
176 const unsigned renderer_vulkan_timeout_ms,
177 const bool renderer_use_parallel_executors,
178 const bool enable_auto_clear_render_mem,
179 const int render_oom_retry_threshold,
180 const size_t render_mem_bytes,
181 const size_t max_concurrent_render_sessions,
182 const size_t reserved_gpu_mem,
183 const bool render_compositor_use_last_gpu,
184 const size_t num_reader_threads,
187 const bool legacy_syntax,
188 const int idle_session_duration,
189 const int max_session_duration,
190 const std::string& udf_filename,
191 const std::string& clang_path,
192 const std::vector<std::string>& clang_options,
194 const std::string& libgeos_so_filename,
197 const bool is_new_db)
198 : leaf_aggregator_(db_leaves)
199 , db_leaves_(db_leaves)
200 , string_leaves_(string_leaves)
201 , base_data_path_(base_data_path)
202 , random_gen_(std::random_device{}())
203 , session_id_dist_(0, INT32_MAX)
204 , jit_debug_(jit_debug)
205 , intel_jit_profile_(intel_jit_profile)
206 , allow_multifrag_(allow_multifrag)
207 , read_only_(read_only)
208 , allow_loop_joins_(allow_loop_joins)
209 , authMetadata_(authMetadata)
210 , system_parameters_(system_parameters)
211 , legacy_syntax_(legacy_syntax)
213 std::make_unique<QueryDispatchQueue>(system_parameters.num_executors))
214 , super_user_rights_(
false)
215 , idle_session_duration_(idle_session_duration * 60)
216 , max_session_duration_(max_session_duration * 60)
217 , enable_rendering_(enable_rendering)
218 , renderer_use_ppll_polys_(renderer_use_ppll_polys)
219 , renderer_prefer_igpu_(renderer_prefer_igpu)
220 , renderer_vulkan_timeout_(renderer_vulkan_timeout_ms)
221 , renderer_use_parallel_executors_(renderer_use_parallel_executors)
222 , enable_auto_clear_render_mem_(enable_auto_clear_render_mem)
223 , render_oom_retry_threshold_(render_oom_retry_threshold)
224 , max_concurrent_render_sessions_(max_concurrent_render_sessions)
225 , reserved_gpu_mem_(reserved_gpu_mem)
226 , render_compositor_use_last_gpu_(render_compositor_use_last_gpu)
227 , render_mem_bytes_(render_mem_bytes)
228 , num_reader_threads_(num_reader_threads)
230 , libgeos_so_filename_(libgeos_so_filename)
232 , disk_cache_config_(disk_cache_config)
233 , udf_filename_(udf_filename)
234 , clang_path_(clang_path)
235 , clang_options_(clang_options)
236 , max_num_sessions_(-1) {
238 initialize(is_new_db);
239 resetSessionsStore();
246 for (
auto session : sessions) {
264 "Server already initialized; service restart required to activate any new "
278 LOG(
WARNING) <<
"This build isn't CUDA enabled, will run on CPU";
285 is_rendering_enabled =
false;
293 if (is_rendering_enabled) {
297 std::unique_ptr<CudaMgr_Namespace::CudaMgr> cuda_mgr;
301 cuda_mgr = std::make_unique<CudaMgr_Namespace::CudaMgr>(
303 }
catch (
const std::exception& e) {
304 LOG(
ERROR) <<
"Unable to instantiate CudaMgr, falling back to CPU-only mode. "
308 is_rendering_enabled =
false;
321 }
catch (
const std::exception& e) {
322 LOG(
FATAL) <<
"Failed to initialize data manager: " << e.what();
325 std::string udf_ast_filename(
"");
329 const auto cuda_mgr =
data_mgr_->getCudaMgr();
331 cuda_mgr ? cuda_mgr->getDeviceArch()
337 if (!cuda_udf_ir_file.empty()) {
342 }
catch (
const std::exception& e) {
343 LOG(
FATAL) <<
"Failed to initialize UDF compiler: " << e.what();
349 }
catch (
const std::exception& e) {
350 LOG(
FATAL) <<
"Failed to initialize Calcite server: " << e.what();
358 }
catch (
const std::exception& e) {
359 LOG(
FATAL) <<
"Failed to initialize extension functions: " << e.what();
364 }
catch (
const std::exception& e) {
365 LOG(
FATAL) <<
"Failed to initialize table functions factory: " << e.what();
371 std::vector<TUserDefinedFunction> udfs = {};
372 calcite_->setRuntimeExtensionFunctions(udfs, udtfs,
false);
373 }
catch (
const std::exception& e) {
374 LOG(
FATAL) <<
"Failed to register compile-time table functions: " << e.what();
379 LOG(
ERROR) <<
"No GPUs detected, falling back to CPU mode";
394 }
catch (
const std::exception& e) {
395 LOG(
FATAL) <<
"Failed to initialize system catalog: " << e.what();
401 if (is_rendering_enabled) {
414 }
catch (
const std::exception& e) {
415 LOG(
ERROR) <<
"Backend rendering disabled: " << e.what();
422 if (!libgeos_so_filename_.empty()) {
423 g_libgeos_so_filename.reset(
new std::string(libgeos_so_filename_));
424 LOG(
INFO) <<
"Overriding default geos library with '" + *g_libgeos_so_filename +
"'";
440 const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr) {
447 std::string session_id;
452 calcite_->getInternalSessionProxyUserName(),
453 calcite_->getInternalSessionProxyPassword(),
460 std::make_shared<Catalog_Namespace::SessionInfo>(
462 CHECK(emplace_ret.second);
474 const std::string& username,
475 const std::string& dbname) {
478 std::string username2 = username;
479 std::string dbname2 = dbname;
481 std::shared_ptr<Catalog>
cat =
nullptr;
484 SysCatalog::instance().login(dbname2, username2, std::string(), user_meta,
false);
485 }
catch (std::exception& e) {
492 std::vector<DBObject> dbObjects;
493 dbObjects.push_back(dbObject);
494 if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
496 " is not allowed to access database " + dbname2 +
".");
498 connect_impl(session_id, std::string(), dbname2, user_meta, cat, stdlog);
506 const std::string& inputToken,
507 const std::string& dbname) {
512 const std::string& username,
513 const std::string& passwd,
514 const std::string& dbname) {
518 std::string username2 = username;
519 std::string dbname2 = dbname;
521 std::shared_ptr<Catalog>
cat =
nullptr;
523 cat = SysCatalog::instance().login(
525 }
catch (std::exception& e) {
526 stdlog.appendNameValuePairs(
"user", username,
"db", dbname,
"exception", e.what());
533 std::vector<DBObject> dbObjects;
534 dbObjects.push_back(dbObject);
535 if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
536 stdlog.appendNameValuePairs(
537 "user", username,
"db", dbname,
"exception",
"Missing Privileges");
539 " is not allowed to access database " + dbname2 +
".");
541 connect_impl(session_id, passwd, dbname2, user_meta, cat, stdlog);
544 SysCatalog::instance().check_for_session_encryption(passwd, session_id);
548 const std::string& passwd,
549 const std::string& dbname,
551 std::shared_ptr<Catalog>
cat,
557 session_id = session_ptr->get_session_id();
566 ? std::vector<std::string>{{
"super"}}
567 : SysCatalog::instance().getRoles(
581 const auto session_id = session_ptr->get_session_id();
582 std::exception_ptr leaf_exception =
nullptr;
588 leaf_exception = std::current_exception();
600 if (leaf_exception) {
601 std::rethrow_exception(leaf_exception);
606 const std::string& dbname) {
610 auto stdlog =
STDLOG(session_ptr);
612 std::string dbname2 = dbname;
614 std::shared_ptr<Catalog>
cat = SysCatalog::instance().switchDatabase(
615 dbname2, session_ptr->get_currentUser().userName);
616 session_ptr->set_catalog_ptr(cat);
621 }
catch (std::exception& e) {
627 const TSessionId& session1_id_or_json) {
631 auto stdlog =
STDLOG(session1_ptr);
636 std::shared_ptr<Catalog>
cat = session1_ptr->get_catalog_ptr();
638 session2_id = session2_ptr->get_session_id();
645 }
catch (std::exception& e) {
651 const TSessionId& interrupt_session_id_or_json) {
660 auto&
cat = session_ptr->getCatalog();
661 auto stdlog =
STDLOG(session_ptr);
663 const auto allow_query_interrupt =
666 const auto dbname = cat.getCurrentDB().dbName;
677 auto target_executor_ids =
678 executor->getExecutorIdsRunningQuery(query_request_info.
sessionId());
679 if (target_executor_ids.empty()) {
681 executor->getSessionLock());
682 if (executor->checkIsQuerySessionEnrolled(query_request_info.
sessionId(),
683 session_read_lock)) {
684 session_read_lock.unlock();
685 VLOG(1) <<
"Received interrupt: "
686 <<
"User " << session_ptr->get_currentUser().userLoggable()
687 <<
", Database " << dbname << std::endl;
688 executor->interrupt(query_request_info.
sessionId(),
692 for (
auto& executor_id : target_executor_ids) {
693 VLOG(1) <<
"Received interrupt: "
694 <<
"Executor " << executor_id <<
", User "
695 << session_ptr->get_currentUser().userLoggable() <<
", Database "
696 << dbname << std::endl;
698 target_executor->interrupt(query_request_info.
sessionId(),
703 LOG(
INFO) <<
"User " << session_ptr->get_currentUser().userName
704 <<
" interrupted session with database " << dbname << std::endl;
711 return TRole::type::AGGREGATOR;
713 return TRole::type::LEAF;
715 return TRole::type::SERVER;
718 const TSessionId& session_id_or_json) {
726 _return.rendering_enabled = rendering_enabled;
730 _return.poly_rendering_enabled = rendering_enabled;
732 _return.renderer_status_json =
737 const TSessionId& session_id_or_json) {
757 LOG(
INFO) <<
"get_status() called in session-less mode";
763 ret.rendering_enabled = rendering_enabled;
767 ret.poly_rendering_enabled = rendering_enabled;
769 ret.renderer_status_json =
772 _return.push_back(ret);
774 std::vector<TServerStatus> leaf_status =
776 _return.insert(_return.end(), leaf_status.begin(), leaf_status.end());
781 const TSessionId& session_id_or_json) {
787 const auto cuda_mgr =
data_mgr_->getCudaMgr();
789 ret.num_gpu_hw = cuda_mgr->getDeviceCount();
790 ret.start_gpu = cuda_mgr->getStartGpu();
791 if (ret.start_gpu >= 0) {
792 ret.num_gpu_allocated = cuda_mgr->getDeviceCount() - cuda_mgr->getStartGpu();
795 for (int16_t device_id = 0; device_id < ret.num_gpu_hw; device_id++) {
796 TGpuSpecification gpu_spec;
797 auto deviceProperties = cuda_mgr->getDeviceProperties(device_id);
798 gpu_spec.num_sm = deviceProperties->numMPs;
799 gpu_spec.clock_frequency_kHz = deviceProperties->clockKhz;
800 gpu_spec.memory = deviceProperties->globalMem;
801 gpu_spec.compute_capability_major = deviceProperties->computeMajor;
802 gpu_spec.compute_capability_minor = deviceProperties->computeMinor;
803 ret.gpu_info.push_back(gpu_spec);
808 ret.num_cpu_hw = std::thread::hardware_concurrency();
812 _return.hardware_info.push_back(ret);
816 const TSessionId& session_id_or_json) {
821 auto stdlog =
STDLOG(session_ptr);
823 auto user_metadata = session_ptr->get_currentUser();
824 _return.user = user_metadata.userName;
825 _return.database = session_ptr->getCatalog().getCurrentDB().dbName;
826 _return.start_time = session_ptr->get_start_time();
827 _return.is_super = user_metadata.isSuper;
840 const auto array_tv = boost::get<ArrayTargetValue>(&tv);
842 bool is_null = !array_tv->is_initialized();
844 const auto& vec = array_tv->get();
845 for (
const auto& elem_tv : vec) {
849 column.data.arr_col.push_back(tColumn);
850 column.nulls.push_back(is_null && !ti.
get_notnull());
852 const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
854 auto s_n = boost::get<NullableString>(scalar_tv);
855 auto s = boost::get<std::string>(s_n);
857 column.data.str_col.push_back(*s);
859 column.data.str_col.emplace_back(
"");
860 auto null_p = boost::get<void*>(s_n);
861 CHECK(null_p && !*null_p);
865 const auto array_tv = boost::get<ArrayTargetValue>(&tv);
867 bool is_null = !array_tv->is_initialized();
871 const auto& vec = array_tv->get();
872 for (
const auto& elem_tv : vec) {
875 column.data.arr_col.push_back(tColumn);
876 column.nulls.push_back(
false);
879 column.data.arr_col.push_back(tColumn);
880 column.nulls.push_back(is_null && !ti.
get_notnull());
885 const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
887 if (boost::get<int64_t>(scalar_tv)) {
888 int64_t data = *(boost::get<int64_t>(scalar_tv));
891 double val =
static_cast<double>(data);
893 val /= pow(10.0, std::abs(ti.
get_scale()));
895 column.data.real_col.push_back(val);
897 column.data.int_col.push_back(data);
926 column.nulls.push_back(
false);
928 }
else if (boost::get<double>(scalar_tv)) {
929 double data = *(boost::get<double>(scalar_tv));
930 column.data.real_col.push_back(data);
936 }
else if (boost::get<float>(scalar_tv)) {
938 float data = *(boost::get<float>(scalar_tv));
939 column.data.real_col.push_back(data);
941 }
else if (boost::get<NullableString>(scalar_tv)) {
942 auto s_n = boost::get<NullableString>(scalar_tv);
943 auto s = boost::get<std::string>(s_n);
945 column.data.str_col.push_back(*s);
947 column.data.str_col.emplace_back(
"");
948 auto null_p = boost::get<void*>(s_n);
949 CHECK(null_p && !*null_p);
960 const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
963 const auto array_tv = boost::get<ArrayTargetValue>(&tv);
965 if (array_tv->is_initialized()) {
966 const auto& vec = array_tv->get();
967 for (
const auto& elem_tv : vec) {
969 datum.val.arr_val.push_back(scalar_col_val);
972 datum.is_null =
false;
974 datum.is_null =
true;
978 if (boost::get<int64_t>(scalar_tv)) {
979 int64_t data = *(boost::get<int64_t>(scalar_tv));
982 double val =
static_cast<double>(data);
984 val /= pow(10.0, std::abs(ti.
get_scale()));
986 datum.val.real_val = val;
988 datum.val.int_val = data;
1002 datum.is_null = (datum.val.int_val ==
NULL_INT);
1007 datum.is_null = (datum.val.int_val ==
NULL_BIGINT);
1014 datum.is_null = (datum.val.int_val ==
NULL_BIGINT);
1017 datum.is_null =
false;
1019 }
else if (boost::get<double>(scalar_tv)) {
1020 datum.val.real_val = *(boost::get<double>(scalar_tv));
1022 datum.is_null = (datum.val.real_val ==
NULL_FLOAT);
1024 datum.is_null = (datum.val.real_val ==
NULL_DOUBLE);
1026 }
else if (boost::get<float>(scalar_tv)) {
1028 datum.val.real_val = *(boost::get<float>(scalar_tv));
1029 datum.is_null = (datum.val.real_val ==
NULL_FLOAT);
1030 }
else if (boost::get<NullableString>(scalar_tv)) {
1031 auto s_n = boost::get<NullableString>(scalar_tv);
1032 auto s = boost::get<std::string>(s_n);
1034 datum.val.str_val = *s;
1036 auto null_p = boost::get<void*>(s_n);
1037 CHECK(null_p && !*null_p);
1047 TQueryResult& _return,
1049 const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
1050 const std::string& query_str,
1051 const bool column_format,
1052 const std::string& nonce,
1053 const int32_t first_n,
1054 const int32_t at_most_n,
1055 const bool use_calcite) {
1056 _return.total_time_ms = 0;
1057 _return.nonce = nonce;
1059 switch (pw.getQueryType()) {
1061 _return.query_type = TQueryType::READ;
1062 VLOG(1) <<
"query type: READ";
1066 _return.query_type = TQueryType::WRITE;
1067 VLOG(1) <<
"query type: WRITE";
1071 _return.query_type = TQueryType::SCHEMA_READ;
1072 VLOG(1) <<
"query type: SCHEMA READ";
1076 _return.query_type = TQueryType::SCHEMA_WRITE;
1077 VLOG(1) <<
"query type: SCHEMA WRITE";
1081 _return.query_type = TQueryType::UNKNOWN;
1093 session_ptr->get_executor_device_type(),
1099 _return, result, query_state_proxy, column_format, first_n, at_most_n);
1106 const bool column_format,
1107 const int32_t first_n,
1108 const int32_t at_most_n) {
1110 if (result.
empty()) {
1143 const TSessionId& session_id_or_json,
1144 const std::string& query_str,
1145 const bool column_format,
1146 const std::string& nonce,
1147 const int32_t first_n,
1148 const int32_t at_most_n) {
1151 const std::string exec_ra_prefix =
"execute relalg";
1152 const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1154 use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1157 auto stdlog =
STDLOG(session_ptr, query_state);
1159 stdlog.appendNameValuePairs(
"nonce", nonce);
1162 ScopeGuard reset_was_deferred_copy_from = [
this, &session_ptr] {
1166 if (first_n >= 0 && at_most_n >= 0) {
1176 query_state->createQueryStateProxy(),
1177 query_state->getQueryStr(),
1184 _return.nonce = nonce;
1187 query_state->createQueryStateProxy(),
1197 std::string debug_json = timer.stopAndGetJson();
1198 if (!debug_json.empty()) {
1199 _return.__set_debug(std::move(debug_json));
1201 stdlog.appendNameValuePairs(
1202 "execution_time_ms",
1203 _return.execution_time_ms,
1205 stdlog.duration<std::chrono::milliseconds>());
1208 }
catch (
const std::exception& e) {
1209 if (strstr(e.what(),
"java.lang.NullPointerException")) {
1211 }
else if (strstr(e.what(),
"SQL Error: Encountered \";\"")) {
1213 }
else if (strstr(e.what(),
"SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1222 const TSessionId& session_id_or_json,
1223 const std::string& query_str,
1224 const bool column_format,
1225 const int32_t first_n,
1226 const int32_t at_most_n,
1230 const std::string exec_ra_prefix =
"execute relalg";
1231 const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1233 use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1238 auto stdlog =
STDLOG(session_ptr, query_state);
1242 ScopeGuard reset_was_deferred_copy_from = [
this, &session_ptr] {
1246 if (first_n >= 0 && at_most_n >= 0) {
1251 query_state->createQueryStateProxy(),
1253 session_ptr->get_executor_device_type(),
1263 stdlog.appendNameValuePairs(
1264 "execution_time_ms",
1267 stdlog.duration<std::chrono::milliseconds>());
1270 }
catch (
const std::exception& e) {
1271 if (strstr(e.what(),
"java.lang.NullPointerException")) {
1273 }
else if (strstr(e.what(),
"SQL Error: Encountered \";\"")) {
1275 }
else if (strstr(e.what(),
"SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1284 int64_t total_time_ms(0);
1296 TCreateParams create_params;
1297 if (deferred_copy_from_state->partitions ==
"REPLICATED") {
1298 create_params.is_replicated =
true;
1304 deferred_copy_from_state->table,
1305 deferred_copy_from_state->file_name,
1306 deferred_copy_from_state->copy_params,
1311 return total_time_ms;
1315 const TSessionId& session_id_or_json,
1316 const std::string& query_str,
1318 const int32_t device_id,
1319 const int32_t first_n,
1326 auto stdlog =
STDLOG(session_ptr, query_state);
1328 const auto executor_device_type = session_ptr->get_executor_device_type();
1330 if (results_device_type == TDeviceType::GPU) {
1337 if (device_id < 0 || device_id >=
data_mgr_->getCudaMgr()->getDeviceCount()) {
1339 std::string(
"Invalid device_id or unavailable GPU with this ID"));
1345 "Only read queries supported for the Arrow sql_execute_df endpoint."));
1349 "Explain is currently unsupported by the Arrow sql_execute_df endpoint."));
1355 query_state->createQueryStateProxy(),
1357 executor_device_type,
1363 const auto result_set = execution_result.
getRows();
1364 const auto executor_results_device_type = results_device_type == TDeviceType::CPU
1367 _return.execution_time_ms =
1369 const auto converter = std::make_unique<ArrowResultSetConverter>(
1372 executor_results_device_type,
1378 _return.arrow_conversion_time_ms +=
1381 std::string(arrow_result.sm_handle.begin(), arrow_result.sm_handle.end());
1382 _return.sm_size = arrow_result.sm_size;
1384 std::string(arrow_result.df_handle.begin(), arrow_result.df_handle.end());
1386 std::string(arrow_result.df_buffer.begin(), arrow_result.df_buffer.end());
1391 std::make_pair(_return.df_handle, arrow_result.serialized_cuda_handle));
1393 _return.df_size = arrow_result.df_size;
1397 const TSessionId& session_id_or_json,
1398 const std::string& query_str,
1399 const int32_t device_id,
1400 const int32_t first_n) {
1406 request_info.
json(),
1411 TArrowTransport::SHARED_MEMORY);
1416 const TDataFrame& df,
1418 const int32_t device_id) {
1422 std::string serialized_cuda_handle =
"";
1423 if (device_type == TDeviceType::GPU) {
1427 ex.error_msg = std::string(
1428 "Current data frame handle is not bookkept or been inserted "
1436 std::vector<char> sm_handle(df.sm_handle.begin(), df.sm_handle.end());
1437 std::vector<char> df_handle(df.df_handle.begin(), df.df_handle.end());
1439 sm_handle, df.sm_size, df_handle, df.df_size, serialized_cuda_handle};
1448 const TSessionId& session_id_or_json,
1449 const std::string& query_str) {
1456 stdlog.setQueryState(query_state);
1459 if (
ExplainInfo(query_str).isExplain() || pw.is_ddl || pw.is_update_dml) {
1460 throw std::runtime_error(
"Can only validate SELECT statements.");
1463 const auto execute_read_lock =
1468 TPlanResult parse_result;
1470 std::tie(parse_result, locks) =
parse_to_ra(query_state->createQueryStateProxy(),
1471 query_state->getQueryStr(),
1476 const auto query_ra = parse_result.plan_result;
1480 query_state->getConstSessionInfo()->getCatalog());
1481 }
catch (
const std::exception& e) {
1497 const std::string& sql) {
1498 boost::regex id_regex{R
"(([[:alnum:]]|_|\.)+)",
1499 boost::regex::extended | boost::regex::icase};
1500 boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1501 boost::sregex_token_iterator end;
1502 std::unordered_set<std::string> uc_column_names;
1503 std::unordered_set<std::string> uc_column_table_qualifiers;
1504 for (; tok_it != end; ++tok_it) {
1505 std::string column_name = *tok_it;
1506 std::vector<std::string> column_tokens;
1507 boost::split(column_tokens, column_name, boost::is_any_of(
"."));
1508 if (column_tokens.size() == 2) {
1510 uc_column_table_qualifiers.insert(
to_upper(column_tokens.front()));
1512 uc_column_names.insert(
to_upper(column_name));
1515 return {uc_column_names, uc_column_table_qualifiers};
1521 const TSessionId& session_id_or_json,
1522 const std::string& sql,
1527 std::vector<std::string> visible_tables;
1531 proj_tokens.uc_column_names, visible_tables, stdlog);
1533 compatible_table_names.insert(proj_tokens.uc_column_table_qualifiers.begin(),
1534 proj_tokens.uc_column_table_qualifiers.end());
1539 [&compatible_table_names](
const TCompletionHint& lhs,
const TCompletionHint& rhs) {
1540 if (lhs.type == TCompletionHintType::TABLE &&
1541 rhs.type == TCompletionHintType::TABLE) {
1544 if (compatible_table_names.find(
to_upper(lhs.hints.back())) !=
1545 compatible_table_names.end() &&
1546 compatible_table_names.find(
to_upper(rhs.hints.back())) ==
1547 compatible_table_names.end()) {
1551 return lhs.type < rhs.type;
1556 std::vector<std::string>& visible_tables,
1558 const std::string& sql,
1566 calcite_->getCompletionHints(session_info, visible_tables, sql, cursor));
1567 }
catch (
const std::exception& e) {
1569 ex.error_msg = std::string(e.what());
1573 boost::regex from_expr{R
"(\s+from\s+)", boost::regex::extended | boost::regex::icase};
1574 const size_t length_to_cursor =
1575 cursor < 0 ? sql.size() : std::min(sql.size(),
static_cast<size_t>(cursor));
1577 if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, from_expr)) {
1586 std::vector<std::string>& visible_tables,
1587 const std::string& sql,
1589 const auto last_word =
1591 boost::regex select_expr{R
"(\s*select\s+)",
1592 boost::regex::extended | boost::regex::icase};
1593 const size_t length_to_cursor =
1594 cursor < 0 ? sql.size() : std::min(sql.size(),
static_cast<size_t>(cursor));
1597 if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, select_expr)) {
1608 const std::string kFromKeyword{
"FROM"};
1609 if (boost::istarts_with(kFromKeyword, last_word)) {
1610 TCompletionHint keyword_hint;
1611 keyword_hint.type = TCompletionHintType::KEYWORD;
1612 keyword_hint.replaced = last_word;
1613 keyword_hint.hints.emplace_back(kFromKeyword);
1614 hints.push_back(keyword_hint);
1617 const std::string kSelectKeyword{
"SELECT"};
1618 if (boost::istarts_with(kSelectKeyword, last_word)) {
1619 TCompletionHint keyword_hint;
1620 keyword_hint.type = TCompletionHintType::KEYWORD;
1621 keyword_hint.replaced = last_word;
1622 keyword_hint.hints.emplace_back(kSelectKeyword);
1623 hints.push_back(keyword_hint);
1628 std::unordered_map<std::string, std::unordered_set<std::string>>
1631 std::unordered_map<std::string, std::unordered_set<std::string>> column_names_by_table;
1632 for (
auto it = table_names.begin(); it != table_names.end();) {
1633 TTableDetails table_details;
1636 }
catch (
const TDBException& e) {
1638 it = table_names.erase(it);
1641 for (
const auto& column_type : table_details.row_desc) {
1642 column_names_by_table[*it].emplace(column_type.col_name);
1646 return column_names_by_table;
1655 const std::unordered_set<std::string>& uc_column_names,
1656 std::vector<std::string>& table_names,
1658 std::unordered_set<std::string> compatible_table_names_by_column;
1659 for (
auto it = table_names.begin(); it != table_names.end();) {
1660 TTableDetails table_details;
1663 }
catch (
const TDBException& e) {
1665 it = table_names.erase(it);
1668 for (
const auto& column_type : table_details.row_desc) {
1669 if (uc_column_names.find(
to_upper(column_type.col_name)) != uc_column_names.end()) {
1670 compatible_table_names_by_column.emplace(
to_upper(*it));
1676 return compatible_table_names_by_column;
1680 const bool is_update_delete) {
1687 TQueryResult _return;
1689 auto execute_rel_alg_task = std::make_shared<QueryDispatchQueue::Task>(
1694 parent_thread_local_ids =
1710 auto result_future = execute_rel_alg_task->get_future();
1711 result_future.get();
1717 const TSessionId& session_id_or_json) {
1721 auto session_ptr = stdlog.getConstSessionInfo();
1722 if (!session_ptr->get_currentUser().isSuper) {
1726 SysCatalog::instance().getRoles(session_ptr->get_currentUser().userName,
1727 session_ptr->getCatalog().getCurrentDB().dbId);
1729 roles = SysCatalog::instance().getRoles(
1730 false,
true, session_ptr->get_currentUser().userName);
1735 const std::string& granteeName,
1736 const std::string& roleName) {
1740 const auto stdlog =
STDLOG(session_ptr);
1741 const auto current_user = session_ptr->get_currentUser();
1742 if (!current_user.isSuper) {
1743 if (
const auto* user = SysCatalog::instance().getUserGrantee(granteeName);
1744 user && current_user.userName != granteeName) {
1746 }
else if (!SysCatalog::instance().isRoleGrantedToGrantee(
1747 current_user.userName, granteeName,
true)) {
1749 "Only super users can check roles assignment that have not been directly "
1750 "granted to a user.");
1753 return SysCatalog::instance().isRoleGrantedToGrantee(granteeName, roleName,
false);
1758 TDBObject outObject;
1759 outObject.objectName = inObject.
getName();
1760 outObject.grantee = roleName;
1813 const int type_val =
static_cast<int>(inObject.
getType());
1814 CHECK(type_val >= 0 && type_val < 6);
1820 const TDBObjectPermissions& permissions) {
1821 if (!permissions.__isset.database_permissions_) {
1824 auto perms = permissions.database_permissions_;
1827 (perms.view_sql_editor_ &&
1837 const TDBObjectPermissions& permissions) {
1838 if (!permissions.__isset.table_permissions_) {
1841 auto perms = permissions.table_permissions_;
1857 const TDBObjectPermissions& permissions) {
1858 if (!permissions.__isset.dashboard_permissions_) {
1861 auto perms = permissions.dashboard_permissions_;
1873 const TDBObjectPermissions& permissions) {
1874 if (!permissions.__isset.view_permissions_) {
1877 auto perms = permissions.view_permissions_;
1891 const TDBObjectPermissions& permissions) {
1892 CHECK(permissions.__isset.server_permissions_);
1893 auto perms = permissions.server_permissions_;
1905 const std::string& granteeName,
1906 const std::string& objectName,
1908 const TDBObjectPermissions& permissions) {
1912 auto stdlog =
STDLOG(session_ptr);
1913 auto const&
cat = session_ptr->getCatalog();
1914 auto const& current_user = session_ptr->get_currentUser();
1915 if (!current_user.isSuper && !SysCatalog::instance().isRoleGrantedToGrantee(
1916 current_user.userName, granteeName,
false)) {
1918 "Users except superusers can only check privileges for self or roles granted "
1923 if (SysCatalog::instance().getMetadataForUser(granteeName, user_meta) &&
1927 Grantee* grnt = SysCatalog::instance().getGrantee(granteeName);
1932 std::string func_name;
1933 switch (objectType) {
1936 func_name =
"database";
1940 func_name =
"table";
1944 func_name =
"dashboard";
1952 func_name =
"server";
1957 DBObject req_object(objectName, type);
1961 if (grantee_object) {
1971 const TSessionId& session_id_or_json,
1972 const std::string& roleName) {
1976 auto stdlog =
STDLOG(session_ptr);
1977 auto const& user = session_ptr->get_currentUser();
1978 if (!user.isSuper &&
1979 !SysCatalog::instance().isRoleGrantedToGrantee(user.userName, roleName,
false)) {
1982 auto* rl = SysCatalog::instance().getGrantee(roleName);
1984 auto dbId = session_ptr->getCatalog().getCurrentDB().dbId;
1985 for (
auto& dbObject : *rl->getDbObjects(
true)) {
1986 if (dbObject.first.dbId != dbId) {
1992 TDBObjectsForRole.push_back(tdbObject);
2000 const TSessionId& session_id_or_json,
2001 const std::string& objectName,
2006 auto stdlog =
STDLOG(session_ptr);
2007 const auto&
cat = session_ptr->getCatalog();
2029 DBObject object_to_find(objectName, object_type);
2034 if (objectName ==
"") {
2035 object_to_find =
DBObject(-1, object_type);
2037 object_to_find =
DBObject(std::stoi(objectName), object_type);
2040 !objectName.empty()) {
2042 auto td =
cat.getMetadataForTable(objectName,
false);
2045 object_to_find =
DBObject(objectName, object_type);
2049 }
catch (
const std::exception&) {
2054 DBObject object_to_find_dblevel(
"", object_type);
2057 if (session_ptr->get_currentUser().isSuper) {
2061 session_ptr->get_currentUser().userId};
2062 dbObj.setName(
"super");
2063 TDBObjects.push_back(
2067 std::vector<std::string> grantees =
2068 SysCatalog::instance().getRoles(
true,
2069 session_ptr->get_currentUser().isSuper,
2070 session_ptr->get_currentUser().userName);
2071 for (
const auto& grantee : grantees) {
2073 auto* gr = SysCatalog::instance().getGrantee(grantee);
2074 if (gr && (object_found = gr->findDbObject(object_to_find.
getObjectKey(),
true))) {
2079 (object_found = gr->findDbObject(object_to_find_dblevel.
getObjectKey(),
true))) {
2086 std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
2087 std::vector<std::string>& roles,
2088 const std::string& granteeName,
2090 auto* grantee = SysCatalog::instance().getGrantee(granteeName);
2092 if (session_ptr->get_currentUser().isSuper) {
2093 roles = grantee->getRoles(!effective);
2094 }
else if (grantee->isUser()) {
2095 if (session_ptr->get_currentUser().userName == granteeName) {
2096 roles = grantee->getRoles(!effective);
2099 "Only a superuser is authorized to request list of roles granted to another "
2103 CHECK(!grantee->isUser());
2106 if (SysCatalog::instance().isRoleGrantedToGrantee(
2107 session_ptr->get_currentUser().userName, granteeName,
false)) {
2108 roles = grantee->getRoles(!effective);
2119 const TSessionId& session_id_or_json,
2120 const std::string& granteeName) {
2126 auto session_ptr = stdlog.getConstSessionInfo();
2131 const TSessionId& session_id_or_json,
2132 const std::string& granteeName) {
2136 auto session_ptr = stdlog.getConstSessionInfo();
2142 const std::map<std::string, std::vector<std::string>>& table_col_names) {
2143 std::ostringstream oss;
2144 for (
const auto& [table_name, col_names] : table_col_names) {
2145 oss <<
":" << table_name;
2146 for (
const auto& col_name : col_names) {
2147 oss <<
"," << col_name;
2155 TPixelTableRowResult& _return,
2156 const TSessionId& session_id_or_json,
2157 const int64_t widget_id,
2158 const TPixel& pixel,
2159 const std::map<std::string, std::vector<std::string>>& table_col_names,
2160 const bool column_format,
2161 const int32_t pixel_radius,
2162 const std::string& nonce) {
2166 auto stdlog =
STDLOG(session_ptr,
2195 }
catch (std::exception& e) {
2202 TColumnType col_type;
2226 col_type.col_type.comp_param = 0;
2233 col_type.col_type.comp_param = dd->dictNBits;
2235 col_type.col_type.comp_param =
2248 const TSessionId& session_id_or_json,
2249 const std::string& table_name,
2250 const bool include_system_columns) {
2260 TTableDetails& _return,
2261 const TSessionId& session_id_or_json,
2262 const std::string& table_name,
2263 const std::string& database_name) {
2273 const TSessionId& session_id_or_json,
2274 const std::string& table_name) {
2281 auto execute_read_lock =
2289 const TSessionId& session_id_or_json,
2290 const std::string& table_name,
2291 const std::string& database_name) {
2298 auto execute_read_lock =
2309 CHECK(foreign_table);
2310 TTableRefreshInfo refresh_info;
2311 const auto& update_type =
2313 CHECK(update_type.has_value());
2316 }
else if (update_type.value() ==
2318 refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2320 UNREACHABLE() <<
"Unexpected refresh update type: " << update_type.value();
2323 const auto& timing_type =
2325 CHECK(timing_type.has_value());
2327 refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2328 refresh_info.interval_count = -1;
2329 }
else if (timing_type.value() ==
2331 refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2332 const auto& start_date_time = foreign_table->getOption(
2334 CHECK(start_date_time.has_value());
2335 auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2336 refresh_info.start_date_time =
2338 const auto& interval =
2340 CHECK(interval.has_value());
2341 const auto& interval_str = interval.value();
2342 refresh_info.interval_count =
2343 std::stoi(interval_str.substr(0, interval_str.length() - 1));
2344 auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2345 if (interval_type ==
'H') {
2346 refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2347 }
else if (interval_type ==
'D') {
2348 refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2349 }
else if (interval_type ==
'S') {
2351 refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2353 UNREACHABLE() <<
"Unexpected interval type: " << interval_str;
2356 UNREACHABLE() <<
"Unexpected refresh timing type: " << timing_type.value();
2358 if (foreign_table->last_refresh_time !=
2361 {
kTIMESTAMP}, foreign_table->last_refresh_time);
2363 if (foreign_table->next_refresh_time !=
2366 {
kTIMESTAMP}, foreign_table->next_refresh_time);
2368 return refresh_info;
2374 const std::string& table_name,
2375 const bool get_system,
2376 const bool get_physical,
2377 const std::string& database_name) {
2380 auto cat = (database_name.empty())
2381 ? &session_info->getCatalog()
2382 : SysCatalog::instance().getCatalog(database_name).get();
2386 const auto td_with_lock =
2388 *
cat, table_name,
false);
2389 const auto td = td_with_lock();
2392 bool have_privileges_on_view_sources =
true;
2398 const auto [query_ra, locks] =
parse_to_ra(query_state->createQueryStateProxy(),
2399 query_state->getQueryStr(),
2405 calcite_->checkAccessedObjectsPrivileges(query_state->createQueryStateProxy(),
2407 }
catch (
const std::runtime_error&) {
2408 have_privileges_on_view_sources =
false;
2412 query_state->createQueryStateProxy());
2416 throw std::runtime_error(
2417 "Unable to access view " + table_name +
2418 ". The view may not exist, or the logged in user may not "
2419 "have permission to access the view.");
2421 }
catch (
const std::exception& e) {
2422 throw std::runtime_error(
"View '" + table_name +
2423 "' query has failed with an error: '" +
2424 std::string(e.what()) +
2425 "'.\nThe view must be dropped and re-created to "
2426 "resolve the error. \nQuery:\n" +
2427 query_state->getQueryStr());
2431 const auto col_descriptors =
cat->getAllColumnMetadataForTable(
2432 td->tableId, get_system,
true, get_physical);
2433 const auto deleted_cd =
cat->getDeletedColumn(td);
2434 for (
const auto cd : col_descriptors) {
2435 if (cd == deleted_cd) {
2441 throw std::runtime_error(
2442 "Unable to access table " + table_name +
2443 ". The table may not exist, or the logged in user may not "
2444 "have permission to access the table.");
2447 _return.fragment_size = td->maxFragRows;
2448 _return.page_size = td->fragPageSize;
2449 _return.max_rows = td->maxRows;
2451 (have_privileges_on_view_sources ? td->viewSQL
2452 :
"[Not enough privileges to see the view SQL]");
2453 _return.shard_count = td->nShards;
2454 _return.key_metainfo = td->keyMetainfo;
2456 _return.partition_detail =
2457 td->partitions.empty()
2458 ? TPartitionDetail::DEFAULT
2460 ? TPartitionDetail::REPLICATED
2461 : (td->partitions ==
"SHARDED" ? TPartitionDetail::SHARDED
2462 : TPartitionDetail::OTHER));
2464 _return.table_type = TTableType::VIEW;
2465 }
else if (td->isTemporaryTable()) {
2466 _return.table_type = TTableType::TEMPORARY;
2467 }
else if (td->isForeignTable()) {
2468 _return.table_type = TTableType::FOREIGN;
2471 _return.table_type = TTableType::DEFAULT;
2474 }
catch (
const std::runtime_error& e) {
2480 const TSessionId& session_id_or_json,
2481 const std::string& link) {
2485 auto stdlog =
STDLOG(session_ptr);
2487 auto const&
cat = session_ptr->getCatalog();
2492 _return.view_state = ld->viewState;
2493 _return.view_name = ld->link;
2494 _return.update_time = ld->updateTime;
2495 _return.view_metadata = ld->viewMetadata;
2504 if (user_metadata.isSuper) {
2510 std::vector<DBObject> privObjects = {dbObject};
2512 return SysCatalog::instance().hasAnyPrivileges(user_metadata, privObjects);
2518 const std::string& database_name) {
2519 if (database_name.empty()) {
2523 auto request_cat = SysCatalog::instance().getCatalog(database_name);
2527 table_names = request_cat->getTableNamesForUser(session_info.
get_currentUser(),
2533 const TSessionId& session_id_or_json) {
2543 const TSessionId& session_id_or_json,
2544 const std::string& database_name) {
2551 *stdlog.getConstSessionInfo(),
2557 const TSessionId& session_id_or_json) {
2566 const TSessionId& session_id_or_json) {
2577 const bool with_table_locks) {
2581 const auto tables =
cat.getAllTableMetadataCopy();
2582 _return.reserve(
tables.size());
2584 for (
const auto& td :
tables) {
2585 if (td.shard >= 0) {
2595 ret.table_name = td.tableName;
2596 ret.is_view = td.isView;
2598 ret.shard_count = td.nShards;
2599 ret.max_rows = td.maxRows;
2600 ret.table_id = td.tableId;
2602 std::vector<TTypeInfo> col_types;
2603 std::vector<std::string> col_names;
2604 size_t num_cols = 0;
2607 TPlanResult parse_result;
2611 const auto query_ra = parse_result.plan_result;
2626 num_cols = result.row_set.row_desc.size();
2627 for (
const auto& col : result.row_set.row_desc) {
2628 if (col.is_physical) {
2632 col_types.push_back(col.col_type);
2633 col_names.push_back(col.col_name);
2635 }
catch (std::exception& e) {
2636 LOG(
WARNING) <<
"get_tables_meta: Ignoring broken view: " << td.tableName;
2641 const auto col_descriptors =
2642 cat.getAllColumnMetadataForTable(td.tableId,
false,
true,
false);
2643 const auto deleted_cd =
cat.getDeletedColumn(&td);
2644 for (
const auto cd : col_descriptors) {
2645 if (cd == deleted_cd) {
2649 col_names.push_back(cd->columnName);
2651 num_cols = col_descriptors.size();
2655 }
catch (
const std::runtime_error& e) {
2660 ret.num_cols = num_cols;
2661 std::copy(col_types.begin(), col_types.end(), std::back_inserter(ret.col_types));
2662 std::copy(col_names.begin(), col_names.end(), std::back_inserter(ret.col_names));
2664 _return.push_back(ret);
2669 const TSessionId& session_id_or_json) {
2674 auto session_ptr = stdlog.getConstSessionInfo();
2676 stdlog.setQueryState(query_state);
2678 auto execute_read_lock =
2685 }
catch (
const std::exception& e) {
2691 const TSessionId& session_id_or_json) {
2696 auto session_ptr = stdlog.getConstSessionInfo();
2697 std::list<Catalog_Namespace::UserMetadata> user_list;
2699 if (!session_ptr->get_currentUser().isSuper) {
2700 user_list = SysCatalog::instance().getAllUserMetadata(
2701 session_ptr->getCatalog().getCurrentDB().dbId);
2703 user_list = SysCatalog::instance().getAllUserMetadata();
2705 for (
auto u : user_list) {
2706 user_names.push_back(u.userName);
2719 auto session_ptr = stdlog.getConstSessionInfo();
2720 if (!session_ptr->get_currentUser().isSuper) {
2725 }
catch (
const std::exception& e) {
2737 auto session_ptr = stdlog.getConstSessionInfo();
2738 if (!session_ptr->get_currentUser().isSuper) {
2743 }
catch (
const std::exception& e) {
2756 auto session_ptr = stdlog.getConstSessionInfo();
2757 if (!session_ptr->get_currentUser().isSuper) {
2767 const TSessionId& leaf_session_id_or_json,
2768 const std::string& start_time_str,
2769 const std::string&
label,
2770 bool for_running_query_kernel) {
2777 auto session_ptr = stdlog.getConstSessionInfo();
2780 executor->enrollQuerySession(parent_request_info.
sessionId(),
2784 for_running_query_kernel
2785 ? QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL
2786 : QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
2790 const TSessionId& leaf_session_id_or_json,
2791 const std::string& start_time_str,
2792 const std::string&
label,
2793 bool for_running_query_kernel) {
2801 executor->clearQuerySessionStatus(parent_request_info.
sessionId(), start_time_str);
2809 const TSessionId& session_id_or_json,
2810 const std::string& memory_level) {
2815 std::vector<Data_Namespace::MemoryInfo> internal_memory;
2816 if (!memory_level.compare(
"gpu")) {
2824 for (
auto memInfo : internal_memory) {
2825 TNodeMemoryInfo nodeInfo;
2826 nodeInfo.page_size = memInfo.pageSize;
2827 nodeInfo.max_num_pages = memInfo.maxNumPages;
2828 nodeInfo.num_pages_allocated = memInfo.numPageAllocated;
2829 nodeInfo.is_allocation_capped = memInfo.isAllocationCapped;
2830 for (
auto gpu : memInfo.nodeMemoryData) {
2832 md.slab = gpu.slabNum;
2833 md.start_page = gpu.startPage;
2834 md.num_pages = gpu.numPages;
2835 md.touch = gpu.touch;
2836 md.chunk_key.insert(md.chunk_key.end(), gpu.chunk_key.begin(), gpu.chunk_key.end());
2838 nodeInfo.node_memory_data.push_back(md);
2840 _return.push_back(nodeInfo);
2845 const TSessionId& session_id_or_json) {
2850 auto session_ptr = stdlog.getConstSessionInfo();
2851 const auto& user = session_ptr->get_currentUser();
2853 SysCatalog::instance().getDatabaseListForUser(user);
2854 for (
auto& db : dbs) {
2856 dbinfo.db_name = std::move(db.dbName);
2857 dbinfo.db_owner = std::move(db.dbOwnerName);
2858 dbinfos.push_back(std::move(dbinfo));
2863 auto executor =
get_session_ptr(session_id)->get_executor_device_type();
2866 return TExecuteMode::CPU;
2868 return TExecuteMode::GPU;
2873 return TExecuteMode::CPU;
2880 auto stdlog =
STDLOG(session_ptr);
2889 throw std::runtime_error(
"Cannot import a sharded table directly to a leaf");
2894 const std::vector<std::string>& column_names) {
2895 std::unordered_set<std::string> unique_names;
2896 for (
const auto&
name : column_names) {
2898 if (unique_names.find(lower_name) != unique_names.end()) {
2901 unique_names.insert(lower_name);
2904 for (
const auto& cd : descs) {
2905 auto iter = unique_names.find(
to_lower(cd->columnName));
2906 if (iter != unique_names.end()) {
2907 unique_names.erase(iter);
2910 if (!unique_names.empty()) {
2920 const std::vector<std::string>& column_names) {
2921 std::vector<int> desc_to_column_ids;
2922 if (column_names.empty()) {
2924 for (
const auto& cd : descs) {
2925 if (!cd->isGeoPhyCol) {
2926 desc_to_column_ids.push_back(col_idx);
2931 for (
const auto& cd : descs) {
2932 if (!cd->isGeoPhyCol) {
2934 for (
size_t j = 0; j < column_names.size(); ++j) {
2937 desc_to_column_ids.push_back(j);
2942 if (!cd->columnType.get_notnull()) {
2943 desc_to_column_ids.push_back(-1);
2946 "' cannot be omitted due to NOT NULL constraint");
2952 return desc_to_column_ids;
2958 const TSessionId& session_id,
2960 std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
2964 const std::string& table_name,
2965 bool assign_render_groups) {
2966 auto geo_col_idx = col_idx - 1;
2967 const auto wkt_or_wkb_hex_column = import_buffers[geo_col_idx]->getGeoStringBuffer();
2968 std::vector<std::vector<double>> coords_column, bounds_column;
2969 std::vector<std::vector<int>> ring_sizes_column, poly_rings_column;
2970 std::vector<int> render_groups_column;
2972 if (num_rows != wkt_or_wkb_hex_column->size() ||
2980 std::ostringstream oss;
2981 oss <<
"Invalid geometry in column " << cd->
columnName;
2998 <<
"load_table_binary_columnar_polys: Creating Render Group Assignment "
2999 "Persistent Data for Session '"
3000 << session_id <<
"'";
3001 auto [itr_column, emplaced_column] =
3004 <<
"load_table_binary_columnar_polys: Creating Render Group Assignment "
3005 "Persistent Data for Table '"
3006 << table_name <<
"'";
3007 auto [itr_analyzer, emplaced_analyzer] = itr_column->second.try_emplace(
3008 cd->
columnName, std::make_unique<import_export::RenderGroupAnalyzer>());
3010 <<
"load_table_binary_columnar_polys: Creating Render Group Assignment "
3011 "Persistent Data for Column '"
3013 render_group_analyzer = itr_analyzer->second.get();
3014 CHECK(render_group_analyzer);
3017 if (emplaced_analyzer) {
3018 LOG(
INFO) <<
"load_table_binary_columnar_polys: Seeding Render Groups from "
3019 "existing table...";
3020 render_group_analyzer->seedFromExistingTableContents(
3022 LOG(
INFO) <<
"load_table_binary_columnar_polys: Done";
3027 LOG(
INFO) <<
"load_table_binary_columnar_polys: Assigning Render Groups...";
3028 render_groups_column.reserve(bounds_column.size());
3029 for (
auto const& bounds : bounds_column) {
3031 int rg = render_group_analyzer->insertBoundsAndReturnRenderGroup(bounds);
3032 render_groups_column.push_back(rg);
3034 LOG(
INFO) <<
"load_table_binary_columnar_polys: Done";
3037 render_groups_column.resize(bounds_column.size(), 0);
3049 render_groups_column);
3053 const TSessionId& session_id,
3055 std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
3056 const std::list<const ColumnDescriptor*>& cds,
3057 const std::vector<int>& desc_id_to_column_id,
3059 const std::string& table_name,
3060 bool assign_render_groups) {
3061 size_t skip_physical_cols = 0;
3062 size_t col_idx = 0, import_idx = 0;
3063 for (
const auto& cd : cds) {
3064 if (skip_physical_cols > 0) {
3065 CHECK(cd->isGeoPhyCol);
3066 skip_physical_cols--;
3068 }
else if (cd->columnType.is_geometry()) {
3069 skip_physical_cols = cd->columnType.get_physical_cols();
3071 if (desc_id_to_column_id[import_idx] == -1) {
3072 import_buffers[col_idx]->addDefaultValues(cd, num_rows);
3074 if (cd->columnType.is_geometry()) {
3082 assign_render_groups);
3086 col_idx += skip_physical_cols;
3093 const std::string& table_name,
3094 const std::vector<TRow>& rows,
3095 const std::vector<std::string>& column_names) {
3102 auto session_ptr = stdlog.getConstSessionInfo();
3108 const auto execute_read_lock =
3112 std::unique_ptr<import_export::Loader> loader;
3113 std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3116 rows.front().cols.size(),
3120 "load_table_binary");
3122 auto col_descs = loader->get_column_descs();
3125 size_t rows_completed = 0;
3126 for (
auto const& row : rows) {
3129 for (
auto cd : col_descs) {
3130 auto mapped_idx = desc_id_to_column_id[col_idx];
3131 if (mapped_idx != -1) {
3132 import_buffers[col_idx]->add_value(
3133 cd, row.cols[mapped_idx], row.cols[mapped_idx].is_null);
3138 }
catch (
const std::exception& e) {
3139 for (
size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
3140 import_buffers[col_idx_to_pop]->pop_value();
3142 LOG(
ERROR) <<
"Input exception thrown: " << e.what()
3143 <<
". Row discarded, issue at column : " << (col_idx + 1)
3144 <<
" data :" << row;
3148 session_ptr->getCatalog(),
3151 desc_id_to_column_id,
3156 session_ptr->getCatalog(), table_name);
3157 if (!loader->load(import_buffers, rows.size(), session_ptr.get())) {
3160 }
catch (
const std::exception& e) {
3165 std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
3168 const std::string& table_name,
3170 std::unique_ptr<import_export::Loader>* loader,
3171 std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
3172 const std::vector<std::string>& column_names,
3173 std::string load_type) {
3174 if (num_cols == 0) {
3180 std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3182 cat, table_name,
true));
3183 const auto td = (*td_with_lock)();
3194 auto col_descs = (*loader)->get_column_descs();
3196 if (column_names.empty()) {
3199 auto geo_physical_cols = std::count_if(
3200 col_descs.begin(), col_descs.end(), [](
auto cd) {
return cd->isGeoPhyCol; });
3201 const auto num_table_cols =
static_cast<size_t>(td->nColumns) - geo_physical_cols -
3202 (td->hasDeletedCol ? 2 : 1);
3203 if (num_cols != num_table_cols) {
3204 throw std::runtime_error(
"Number of columns to load (" +
std::to_string(num_cols) +
3205 ") does not match number of columns in table " +
3209 }
else if (num_cols != column_names.size()) {
3211 "Number of columns specified does not match the "
3212 "number of columns given (" +
3217 return std::move(td_with_lock);
3222 if (!column.nulls.empty()) {
3223 return column.nulls.size();
3228 return column.data.int_col.size() + column.data.arr_col.size() +
3229 column.data.real_col.size() + column.data.str_col.size();
3236 const std::string& table_name,
3237 const std::vector<TColumn>& cols,
3238 const std::vector<std::string>& column_names) {
3249 const TSessionId& session_id_or_json,
3250 const std::string& table_name,
3251 const std::vector<TColumn>& cols,
3252 const std::vector<std::string>& column_names,
3253 const bool assign_render_groups) {
3260 assign_render_groups
3266 const TSessionId& session_id,
3267 const std::string& table_name,
3268 const std::vector<TColumn>& cols,
3269 const std::vector<std::string>& column_names,
3273 auto session_ptr = stdlog.getConstSessionInfo();
3279 "load_table_binary_columnar_polys: Column data must be empty when called with "
3280 "assign_render_groups = false");
3290 LOG(
INFO) <<
"load_table_binary_columnar_polys: Cleaning up Render Group "
3291 "Assignment Persistent Data for Session '"
3292 << session_id <<
"', Table '" << table_name <<
"'";
3293 itr_session->second.erase(table_name);
3300 const auto execute_read_lock =
3304 std::unique_ptr<import_export::Loader> loader;
3305 std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3312 "load_table_binary_columnar");
3314 auto desc_id_to_column_id =
3317 size_t import_idx = 0;
3320 size_t skip_physical_cols = 0;
3321 for (
auto cd : loader->get_column_descs()) {
3322 if (skip_physical_cols > 0) {
3323 CHECK(cd->isGeoPhyCol);
3324 skip_physical_cols--;
3327 auto mapped_idx = desc_id_to_column_id[import_idx];
3328 if (mapped_idx != -1) {
3329 size_t col_rows = import_buffers[col_idx]->add_values(cd, cols[mapped_idx]);
3330 if (col_rows != num_rows) {
3331 std::ostringstream oss;
3332 oss <<
"load_table_binary_columnar: Inconsistent number of rows in column "
3333 << cd->columnName <<
" , expecting " << num_rows <<
" rows, column "
3334 << col_idx <<
" has " << col_rows <<
" rows";
3340 if (cd->columnType.is_geometry()) {
3342 session_ptr->getCatalog(),
3349 skip_physical_cols = cd->columnType.get_physical_cols();
3353 if (cd->columnType.is_geometry()) {
3354 skip_physical_cols = cd->columnType.get_physical_cols();
3355 col_idx += skip_physical_cols;
3361 }
catch (
const std::exception& e) {
3362 std::ostringstream oss;
3363 oss <<
"load_table_binary_columnar: Input exception thrown: " << e.what()
3364 <<
". Issue at column : " << (col_idx + 1) <<
". Import aborted";
3368 session_ptr->getCatalog(),
3370 loader->get_column_descs(),
3371 desc_id_to_column_id,
3376 session_ptr->getCatalog(), table_name);
3377 if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3384 #define ARROW_THRIFT_THROW_NOT_OK(s) \
3386 ::arrow::Status _s = (s); \
3387 if (UNLIKELY(!_s.ok())) { \
3389 ex.error_msg = _s.ToString(); \
3390 LOG(ERROR) << s.ToString(); \
3401 auto stream_buffer =
3402 std::make_shared<arrow::Buffer>(
reinterpret_cast<const uint8_t*
>(stream.c_str()),
3403 static_cast<int64_t>(stream.size()));
3405 arrow::io::BufferReader buf_reader(stream_buffer);
3406 std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3408 arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3411 std::shared_ptr<arrow::RecordBatch> batch;
3414 if (batch ==
nullptr) {
3417 batches.emplace_back(std::move(batch));
3419 }
catch (
const std::exception& e) {
3420 LOG(
ERROR) <<
"Error parsing Arrow stream: " << e.what() <<
". Import aborted";
3428 const std::string& table_name,
3429 const std::string& arrow_stream,
3430 const bool use_column_names) {
3435 auto session_ptr = stdlog.getConstSessionInfo();
3439 if (batches.size() != 1) {
3443 std::shared_ptr<arrow::RecordBatch> batch = batches[0];
3444 std::unique_ptr<import_export::Loader> loader;
3445 std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3446 std::vector<std::string> column_names;
3447 if (use_column_names) {
3448 column_names = batch->schema()->field_names();
3450 const auto execute_read_lock =
3454 auto schema_read_lock =
3457 static_cast<size_t>(batch->num_columns()),
3461 "load_table_binary_arrow");
3463 auto desc_id_to_column_id =
3465 size_t num_rows = 0;
3468 for (
auto cd : loader->get_column_descs()) {
3469 auto mapped_idx = desc_id_to_column_id[col_idx];
3470 if (mapped_idx != -1) {
3471 auto& array = *batch->column(mapped_idx);
3473 num_rows = import_buffers[col_idx]->add_arrow_values(
3474 cd, array,
true, row_slice,
nullptr);
3478 }
catch (
const std::exception& e) {
3479 LOG(
ERROR) <<
"Input exception thrown: " << e.what()
3480 <<
". Issue at column : " << (col_idx + 1) <<
". Import aborted";
3486 session_ptr->getCatalog(),
3488 loader->get_column_descs(),
3489 desc_id_to_column_id,
3494 session_ptr->getCatalog(), table_name);
3495 if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3501 const std::string& table_name,
3502 const std::vector<TStringRow>& rows,
3503 const std::vector<std::string>& column_names) {
3510 auto session_ptr = stdlog.getConstSessionInfo();
3516 const auto execute_read_lock =
3520 std::unique_ptr<import_export::Loader> loader;
3521 std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3522 auto schema_read_lock =
3525 static_cast<size_t>(rows.front().cols.size()),
3531 auto col_descs = loader->get_column_descs();
3534 size_t rows_completed = 0;
3535 for (
auto const& row : rows) {
3536 size_t import_idx = 0;
3539 size_t skip_physical_cols = 0;
3540 for (
auto cd : col_descs) {
3541 if (skip_physical_cols > 0) {
3542 CHECK(cd->isGeoPhyCol);
3543 skip_physical_cols--;
3546 auto mapped_idx = desc_id_to_column_id[import_idx];
3547 if (mapped_idx != -1) {
3548 import_buffers[col_idx]->add_value(cd,
3549 row.cols[mapped_idx].str_val,
3550 row.cols[mapped_idx].is_null,
3554 if (cd->columnType.is_geometry()) {
3556 skip_physical_cols = cd->columnType.get_physical_cols();
3557 col_idx += skip_physical_cols;
3563 }
catch (
const std::exception& e) {
3564 LOG(
ERROR) <<
"Input exception thrown: " << e.what()
3565 <<
". Row discarded, issue at column : " << (col_idx + 1)
3566 <<
" data :" << row;
3571 if (rows.size() != 0) {
3572 const auto& row = rows[0];
3575 size_t import_idx = 0;
3576 size_t skip_physical_cols = 0;
3577 for (
auto cd : col_descs) {
3578 if (skip_physical_cols > 0) {
3579 skip_physical_cols--;
3582 auto mapped_idx = desc_id_to_column_id[import_idx];
3584 if (cd->columnType.is_geometry()) {
3585 skip_physical_cols = cd->columnType.get_physical_cols();
3586 if (mapped_idx != -1) {
3588 session_ptr->getCatalog(),
3596 col_idx += skip_physical_cols;
3601 }
catch (
const std::exception& e) {
3602 LOG(
ERROR) <<
"Input exception thrown: " << e.what()
3603 <<
". Row discarded, issue at column : " << (col_idx + 1)
3604 <<
" data :" << row;
3609 session_ptr->getCatalog(),
3612 desc_id_to_column_id,
3617 session_ptr->getCatalog(), table_name);
3618 if (!loader->load(import_buffers, rows_completed, session_ptr.get())) {
3622 }
catch (
const std::exception& e) {
3629 if (str.size() == 2 && str[0] ==
'\\') {
3630 if (str[1] ==
't') {
3632 }
else if (str[1] ==
'n') {
3634 }
else if (str[1] ==
'0') {
3636 }
else if (str[1] ==
'\'') {
3638 }
else if (str[1] ==
'\\') {
3647 switch (cp.has_header) {
3648 case TImportHeaderRow::AUTODETECT:
3651 case TImportHeaderRow::NO_HEADER:
3654 case TImportHeaderRow::HAS_HEADER:
3660 copy_params.
quoted = cp.quoted;
3661 if (cp.delimiter.length() > 0) {
3666 if (cp.null_str.length() > 0) {
3667 copy_params.
null_str = cp.null_str;
3669 if (cp.quote.length() > 0) {
3672 if (cp.escape.length() > 0) {
3675 if (cp.line_delim.length() > 0) {
3678 if (cp.array_delim.length() > 0) {
3681 if (cp.array_begin.length() > 0) {
3684 if (cp.array_end.length() > 0) {
3687 if (cp.threads != 0) {
3688 copy_params.
threads = cp.threads;
3690 if (cp.s3_access_key.length() > 0) {
3693 if (cp.s3_secret_key.length() > 0) {
3696 if (cp.s3_session_token.length() > 0) {
3699 if (cp.s3_region.length() > 0) {
3702 if (cp.s3_endpoint.length() > 0) {
3707 cp.s3_secret_key.length() == 0 && cp.s3_session_token.length() == 0) {
3708 const auto& server_credentials =
3709 Aws::Auth::DefaultAWSCredentialsProviderChain().GetAWSCredentials();
3710 copy_params.
s3_access_key = server_credentials.GetAWSAccessKeyId();
3711 copy_params.
s3_secret_key = server_credentials.GetAWSSecretKey();
3716 switch (cp.source_type) {
3717 case TSourceType::DELIMITED_FILE:
3720 case TSourceType::GEO_FILE:
3723 case TSourceType::PARQUET_FILE:
3724 #ifdef ENABLE_IMPORT_PARQUET
3730 case TSourceType::ODBC:
3732 case TSourceType::RASTER_FILE:
3739 switch (cp.geo_coords_encoding) {
3740 case TEncodingType::GEOINT:
3743 case TEncodingType::NONE:
3751 switch (cp.geo_coords_type) {
3752 case TDatumType::GEOGRAPHY:
3755 case TDatumType::GEOMETRY:
3762 switch (cp.geo_coords_srid) {
3778 switch (cp.raster_point_type) {
3779 case TRasterPointType::NONE:
3782 case TRasterPointType::AUTO:
3785 case TRasterPointType::SMALLINT:
3788 case TRasterPointType::INT:
3791 case TRasterPointType::FLOAT:
3794 case TRasterPointType::DOUBLE:
3797 case TRasterPointType::POINT:
3804 if (cp.raster_scanlines_per_thread < 0) {
3810 switch (cp.raster_point_transform) {
3811 case TRasterPointTransform::NONE:
3814 case TRasterPointTransform::AUTO:
3817 case TRasterPointTransform::FILE:
3820 case TRasterPointTransform::WORLD:
3828 copy_params.
dsn = cp.odbc_dsn;
3832 copy_params.
username = cp.odbc_username;
3833 copy_params.
password = cp.odbc_password;
3841 TCopyParams copy_params;
3843 copy_params.null_str = cp.
null_str;
3846 copy_params.has_header = TImportHeaderRow::AUTODETECT;
3849 copy_params.has_header = TImportHeaderRow::NO_HEADER;
3852 copy_params.has_header = TImportHeaderRow::HAS_HEADER;
3857 copy_params.quoted = cp.
quoted;
3858 copy_params.quote = cp.
quote;
3859 copy_params.escape = cp.
escape;
3864 copy_params.threads = cp.
threads;
3872 copy_params.source_type = TSourceType::DELIMITED_FILE;
3875 copy_params.source_type = TSourceType::GEO_FILE;
3878 copy_params.source_type = TSourceType::PARQUET_FILE;
3881 copy_params.source_type = TSourceType::RASTER_FILE;
3884 copy_params.source_type = TSourceType::ODBC;
3891 copy_params.geo_coords_encoding = TEncodingType::GEOINT;
3894 copy_params.geo_coords_encoding = TEncodingType::NONE;
3900 copy_params.geo_coords_type = TDatumType::GEOGRAPHY;
3903 copy_params.geo_coords_type = TDatumType::GEOMETRY;
3916 copy_params.raster_point_type = TRasterPointType::NONE;
3919 copy_params.raster_point_type = TRasterPointType::AUTO;
3922 copy_params.raster_point_type = TRasterPointType::SMALLINT;
3925 copy_params.raster_point_type = TRasterPointType::INT;
3928 copy_params.raster_point_type = TRasterPointType::FLOAT;
3931 copy_params.raster_point_type = TRasterPointType::DOUBLE;
3934 copy_params.raster_point_type = TRasterPointType::POINT;
3943 copy_params.raster_point_transform = TRasterPointTransform::NONE;
3946 copy_params.raster_point_transform = TRasterPointTransform::AUTO;
3949 copy_params.raster_point_transform = TRasterPointTransform::FILE;
3952 copy_params.raster_point_transform = TRasterPointTransform::WORLD;
3959 copy_params.odbc_dsn = cp.
dsn;
3963 copy_params.odbc_username = cp.
username;
3964 copy_params.odbc_password = cp.
password;
3977 if (boost::istarts_with(path,
"http://") || boost::istarts_with(path,
"https://")) {
3978 if (!gdal_network) {
3980 "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3983 path =
"/vsicurl/" + path;
3984 }
else if (boost::istarts_with(path,
"s3://")) {
3985 if (!gdal_network) {
3987 "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3990 boost::replace_first(path,
"s3://",
"/vsis3/");
3996 if (boost::iends_with(path,
".gz") && !boost::iends_with(path,
".tar.gz")) {
3997 path =
"/vsigzip/" + path;
4003 if (boost::iends_with(path,
".zip")) {
4005 path =
"/vsizip/" + path;
4006 }
else if (boost::iends_with(path,
".tar") || boost::iends_with(path,
".tgz") ||
4007 boost::iends_with(path,
".tar.gz")) {
4009 path =
"/vsitar/" + path;
4014 std::string path(path_in);
4017 if (boost::istarts_with(path,
"/vsizip/")) {
4018 boost::replace_first(path,
"/vsizip/",
"");
4019 }
else if (boost::istarts_with(path,
"/vsitar/")) {
4020 boost::replace_first(path,
"/vsitar/",
"");
4021 }
else if (boost::istarts_with(path,
"/vsigzip/")) {
4022 boost::replace_first(path,
"/vsigzip/",
"");
4026 if (boost::istarts_with(path,
"/vsicurl/")) {
4027 boost::replace_first(path,
"/vsicurl/",
"");
4028 }
else if (boost::istarts_with(path,
"/vsis3/")) {
4029 boost::replace_first(path,
"/vsis3/",
"s3://");
4036 if (boost::istarts_with(path,
"s3://") || boost::istarts_with(path,
"http://") ||
4037 boost::istarts_with(path,
"https://")) {
4040 return !boost::filesystem::path(path).is_absolute();
4044 auto filename = boost::filesystem::path(path).filename().string();
4058 if (boost::iends_with(path,
".shp") || boost::iends_with(path,
".geojson") ||
4059 boost::iends_with(path,
".json") || boost::iends_with(path,
".kml") ||
4060 boost::iends_with(path,
".kmz") || boost::iends_with(path,
".gdb") ||
4061 boost::iends_with(path,
".gdb.zip") || boost::iends_with(path,
".fgb")) {
4071 if (boost::iends_with(path,
".zip") && !boost::iends_with(path,
".gdb.zip")) {
4073 }
else if (boost::iends_with(path,
".tar") || boost::iends_with(path,
".tgz") ||
4074 boost::iends_with(path,
".tar.gz")) {
4083 std::vector<std::string> files =
4087 LOG(
INFO) <<
"Found " << files.size() <<
" files in Archive "
4089 for (
const auto& file : files) {
4094 bool found_suitable_file =
false;
4095 std::string file_name;
4096 for (
const auto& file : files) {
4099 found_suitable_file =
true;
4105 if (!found_suitable_file) {
4106 LOG(
INFO) <<
"Failed to find any supported geo files in Archive: " +
4116 return (!boost::istarts_with(file_path,
"s3://") &&
4117 !boost::istarts_with(file_path,
"http://") &&
4118 !boost::istarts_with(file_path,
"https://"));
4130 const TSessionId& session_id_or_json,
4131 const std::string& file_name_in,
4132 const TCopyParams& cp) {
4139 bool is_raster =
false;
4140 boost::filesystem::path file_path;
4143 std::string file_name{file_name_in};
4147 picosha2::hash256_hex_string(request_info.
sessionId()) /
4148 boost::filesystem::path(file_name).filename();
4149 file_name = temp_file_path.string();
4162 CHECK(!file_paths.empty());
4163 file_name = file_paths[0];
4178 if (geo_file.size()) {
4179 file_name = file_name + std::string(
"/") + geo_file;
4193 file_path = boost::filesystem::path(file_name);
4195 if (!boost::istarts_with(file_name,
"s3://")) {
4196 if (!boost::filesystem::path(file_name).is_absolute()) {
4198 picosha2::hash256_hex_string(request_info.
sessionId()) /
4199 boost::filesystem::path(file_name).filename();
4200 file_name = file_path.string();
4208 "\" does not exist.")
4214 "\" does not exist.");
4222 #ifdef ENABLE_IMPORT_PARQUET
4228 std::vector<std::string> headers = detector.
get_headers();
4232 _return.row_set.row_desc.resize(best_types.size());
4233 for (
size_t col_idx = 0; col_idx < best_types.size(); col_idx++) {
4235 auto& ti = best_types[col_idx];
4236 col.col_type.precision = ti.get_precision();
4237 col.col_type.scale = ti.get_scale();
4238 col.col_type.comp_param = ti.get_comp_param();
4239 if (ti.is_geometry()) {
4243 col.col_type.precision =
static_cast<int>(copy_params.
geo_coords_type);
4249 if (ti.is_array()) {
4250 col.col_type.is_array =
true;
4255 col.col_name = headers[col_idx];
4258 _return.row_set.row_desc[col_idx] = col;
4264 for (
auto row : sample_data) {
4265 sample_row.cols.clear();
4266 for (
const auto& s : row) {
4269 td.is_null = s.empty();
4270 sample_row.cols.push_back(td);
4272 _return.row_set.rows.push_back(sample_row);
4279 for (
auto cd : cds) {
4287 std::map<std::string, std::vector<std::string>> sample_data;
4294 if (sample_data.size() > 0) {
4295 for (
size_t i = 0; i < sample_data.begin()->second.size(); i++) {
4297 for (
auto cd : cds) {
4299 td.val.str_val = sample_data[cd.sourceName].at(i);
4300 td.is_null = td.val.str_val.empty();
4301 sample_row.cols.push_back(td);
4303 _return.row_set.rows.push_back(sample_row);
4309 }
catch (
const std::exception& e) {
4315 const TSessionId& session_id_or_json,
4316 const int64_t widget_id,
4317 const std::string& vega_json,
4318 const int compression_level,
4319 const std::string& nonce) {
4325 "compression_level",
4332 stdlog.appendNameValuePairs(
"nonce", nonce);
4340 auto& non_const_vega_json =
const_cast<std::string&
>(vega_json);
4345 stdlog.getSessionInfo(),
4347 std::move(non_const_vega_json),
4350 }
catch (std::exception& e) {
4357 int32_t dashboard_id,
4362 object.loadKey(catalog);
4363 object.setPrivileges(requestedPermissions);
4364 std::vector<DBObject> privs = {
object};
4365 return SysCatalog::instance().checkPrivileges(user, privs);
4374 const TCustomExpression& t_custom_expr,
4376 if (t_custom_expr.data_source_name.empty()) {
4379 CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4380 <<
"Unexpected data source type: "
4381 <<
static_cast<int>(t_custom_expr.data_source_type);
4385 t_custom_expr.data_source_name +
"\" that does not exist.")
4388 return std::make_unique<CustomExpression>(
4389 t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4394 TCustomExpression t_custom_expr;
4395 t_custom_expr.id = custom_expr.
id;
4396 t_custom_expr.name = custom_expr.
name;
4399 t_custom_expr.is_deleted = custom_expr.
is_deleted;
4401 <<
"Unexpected data source type: "
4403 t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4406 t_custom_expr.data_source_name = td->
tableName;
4409 <<
"Custom expression references a deleted data source. Custom expression id: "
4410 << custom_expr.
id <<
", name: " << custom_expr.
name;
4412 return t_custom_expr;
4417 const TCustomExpression& t_custom_expr) {
4424 auto session_ptr = stdlog.getConstSessionInfo();
4425 if (!session_ptr->get_currentUser().isSuper) {
4428 auto& catalog = session_ptr->getCatalog();
4430 return catalog.createCustomExpression(
4435 const TSessionId& session_id_or_json) {
4441 auto session_ptr = stdlog.getConstSessionInfo();
4442 auto& catalog = session_ptr->getCatalog();
4444 auto custom_expressions =
4445 catalog.getCustomExpressionsForUser(session_ptr->get_currentUser());
4446 for (
const auto& custom_expression : custom_expressions) {
4453 const std::string& expression_json) {
4460 auto session_ptr = stdlog.getConstSessionInfo();
4461 if (!session_ptr->get_currentUser().isSuper) {
4464 auto& catalog = session_ptr->getCatalog();
4466 catalog.updateCustomExpression(
id, expression_json);
4470 const TSessionId& session_id_or_json,
4471 const std::vector<int32_t>& custom_expression_ids,
4472 const bool do_soft_delete) {
4479 auto session_ptr = stdlog.getConstSessionInfo();
4480 if (!session_ptr->get_currentUser().isSuper) {
4483 auto& catalog = session_ptr->getCatalog();
4485 catalog.deleteCustomExpressions(custom_expression_ids, do_soft_delete);
4490 const TSessionId& session_id_or_json,
4491 const int32_t dashboard_id) {
4496 auto session_ptr = stdlog.getConstSessionInfo();
4497 auto const&
cat = session_ptr->getCatalog();
4499 auto dash =
cat.getMetadataForDashboard(dashboard_id);
4510 SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4515 const TSessionId& session_id_or_json) {
4520 auto session_ptr = stdlog.getConstSessionInfo();
4521 auto const&
cat = session_ptr->getCatalog();
4523 const auto dashes =
cat.getAllDashboardsMetadata();
4525 for (
const auto dash : dashes) {
4537 const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
4540 const bool populate_state) {
4541 auto const&
cat = session_ptr->getCatalog();
4542 SysCatalog::instance().getMetadataForUserById(dash->
userId, user_meta);
4543 auto objects_list = SysCatalog::instance().getMetadataForObject(
4544 cat.getCurrentDB().dbId,
4547 TDashboard dashboard;
4549 if (populate_state) {
4556 dashboard.dashboard_owner = dash->
user;
4557 TDashboardPermissions perms;
4559 if (session_ptr->get_currentUser().isSuper) {
4560 perms.create_ =
true;
4561 perms.delete_ =
true;
4569 obj_to_find.loadKey(
cat);
4570 std::vector<std::string> grantees =
4571 SysCatalog::instance().getRoles(
true,
4572 session_ptr->get_currentUser().isSuper,
4573 session_ptr->get_currentUser().userName);
4574 for (
const auto& grantee : grantees) {
4576 auto* gr = SysCatalog::instance().getGrantee(grantee);
4577 if (gr && (object_found = gr->findDbObject(obj_to_find.getObjectKey(),
true))) {
4586 dashboard.dashboard_permissions = perms;
4587 if (objects_list.empty() ||
4588 (objects_list.size() == 1 && objects_list[0]->roleName == user_meta.
userName)) {
4589 dashboard.is_dash_shared =
false;
4591 dashboard.is_dash_shared =
true;
4596 namespace dbhandler {
4604 std::string error_message{
"Write requests/queries are not allowed in the " +
4606 if (throw_db_exception) {
4609 throw std::runtime_error(error_message);
4616 const std::string& dashboard_name,
4617 const std::string& dashboard_state,
4618 const std::string& image_hash,
4619 const std::string& dashboard_metadata) {
4624 auto session_ptr = stdlog.getConstSessionInfo();
4627 auto&
cat = session_ptr->getCatalog();
4646 dd.
userId = session_ptr->get_currentUser().userId;
4647 dd.
user = session_ptr->get_currentUser().userName;
4650 auto id =
cat.createDashboard(dd);
4652 SysCatalog::instance().createDBObject(
4655 }
catch (
const std::exception& e) {
4661 const int32_t dashboard_id,
4662 const std::string& dashboard_name,
4663 const std::string& dashboard_owner,
4664 const std::string& dashboard_state,
4665 const std::string& image_hash,
4666 const std::string& dashboard_metadata) {
4671 auto session_ptr = stdlog.getConstSessionInfo();
4674 auto&
cat = session_ptr->getCatalog();
4684 if (
auto dash =
cat.getMetadataForDashboard(
4685 std::to_string(session_ptr->get_currentUser().userId), dashboard_name)) {
4686 if (dash->dashboardId != dashboard_id) {
4697 if (!SysCatalog::instance().getMetadataForUser(dashboard_owner, user)) {
4702 dd.
user = dashboard_owner;
4706 cat.replaceDashboard(dd);
4707 }
catch (
const std::exception& e) {
4713 const int32_t dashboard_id) {
4718 const std::vector<int32_t>& dashboard_ids) {
4723 auto session_ptr = stdlog.getConstSessionInfo();
4725 auto&
cat = session_ptr->getCatalog();
4731 cat.deleteMetadataForDashboards(dashboard_ids, session_ptr->get_currentUser());
4732 }
catch (
const std::exception& e) {
4738 int32_t dashboard_id,
4739 std::vector<std::string> groups) {
4743 auto&
cat = session_info.getCatalog();
4744 auto dash = cat.getMetadataForDashboard(dashboard_id);
4748 }
else if (session_info.get_currentUser().userId != dash->userId &&
4749 !session_info.get_currentUser().isSuper) {
4750 throw std::runtime_error(
4751 "User should be either owner of dashboard or super user to share/unshare it");
4753 std::vector<std::string> valid_groups;
4755 for (
auto& group : groups) {
4757 if (!SysCatalog::instance().getGrantee(group)) {
4759 }
else if (!user_meta.
isSuper) {
4760 valid_groups.push_back(group);
4763 return valid_groups;
4767 for (
auto const& group : groups) {
4768 if (!SysCatalog::instance().getGrantee(group)) {
4776 const std::vector<int32_t>& dashboard_ids) {
4778 std::map<std::string, std::list<int32_t>> errors;
4779 for (
auto const& dashboard_id : dashboard_ids) {
4780 auto dashboard =
cat.getMetadataForDashboard(dashboard_id);
4782 errors[
"Dashboard id does not exist"].push_back(dashboard_id);
4785 errors[
"User should be either owner of dashboard or super user to share/unshare it"]
4786 .push_back(dashboard_id);
4789 if (!errors.empty()) {
4790 std::stringstream error_stream;
4791 error_stream <<
"Share/Unshare dashboard(s) failed with error(s)\n";
4792 for (
const auto& [error, id_list] : errors) {
4793 error_stream <<
"Dashboard ids " <<
join(id_list,
", ") <<
": " << error <<
"\n";
4800 const std::vector<int32_t>& dashboard_ids,
4801 const std::vector<std::string>& groups,
4802 const TDashboardPermissions& permissions,
4803 const bool do_share) {
4806 check_read_only(do_share ?
"share_dashboards" :
"unshare_dashboards");
4807 if (!permissions.create_ && !permissions.delete_ && !permissions.edit_ &&
4808 !permissions.view_) {
4810 std::string(do_share ?
"grants" :
"revokes"));
4812 auto session_ptr = stdlog.getConstSessionInfo();
4813 auto const& catalog = session_ptr->getCatalog();
4814 auto& sys_catalog = SysCatalog::instance();
4817 std::vector<DBObject> batch_objects;
4818 for (
auto const& dashboard_id : dashboard_ids) {
4821 if (permissions.delete_) {
4824 if (permissions.create_) {
4827 if (permissions.edit_) {
4830 if (permissions.view_) {
4833 object.setPrivileges(privs);
4834 batch_objects.push_back(
object);
4837 sys_catalog.grantDBObjectPrivilegesBatch(groups, batch_objects, catalog);
4839 sys_catalog.revokeDBObjectPrivilegesBatch(groups, batch_objects, catalog);
4844 const std::vector<int32_t>& dashboard_ids,
4845 const std::vector<std::string>& groups,
4846 const TDashboardPermissions& permissions) {
4850 request_info.
sessionId(), dashboard_ids, groups, permissions,
true);
4855 const int32_t dashboard_id,
4856 const std::vector<std::string>& groups,
4857 const std::vector<std::string>& objects,
4858 const TDashboardPermissions& permissions,
4859 const bool grant_role =
false) {
4864 const std::vector<int32_t>& dashboard_ids,
4865 const std::vector<std::string>& groups,
4866 const TDashboardPermissions& permissions) {
4870 request_info.
sessionId(), dashboard_ids, groups, permissions,
false);
4874 const int32_t dashboard_id,
4875 const std::vector<std::string>& groups,
4876 const std::vector<std::string>& objects,
4877 const TDashboardPermissions& permissions) {
4882 std::vector<TDashboardGrantees>& dashboard_grantees,
4883 const TSessionId& session_id_or_json,
4884 const int32_t dashboard_id) {
4889 auto session_ptr = stdlog.getConstSessionInfo();
4890 auto const&
cat = session_ptr->getCatalog();
4892 auto dash =
cat.getMetadataForDashboard(dashboard_id);
4896 }
else if (session_ptr->get_currentUser().userId != dash->userId &&
4897 !session_ptr->get_currentUser().isSuper) {
4899 "User should be either owner of dashboard or super user to access grantees");
4901 std::vector<ObjectRoleDescriptor*> objectsList;
4902 objectsList = SysCatalog::instance().getMetadataForObject(
4903 cat.getCurrentDB().dbId,
4908 SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4909 for (
auto object : objectsList) {
4910 if (user_meta.
userName == object->roleName) {
4914 TDashboardGrantees grantee;
4915 TDashboardPermissions perm;
4916 grantee.name =
object->roleName;
4917 grantee.is_user =
object->roleType;
4922 grantee.permissions = perm;
4923 dashboard_grantees.push_back(grantee);
4928 const TSessionId& session_id_or_json,
4929 const std::string& view_state,
4930 const std::string& view_metadata) {
4935 auto session_ptr = stdlog.getConstSessionInfo();
4937 auto&
cat = session_ptr->getCatalog();
4940 ld.
userId = session_ptr->get_currentUser().userId;
4945 _return =
cat.createLink(ld, 6);
4946 }
catch (
const std::exception& e) {
4952 const std::string&
name,
4953 const bool is_array) {
4956 ct.col_type.type =
type;
4957 ct.col_type.is_array = is_array;
4963 const std::list<std::string> shp_ext{
".shp",
".shx",
".dbf"};
4964 if (std::find(shp_ext.begin(),
4966 boost::algorithm::to_lower_copy(file_path.extension().string())) !=
4968 for (
auto ext : shp_ext) {
4969 auto aux_file = file_path;
4971 aux_file.replace_extension(boost::algorithm::to_upper_copy(ext)).string(),
4974 aux_file.replace_extension(ext).string(), copy_params)) {
4975 throw std::runtime_error(
"required file for shapefile does not exist: " +
4976 aux_file.filename().string());
4983 const std::string& table_name,
4984 const TRowDescriptor& rd,
4985 const TCreateParams& create_params) {
4988 auto stdlog =
STDLOG(
"table_name", table_name);
5000 std::string stmt{
"CREATE TABLE " + table_name};
5001 std::vector<std::string> col_stmts;
5003 for (
auto col : rds) {
5009 if (col.col_type.type == TDatumType::INTERVAL_DAY_TIME ||
5010 col.col_type.type == TDatumType::INTERVAL_YEAR_MONTH) {
5012 " for column: " + col.col_name);
5015 if (col.col_type.type == TDatumType::DECIMAL) {
5017 if (col.col_type.precision == 0 && col.col_type.scale == 0) {
5018 col.col_type.precision = 14;
5019 col.col_type.scale = 7;
5023 std::string col_stmt;
5024 col_stmt.append(col.col_name +
" " +
thrift_to_name(col.col_type));
5025 if (col.__isset.default_value) {
5026 col_stmt.append(
" DEFAULT " + col.default_value);
5039 col_stmt.append(
"(" +
std::to_string(col.col_type.comp_param) +
")");
5041 }
else if (col.col_type.type == TDatumType::STR) {
5043 col_stmt.append(
" ENCODING NONE");
5044 }
else if (col.col_type.type == TDatumType::POINT ||
5045 col.col_type.type == TDatumType::MULTIPOINT ||
5046 col.col_type.type == TDatumType::LINESTRING ||
5047 col.col_type.type == TDatumType::MULTILINESTRING ||
5048 col.col_type.type == TDatumType::POLYGON ||
5049 col.col_type.type == TDatumType::MULTIPOLYGON) {
5051 if (col.col_type.scale == 4326) {
5052 col_stmt.append(
" ENCODING NONE");
5055 col_stmts.push_back(col_stmt);
5060 if (create_params.is_replicated) {
5061 stmt.append(
" WITH (PARTITIONS = 'REPLICATED')");
5072 const std::string& table_name,
5073 const std::string& file_name_in,
5074 const TCopyParams& cp) {
5081 auto session_ptr = stdlog.getConstSessionInfo();
5083 LOG(
INFO) <<
"import_table " << table_name <<
" from " << file_name_in;
5085 const auto execute_read_lock =
5089 auto&
cat = session_ptr->getCatalog();
5093 executor->enrollQuerySession(request_info.
sessionId(),
5097 QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5103 executor->clearQuerySessionStatus(request_info.sessionId(),
start_time);
5106 const auto td_with_lock =
5109 const auto td = td_with_lock();
5113 std::string copy_from_source;
5118 std::string file_name{file_name_in};
5119 auto file_path = boost::filesystem::path(file_name);
5120 if (!boost::istarts_with(file_name,
"s3://")) {
5121 if (!boost::filesystem::path(file_name).is_absolute()) {
5123 picosha2::hash256_hex_string(request_info.sessionId()) /
5124 boost::filesystem::path(file_name).filename();
5125 file_name = file_path.string();
5129 "\" does not exist.");
5137 if (boost::filesystem::extension(file_path) ==
".tsv") {
5141 copy_from_source = file_path.string();
5145 session_ptr->getCatalog(), table_name);
5146 std::unique_ptr<import_export::AbstractImporter> importer;
5149 LOG(
INFO) <<
"Total Import Time: " << (double)ms / 1000.0 <<
" Seconds.";
5150 }
catch (
const TDBException& e) {
5152 }
catch (
const std::exception& e) {
5164 return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5165 t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5166 t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5170 std::stringstream ss;
5176 const std::string& file_path,
5177 const std::string& column_name,
5178 const std::string& attr,
5179 const std::string& got,
5180 const std::string& expected) {
5181 return "Issue encountered in geo/raster file '" + file_path +
5182 "' while appending to table '" + table_name +
"'. Column '" + column_name +
5183 "' " + attr +
" mismatch (got '" + got +
"', expected '" + expected +
"')";
// Raises (via THROW_DB_EXCEPTION) an error reporting that an attribute of a column
// in the geo/raster file being appended does not match the existing table column.
//
// Parameters (each is concatenated into the message with operator+, so each must be
// a std::string or a C string):
//   attr     - name of the mismatching attribute
//   got      - value found in the incoming file
//   expected - value required by the target table
//
// NOTE: this macro is not hygienic. Besides its arguments, the expansion refers to
// the following names, which must be visible at the point of use:
//   file_path  - object providing `.filename().string()`
//   table_name - string naming the destination table
//   cd         - pointer-like object exposing `->columnName`
//
// The parameters are parenthesized so that arguments containing lower-precedence
// operators (e.g. `flag ? "a" : "b"`) bind as a unit instead of being absorbed
// into the surrounding `+` chain. The trailing semicolon is retained on purpose so
// that existing call sites expand exactly as before.
#define THROW_COLUMN_ATTR_MISMATCH_EXCEPTION(attr, got, expected)                   \
  THROW_DB_EXCEPTION("Could not append geo/raster file '" +                         \
                     file_path.filename().string() + "' to table '" + table_name + \
                     "'. Column '" + cd->columnName + "' " + (attr) +              \
                     " mismatch (got '" + (got) + "', expected '" + (expected) +   \
                     "')");
5195 const std::string& table_name,
5196 const std::string& file_name,
5197 const TCopyParams& cp,
5198 const TRowDescriptor& row_desc,
5199 const TCreateParams& create_params) {
5214 const std::string& table_name,
5215 const std::string& file_name,
5217 const TRowDescriptor& row_desc,
5218 const TCreateParams& create_params) {
5222 std::vector<std::string> file_names;
5231 file_names.push_back(file_name);
5234 for (
auto const& file_name : file_names) {
5236 session_id, table_name, file_name, copy_params, row_desc, create_params);
5241 const std::string& table_name,
5242 const std::string& file_name_in,
5244 const TRowDescriptor& row_desc,
5245 const TCreateParams& create_params) {
5248 auto session_ptr = stdlog.getConstSessionInfo();
5251 auto&
cat = session_ptr->getCatalog();
5255 executor->enrollQuerySession(session_id,
5259 QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5265 executor->clearQuerySessionStatus(session_id,
start_time);
5269 std::string file_name{file_name_in};
5273 auto file_path =
import_path_ / picosha2::hash256_hex_string(session_id) /
5274 boost::filesystem::path(file_name).filename();
5275 file_name = file_path.string();
5279 bool is_raster =
false;
5291 if (geo_file.size()) {
5292 file_name = file_name + std::string(
"/") + geo_file;
5305 THROW_DB_EXCEPTION(
"import_geo_table called with file_type other than GEO or RASTER");
5309 VLOG(1) <<
"import_geo_table: Original filename: " << file_name_in;
5310 VLOG(1) <<
"import_geo_table: Actual filename: " << file_name;
5311 VLOG(1) <<
"import_geo_table: Raster: " << is_raster;
5314 auto file_path = boost::filesystem::path(file_name);
5322 }
catch (
const std::exception& e) {
5332 std::vector<import_export::Importer::GeoFileLayerInfo> layer_info;
5337 }
catch (
const std::exception& e) {
5343 using LayerNameToContentsMap =
5344 std::map<std::string, import_export::Importer::GeoFileLayerContents>;
5345 LayerNameToContentsMap load_layers;
5347 <<
"import_geo_table: Found the following layers in the geo file:";
5348 for (
const auto& layer : layer_info) {
5349 switch (layer.contents) {
5351 LOG(
INFO) <<
"import_geo_table: '" << layer.name
5352 <<
"' (will import as geo table)";
5353 load_layers[layer.name] = layer.contents;
5356 LOG(
INFO) <<
"import_geo_table: '" << layer.name
5357 <<
"' (will import as regular table)";
5358 load_layers[layer.name] = layer.contents;
5361 LOG(
WARNING) <<
"import_geo_table: '" << layer.name
5362 <<
"' (will not import, unsupported geo type)";
5365 LOG(
INFO) <<
"import_geo_table: '" << layer.name <<
"' (ignoring, empty)";
5373 if (!is_raster && load_layers.size() == 0) {
5381 for (
const auto& layer : layer_info) {
5386 load_layers.clear();
5387 load_layers[layer.name] = layer.contents;
5390 }
else if (layer.contents ==
5394 }
else if (layer.contents ==