OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "DBHandler.h"
24 #include "DistributedLoader.h"
25 #include "TokenCompletionHints.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "MapDRelease.h"
32 
33 #include "Calcite/Calcite.h"
34 #include "gen-cpp/CalciteServer.h"
35 
38 
39 #include "Catalog/Catalog.h"
44 #include "DistributedHandler.h"
46 #include "Geospatial/ColumnNames.h"
47 #include "Geospatial/Compression.h"
48 #include "Geospatial/GDAL.h"
49 #include "Geospatial/Types.h"
50 #include "ImportExport/Importer.h"
51 #include "LockMgr/LockMgr.h"
53 #include "Parser/ParserWrapper.h"
57 #include "QueryEngine/Execute.h"
67 #include "RequestInfo.h"
68 #ifdef HAVE_RUNTIME_LIBS
70 #endif
71 #include "Shared/ArrowUtil.h"
72 #include "Shared/DateTimeParser.h"
73 #include "Shared/StringTransform.h"
74 #include "Shared/SysDefinitions.h"
75 #include "Shared/file_path_util.h"
77 #include "Shared/import_helpers.h"
78 #include "Shared/measure.h"
79 #include "Shared/misc.h"
80 #include "Shared/scope.h"
82 
83 #ifdef HAVE_AWS_S3
84 #include <aws/core/auth/AWSCredentialsProviderChain.h>
85 #endif
86 #include <fcntl.h>
87 #include <picosha2.h>
88 #include <sys/types.h>
89 #include <algorithm>
90 #include <boost/algorithm/string.hpp>
91 #include <boost/filesystem.hpp>
92 #include <boost/make_shared.hpp>
93 #include <boost/process/search_path.hpp>
94 #include <boost/program_options.hpp>
95 #include <boost/tokenizer.hpp>
96 #include <chrono>
97 #include <cmath>
98 #include <csignal>
99 #include <fstream>
100 #include <future>
101 #include <map>
102 #include <memory>
103 #include <random>
104 #include <string>
105 #include <thread>
106 #include <typeinfo>
107 
108 #include <arrow/api.h>
109 #include <arrow/io/api.h>
110 #include <arrow/ipc/api.h>
111 
112 #include "Shared/ArrowUtil.h"
113 #include "Shared/distributed.h"
114 
115 #ifdef ENABLE_IMPORT_PARQUET
116 extern bool g_enable_parquet_import_fsi;
117 #endif
118 
119 #ifdef HAVE_AWS_S3
120 extern bool g_allow_s3_server_privileges;
121 #endif
122 
123 extern bool g_enable_system_tables;
126 extern bool g_allow_memory_status_log;
127 
130 
131 #define INVALID_SESSION_ID ""
132 
// Establishes the logger request id for the current thrift call.
//  - When g_uniform_request_ids_per_thrift_call is set and a non-zero parent id was
//    supplied, the parent id is adopted as this call's request id.
//  - Otherwise a fresh request id is generated, and if a parent id was supplied the
//    relationship is written to the INFO log.
// The body is wrapped in do { } while (false) so that the macro behaves as a single
// statement (safe inside an unbraced if/else), and the argument is evaluated exactly
// once. Call sites must terminate the invocation with ';' (as they already had to).
#define SET_REQUEST_ID(parent_request_id)                                          \
  do {                                                                             \
    auto const set_request_id_parent_ = (parent_request_id);                       \
    if (g_uniform_request_ids_per_thrift_call && set_request_id_parent_) {         \
      logger::set_request_id(set_request_id_parent_);                              \
    } else if (logger::set_new_request_id(); set_request_id_parent_) {             \
      LOG(INFO) << "This request has parent request_id(" << set_request_id_parent_ \
                << ')';                                                            \
    }                                                                              \
  } while (false)
138 
// Builds a TDBException whose error_msg is `errstr`, logs the message at ERROR level
// and throws the exception.
// The temporary uses a deliberately unusual name: `errstr` is expanded inside the
// same scope, so a short name such as `ex` would silently capture a caller variable
// of that name (e.g. `catch (... & ex) { THROW_DB_EXCEPTION(ex.what()); }`).
// The macro remains a plain brace block, so existing call sites keep compiling; as
// before, do not use it as the unbraced body of an `if` that has an `else`.
#define THROW_DB_EXCEPTION(errstr)                  \
  {                                                 \
    TDBException throw_db_exception_ex_;            \
    throw_db_exception_ex_.error_msg = (errstr);    \
    LOG(ERROR) << throw_db_exception_ex_.error_msg; \
    throw throw_db_exception_ex_;                   \
  }
146 
147 thread_local std::string TrackingProcessor::client_address;
149 
150 namespace {
151 
153  const int32_t user_id,
154  const std::string& dashboard_name) {
155  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
156 }
157 
// Exception type used to signal that a connection must be dropped.
// It carries only the human-readable cause, exposed through what().
struct ForceDisconnect : public std::runtime_error {
  // explicit: a std::string must never convert silently into an exception object.
  explicit ForceDisconnect(const std::string& cause) : std::runtime_error(cause) {}
};
161 
162 } // namespace
163 
164 #ifdef ENABLE_GEOS
165 // from Geospatial/GeosValidation.cpp
166 extern std::unique_ptr<std::string> g_libgeos_so_filename;
167 #endif
168 
// Constructs the handler from the server's start-up configuration.
//
// Every argument is stored in the like-named member by the initializer list below;
// the only values that are transformed or synthesized here are:
//  - random_gen_ is seeded from std::random_device and session_id_dist_ covers
//    [0, INT32_MAX];
//  - dispatch_queue_ is a QueryDispatchQueue sized by system_parameters.num_executors;
//  - idle_session_duration / max_session_duration are multiplied by 60 before being
//    stored (presumably minutes -> seconds; confirm against the callers);
//  - super_user_rights_ starts as false and max_num_sessions_ as -1.
// The libgeos_so_filename and torch_lib_path parameters exist only when ENABLE_GEOS /
// HAVE_TORCH_TFS are defined.
//
// The body logs the release banner, runs initialize(is_new_db) and finally
// resetSessionsStore().
169 DBHandler::DBHandler(const std::vector<LeafHostInfo>& db_leaves,
170  const std::vector<LeafHostInfo>& string_leaves,
171  const std::string& base_data_path,
172  const bool allow_multifrag,
173  const bool jit_debug,
174  const bool intel_jit_profile,
175  const bool read_only,
176  const bool allow_loop_joins,
177  const bool enable_rendering,
178  const bool renderer_prefer_igpu,
179  const unsigned renderer_vulkan_timeout_ms,
180  const bool renderer_use_parallel_executors,
181  const bool enable_auto_clear_render_mem,
182  const int render_oom_retry_threshold,
183  const size_t render_mem_bytes,
184  const size_t max_concurrent_render_sessions,
185  const size_t reserved_gpu_mem,
186  const bool render_compositor_use_last_gpu,
187  const bool renderer_enable_slab_allocation,
188  const size_t num_reader_threads,
189  const AuthMetadata& authMetadata,
190  SystemParameters& system_parameters,
191  const bool legacy_syntax,
192  const int idle_session_duration,
193  const int max_session_duration,
194  const std::string& udf_filename,
195  const std::string& clang_path,
196  const std::vector<std::string>& clang_options,
197 #ifdef ENABLE_GEOS
198  const std::string& libgeos_so_filename,
199 #endif
200 #ifdef HAVE_TORCH_TFS
201  const std::string& torch_lib_path,
202 #endif
203  const File_Namespace::DiskCacheConfig& disk_cache_config,
204  const bool is_new_db)
205  : leaf_aggregator_(db_leaves)
206  , db_leaves_(db_leaves)
207  , string_leaves_(string_leaves)
208  , base_data_path_(base_data_path)
209  , random_gen_(std::random_device{}())
210  , session_id_dist_(0, INT32_MAX)
211  , jit_debug_(jit_debug)
212  , intel_jit_profile_(intel_jit_profile)
213  , allow_multifrag_(allow_multifrag)
214  , read_only_(read_only)
215  , allow_loop_joins_(allow_loop_joins)
216  , authMetadata_(authMetadata)
217  , system_parameters_(system_parameters)
218  , legacy_syntax_(legacy_syntax)
219  , dispatch_queue_(
220  std::make_unique<QueryDispatchQueue>(system_parameters.num_executors))
221  , super_user_rights_(false)
// Both session limits are scaled by 60 before being stored.
222  , idle_session_duration_(idle_session_duration * 60)
223  , max_session_duration_(max_session_duration * 60)
224  , enable_rendering_(enable_rendering)
225  , renderer_prefer_igpu_(renderer_prefer_igpu)
226  , renderer_vulkan_timeout_(renderer_vulkan_timeout_ms)
227  , renderer_use_parallel_executors_(renderer_use_parallel_executors)
228  , enable_auto_clear_render_mem_(enable_auto_clear_render_mem)
229  , render_oom_retry_threshold_(render_oom_retry_threshold)
230  , render_mem_bytes_(render_mem_bytes)
231  , max_concurrent_render_sessions_(max_concurrent_render_sessions)
232  , reserved_gpu_mem_(reserved_gpu_mem)
233  , render_compositor_use_last_gpu_(render_compositor_use_last_gpu)
234  , renderer_enable_slab_allocation_{renderer_enable_slab_allocation}
235  , num_reader_threads_(num_reader_threads)
236 #ifdef ENABLE_GEOS
237  , libgeos_so_filename_(libgeos_so_filename)
238 #endif
239 #ifdef HAVE_TORCH_TFS
240  , torch_lib_path_(torch_lib_path)
241 #endif
242  , disk_cache_config_(disk_cache_config)
243  , udf_filename_(udf_filename)
244  , clang_path_(clang_path)
245  , clang_options_(clang_options)
246  , max_num_sessions_(-1) {
247  LOG(INFO) << "HeavyDB Server " << MAPD_RELEASE;
// initialize() runs before the session store is (re)built.
248  initialize(is_new_db);
249  resetSessionsStore();
250 }
251 
253  size_t num_cpu_slots{0};
254  size_t num_gpu_slots{0};
255  size_t cpu_result_mem{0};
256  size_t cpu_buffer_pool_mem{0};
257  size_t gpu_buffer_pool_mem{0};
258  LOG(INFO) << "Initializing Executor Resource Manager";
259 
260  if (g_cpu_threads_override != 0) {
261  LOG(INFO) << "\tSetting Executor resource pool avaiable CPU threads/slots to "
262  "user-specified value of "
263  << g_cpu_threads_override << ".";
264  num_cpu_slots = g_cpu_threads_override;
265  } else {
266  LOG(INFO) << "\tSetting Executor resource pool avaiable CPU threads/slots to default "
267  "value of "
268  << cpu_threads() << ".";
269  // Setting the number of CPU slots to cpu_threads() will cause the ExecutorResourceMgr
270  // to set the logical number of available cpu slots to mirror the number of threads in
271  // the tbb thread pool and used elsewhere in the system, but we may want to consider a
272  // capability to allow the executor resource pool number of threads to be set
273  // independently as some fraction of the what cpu_threads() will return, to give some
274  // breathing room for all the other processes in the system that use CPU threads
275  num_cpu_slots = cpu_threads();
276  }
277  LOG(INFO) << "\tSetting max per-query CPU threads to ratio of "
279  << num_cpu_slots << " available threads, or "
281  num_cpu_slots)
282  << " threads.";
283 
284  // system_parameters_.num_gpus will be -1 if there are no GPUs enabled so we need to
285  // guard against this
286  num_gpu_slots = system_parameters_.num_gpus < 0 ? static_cast<size_t>(0)
288 
289  cpu_buffer_pool_mem = data_mgr_->getCpuBufferPoolSize();
292  } else {
293  const size_t system_mem_bytes = DataMgr::getTotalSystemMemory();
294  CHECK_GT(system_mem_bytes, size_t(0));
295  const size_t remaining_cpu_mem_bytes = system_mem_bytes >= cpu_buffer_pool_mem
296  ? system_mem_bytes - cpu_buffer_pool_mem
297  : 0UL;
298  cpu_result_mem =
299  std::max(static_cast<size_t>(remaining_cpu_mem_bytes *
301  static_cast<size_t>(1UL << 32));
302  }
303  // Below gets total combined size of all gpu buffer pools
304  // Likely will move to per device pool resource management,
305  // but keeping simple for now
306  gpu_buffer_pool_mem = data_mgr_->getGpuBufferPoolSize();
307 
308  // When we move to using the BufferMgrs directly in
309  // ExecutorResourcePool, there won't be a need for
310  // the buffer_pool_max_occupancy variable - a
311  // safety "fudge" factor as what the resource pool sees
312  // and what the BufferMgrs see will be exactly the same.
313 
314  // However we need to ensure we can quickly access
315  // chunk state of BufferMgrs without going through coarse lock
316  // before we do this, so use this fudge ratio for now
317 
318  // Note that if we are not conservative enough with the below and
319  // overshoot, the error will still be caught and if on GPU, the query
320  // can be re-run on CPU
321 
322  constexpr double buffer_pool_max_occupancy{0.95};
323  const size_t conservative_cpu_buffer_pool_mem =
324  static_cast<size_t>(cpu_buffer_pool_mem * buffer_pool_max_occupancy);
325  const size_t conservative_gpu_buffer_pool_mem =
326  static_cast<size_t>(gpu_buffer_pool_mem * buffer_pool_max_occupancy);
327 
328  LOG(INFO)
329  << "\tSetting Executor resource pool reserved space for CPU buffer pool memory to "
330  << format_num_bytes(conservative_cpu_buffer_pool_mem) << ".";
331  if (gpu_buffer_pool_mem > 0UL) {
332  LOG(INFO) << "\tSetting Executor resource pool reserved space for GPU buffer pool "
333  "memory to "
334  << format_num_bytes(conservative_gpu_buffer_pool_mem) << ".";
335  }
336  LOG(INFO) << "\tSetting Executor resource pool reserved space for CPU result memory to "
337  << format_num_bytes(cpu_result_mem) << ".";
338 
340  num_cpu_slots,
341  num_gpu_slots,
342  cpu_result_mem,
343  conservative_cpu_buffer_pool_mem,
344  conservative_gpu_buffer_pool_mem,
352 }
353 
355 #ifndef _WIN32
356  size_t temp;
357  CHECK(!__builtin_mul_overflow(g_num_tuple_threshold_switch_to_baseline,
359  &temp))
360  << "The product of g_num_tuple_threshold_switch_to_baseline and "
361  "g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline exceeds 64 bits.";
362 #endif
363 }
364 
366  if (sessions_store_) {
367  // Disconnect any existing sessions.
368  auto sessions = sessions_store_->getAllSessions();
369  for (auto session : sessions) {
370  sessions_store_->disconnect(session->get_session_id());
371  }
372  }
375  1,
379  [this](auto& session_ptr) { disconnect_impl(session_ptr); });
380 }
381 
382 void DBHandler::initialize(const bool is_new_db) {
383  if (!initialized_) {
384  initialized_ = true;
385  } else {
387  "Server already initialized; service restart required to activate any new "
388  "entitlements.");
389  return;
390  }
391 
394  cpu_mode_only_ = true;
395  } else {
396 #ifdef HAVE_CUDA
398  cpu_mode_only_ = false;
399 #else
401  LOG(WARNING) << "This build isn't CUDA enabled, will run on CPU";
402  cpu_mode_only_ = true;
403 #endif
404  }
405 
406  bool is_rendering_enabled = enable_rendering_;
407  if (system_parameters_.num_gpus == 0) {
408  is_rendering_enabled = false;
409  }
410 
411  const auto data_path =
412  boost::filesystem::path(base_data_path_) / shared::kDataDirectoryName;
413  // calculate the total amount of memory we need to reserve from each gpu that the Buffer
414  // manager cannot ask for
415  size_t total_reserved = reserved_gpu_mem_;
416  if (is_rendering_enabled) {
417  total_reserved += render_mem_bytes_;
418  }
419 
420  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cuda_mgr;
421 #ifdef HAVE_CUDA
422  if (!cpu_mode_only_ || is_rendering_enabled) {
423  try {
424  cuda_mgr = std::make_unique<CudaMgr_Namespace::CudaMgr>(
426  if (system_parameters_.num_gpus < 0) {
427  system_parameters_.num_gpus = cuda_mgr->getDeviceCount();
428  } else {
430  std::min(system_parameters_.num_gpus, cuda_mgr->getDeviceCount());
431  }
432  } catch (const std::exception& e) {
433  LOG(ERROR) << "Unable to instantiate CudaMgr, falling back to CPU-only mode. "
434  << e.what();
436  cpu_mode_only_ = true;
437  is_rendering_enabled = false;
438  }
439  }
440 #endif // HAVE_CUDA
441 
443 
444  try {
445  data_mgr_.reset(new Data_Namespace::DataMgr(data_path.string(),
447  std::move(cuda_mgr),
449  total_reserved,
452  } catch (const std::exception& e) {
453  LOG(FATAL) << "Failed to initialize data manager: " << e.what();
454  }
457  }
458 
459  std::string udf_ast_filename("");
460 
461  try {
462  if (!udf_filename_.empty()) {
463  const auto cuda_mgr = data_mgr_->getCudaMgr();
464  const CudaMgr_Namespace::NvidiaDeviceArch device_arch =
465  cuda_mgr ? cuda_mgr->getDeviceArch()
467  UdfCompiler compiler(device_arch, clang_path_, clang_options_);
468 
469  const auto [cpu_udf_ir_file, cuda_udf_ir_file] = compiler.compileUdf(udf_filename_);
470  Executor::addUdfIrToModule(cpu_udf_ir_file, /*is_cuda_ir=*/false);
471  if (!cuda_udf_ir_file.empty()) {
472  Executor::addUdfIrToModule(cuda_udf_ir_file, /*is_cuda_ir=*/true);
473  }
474  udf_ast_filename = compiler.getAstFileName(udf_filename_);
475  }
476  } catch (const std::exception& e) {
477  LOG(FATAL) << "Failed to initialize UDF compiler: " << e.what();
478  }
479 
480  try {
481  calcite_ =
482  std::make_shared<Calcite>(system_parameters_, base_data_path_, udf_ast_filename);
483  } catch (const std::exception& e) {
484  LOG(FATAL) << "Failed to initialize Calcite server: " << e.what();
485  }
486 
487  try {
488  ExtensionFunctionsWhitelist::add(calcite_->getExtensionFunctionWhitelist());
489  if (!udf_filename_.empty()) {
490  ExtensionFunctionsWhitelist::addUdfs(calcite_->getUserDefinedFunctionWhitelist());
491  }
492  } catch (const std::exception& e) {
493  LOG(FATAL) << "Failed to initialize extension functions: " << e.what();
494  }
495 
496  try {
498  } catch (const std::exception& e) {
499  LOG(FATAL) << "Failed to initialize table functions factory: " << e.what();
500  }
501 
502 #ifdef HAVE_RUNTIME_LIBS
503  try {
504 #ifdef HAVE_TORCH_TFS
505  RuntimeLibManager::loadRuntimeLibs(torch_lib_path_);
506 #else
508 #endif
509  } catch (const std::exception& e) {
510  LOG(ERROR) << "Failed to load runtime libraries: " << e.what();
511  LOG(ERROR) << "Support for runtime library table functions is disabled.";
512  }
513 #endif
514 
515  try {
516  auto udtfs = ThriftSerializers::to_thrift(
518  std::vector<TUserDefinedFunction> udfs = {};
519  calcite_->setRuntimeExtensionFunctions(udfs, udtfs, /*is_runtime=*/false);
520  } catch (const std::exception& e) {
521  LOG(FATAL) << "Failed to register compile-time table functions: " << e.what();
522  }
523 
524  if (!data_mgr_->gpusPresent() && !cpu_mode_only_) {
526  LOG(ERROR) << "No GPUs detected, falling back to CPU mode";
527  cpu_mode_only_ = true;
528  }
529 
530  LOG(INFO) << "Started in " << executor_device_type_ << " mode.";
531 
532  try {
534  SysCatalog::instance().init(base_data_path_,
535  data_mgr_,
537  calcite_,
538  is_new_db,
539  !db_leaves_.empty(),
541  } catch (const std::exception& e) {
542  LOG(FATAL) << "Failed to initialize system catalog: " << e.what();
543  }
544 
545  import_path_ = boost::filesystem::path(base_data_path_) / shared::kDefaultImportDirName;
546  start_time_ = std::time(nullptr);
547 
548  if (is_rendering_enabled) {
549  try {
550  render_handler_.reset(new RenderHandler(this,
554  false,
555  false,
561  } catch (const std::exception& e) {
562  LOG(ERROR) << "Backend rendering disabled: " << e.what();
563  }
564  }
565 
567 
568 #ifdef ENABLE_GEOS
569  if (!libgeos_so_filename_.empty()) {
570  g_libgeos_so_filename.reset(new std::string(libgeos_so_filename_));
571  LOG(INFO) << "Overriding default geos library with '" + *g_libgeos_so_filename + "'";
572  }
573 #endif
574 }
575 
577  shutdown();
578 }
579 
580 void DBHandler::check_read_only(const std::string& str) {
581  if (DBHandler::read_only_) {
582  THROW_DB_EXCEPTION(str + " disabled: server running in read-only mode.");
583  }
584 }
585 
587  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr) {
588  // We would create an in memory session for calcite with super user privileges which
589  // would be used for getting all tables metadata when a user runs the query. The
590  // session would be under the name of a proxy user/password which would only persist
591  // till server's lifetime or execution of calcite query(in memory) whichever is the
592  // earliest.
594  std::string session_id;
595  do {
597  } while (calcite_sessions_.find(session_id) != calcite_sessions_.end());
598  Catalog_Namespace::UserMetadata user_meta(-1,
599  calcite_->getInternalSessionProxyUserName(),
600  calcite_->getInternalSessionProxyPassword(),
601  true,
602  -1,
603  true,
604  false);
605  const auto emplace_ret = calcite_sessions_.emplace(
606  session_id,
607  std::make_shared<Catalog_Namespace::SessionInfo>(
608  catalog_ptr, user_meta, executor_device_type_, session_id));
609  CHECK(emplace_ret.second);
610  return session_id;
611 }
612 
613 void DBHandler::removeInMemoryCalciteSession(const std::string& session_id) {
614  // Remove InMemory calcite Session.
616  CHECK(calcite_sessions_.erase(session_id)) << session_id;
617 }
618 
619 // internal connection for connections with no password
620 void DBHandler::internal_connect(TSessionId& session_id,
621  const std::string& username,
622  const std::string& dbname) {
624  auto stdlog = STDLOG(); // session_id set by connect_impl()
625  std::string username2 = username; // login() may reset username given as argument
626  std::string dbname2 = dbname; // login() may reset dbname given as argument
628  std::shared_ptr<Catalog> cat = nullptr;
629  try {
630  cat =
631  SysCatalog::instance().login(dbname2, username2, std::string(), user_meta, false);
632  } catch (std::exception& e) {
633  THROW_DB_EXCEPTION(e.what());
634  }
635 
636  DBObject dbObject(dbname2, DatabaseDBObjectType);
637  dbObject.loadKey(*cat);
639  std::vector<DBObject> dbObjects;
640  dbObjects.push_back(dbObject);
641  if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
642  THROW_DB_EXCEPTION("Unauthorized Access: user " + user_meta.userLoggable() +
643  " is not allowed to access database " + dbname2 + ".");
644  }
645  connect_impl(session_id, std::string(), dbname2, user_meta, cat, stdlog);
646 }
647 
649  return leaf_aggregator_.leafCount() > 0;
650 }
651 
652 void DBHandler::krb5_connect(TKrb5Session& session,
653  const std::string& inputToken,
654  const std::string& dbname) {
655  THROW_DB_EXCEPTION("Unauthrorized Access. Kerberos login not supported");
656 }
657 
658 void DBHandler::connect(TSessionId& session_id,
659  const std::string& username,
660  const std::string& passwd,
661  const std::string& dbname) {
663  auto stdlog = STDLOG(); // session_info set by connect_impl()
664  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
665  std::string username2 = username; // login() may reset username given as argument
666  std::string dbname2 = dbname; // login() may reset dbname given as argument
668  std::shared_ptr<Catalog> cat = nullptr;
669  try {
670  cat = SysCatalog::instance().login(
671  dbname2, username2, passwd, user_meta, !super_user_rights_);
672  } catch (std::exception& e) {
673  stdlog.appendNameValuePairs("user", username, "db", dbname, "exception", e.what());
674  THROW_DB_EXCEPTION(e.what());
675  }
676 
677  DBObject dbObject(dbname2, DatabaseDBObjectType);
678  dbObject.loadKey(*cat);
680  std::vector<DBObject> dbObjects;
681  dbObjects.push_back(dbObject);
682  if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
683  stdlog.appendNameValuePairs(
684  "user", username, "db", dbname, "exception", "Missing Privileges");
685  THROW_DB_EXCEPTION("Unauthorized Access: user " + user_meta.userLoggable() +
686  " is not allowed to access database " + dbname2 + ".");
687  }
688  connect_impl(session_id, passwd, dbname2, user_meta, cat, stdlog);
689 
690  // if pki auth session_id will come back encrypted with user pubkey
691  SysCatalog::instance().check_for_session_encryption(passwd, session_id);
692 }
693 
694 void DBHandler::connect_impl(TSessionId& session_id,
695  const std::string& passwd,
696  const std::string& dbname,
697  const Catalog_Namespace::UserMetadata& user_meta,
698  std::shared_ptr<Catalog> cat,
699  query_state::StdLog& stdlog) {
700  // TODO(sy): Is there any reason to have dbname as a parameter
701  // here when the cat parameter already provides cat->name()?
702  // Should dbname and cat->name() ever differ?
703  auto session_ptr = sessions_store_->add(user_meta, cat, executor_device_type_);
704  session_id = session_ptr->get_session_id();
705  LOG(INFO) << "User " << user_meta.userLoggable() << " connected to database " << dbname;
706  stdlog.setSessionInfo(session_ptr);
707  session_ptr->set_connection_info(getConnectionInfo().toString());
708  if (!super_user_rights_) { // no need to connect to leaf_aggregator_ at this time
709  // while doing warmup
710  }
711  auto const roles =
712  stdlog.getConstSessionInfo()->get_currentUser().isSuper
713  ? std::vector<std::string>{{"super"}}
714  : SysCatalog::instance().getRoles(
715  false, false, stdlog.getConstSessionInfo()->get_currentUser().userName);
716  stdlog.appendNameValuePairs("roles", boost::algorithm::join(roles, ","));
717 }
718 
719 void DBHandler::disconnect(const TSessionId& session_id_or_json) {
720  heavyai::RequestInfo const request_info(session_id_or_json);
721  SET_REQUEST_ID(request_info.requestId());
722  auto session_ptr = get_session_ptr(request_info.sessionId());
723  auto stdlog = STDLOG(session_ptr, "client", getConnectionInfo().toString());
724  sessions_store_->disconnect(request_info.sessionId());
725 }
726 
728  const auto session_id = session_ptr->get_session_id();
729  std::exception_ptr leaf_exception = nullptr;
730  try {
731  if (leaf_aggregator_.leafCount() > 0) {
732  leaf_aggregator_.disconnect(session_id);
733  }
734  } catch (...) {
735  leaf_exception = std::current_exception();
736  }
737 
738  if (render_handler_) {
739  render_handler_->disconnect(session_id);
740  }
741 
742  if (leaf_exception) {
743  std::rethrow_exception(leaf_exception);
744  }
745 }
746 
747 void DBHandler::switch_database(const TSessionId& session_id_or_json,
748  const std::string& dbname) {
749  heavyai::RequestInfo const request_info(session_id_or_json);
750  SET_REQUEST_ID(request_info.requestId());
751  auto session_ptr = get_session_ptr(request_info.sessionId());
752  auto stdlog = STDLOG(session_ptr);
753  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
754  std::string dbname2 = dbname; // switchDatabase() may reset dbname given as argument
755  try {
756  std::shared_ptr<Catalog> cat = SysCatalog::instance().switchDatabase(
757  dbname2, session_ptr->get_currentUser().userName);
758  session_ptr->set_catalog_ptr(cat);
759  if (leaf_aggregator_.leafCount() > 0) {
760  leaf_aggregator_.switch_database(request_info.sessionId(), dbname);
761  return;
762  }
763  } catch (std::exception& e) {
764  THROW_DB_EXCEPTION(e.what());
765  }
766 }
767 
768 void DBHandler::clone_session(TSessionId& session2_id,
769  const TSessionId& session1_id_or_json) {
770  heavyai::RequestInfo const request_info(session1_id_or_json);
771  SET_REQUEST_ID(request_info.requestId());
772  auto session1_ptr = get_session_ptr(request_info.sessionId());
773  auto stdlog = STDLOG(session1_ptr);
774  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
775 
776  try {
777  const Catalog_Namespace::UserMetadata& user_meta = session1_ptr->get_currentUser();
778  std::shared_ptr<Catalog> cat = session1_ptr->get_catalog_ptr();
779  auto session2_ptr = sessions_store_->add(user_meta, cat, executor_device_type_);
780  session2_id = session2_ptr->get_session_id();
781  LOG(INFO) << "User " << user_meta.userLoggable() << " connected to database "
782  << cat->name();
783  if (leaf_aggregator_.leafCount() > 0) {
784  leaf_aggregator_.clone_session(request_info.sessionId(), session2_id);
785  return;
786  }
787  } catch (std::exception& e) {
788  THROW_DB_EXCEPTION(e.what());
789  }
790 }
791 
792 void DBHandler::interrupt(const TSessionId& query_session_id_or_json,
793  const TSessionId& interrupt_session_id_or_json) {
794  // if this is for distributed setting, query_session becomes a parent session (agg)
795  // and the interrupt session is one of existing session in the leaf node (leaf)
796  // so we can think there exists a logical mapping
797  // between query_session (agg) and interrupt_session (leaf)
798  heavyai::RequestInfo const query_request_info(query_session_id_or_json);
799  heavyai::RequestInfo const interrupt_request_info(interrupt_session_id_or_json);
800  SET_REQUEST_ID(interrupt_request_info.requestId());
801  auto session_ptr = get_session_ptr(interrupt_request_info.sessionId());
802  auto& cat = session_ptr->getCatalog();
803  auto stdlog = STDLOG(session_ptr);
804  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
805  const auto allow_query_interrupt =
807  if (g_enable_dynamic_watchdog || allow_query_interrupt) {
808  const auto dbname = cat.getCurrentDB().dbName;
810  jit_debug_ ? "/tmp" : "",
811  jit_debug_ ? "mapdquery" : "",
813  CHECK(executor);
814 
815  if (leaf_aggregator_.leafCount() > 0) {
816  leaf_aggregator_.interrupt(query_request_info.sessionId(),
817  interrupt_request_info.sessionId());
818  }
819  auto target_executor_ids =
820  executor->getExecutorIdsRunningQuery(query_request_info.sessionId());
821  if (target_executor_ids.empty()) {
823  executor->getSessionLock());
824  if (executor->checkIsQuerySessionEnrolled(query_request_info.sessionId(),
825  session_read_lock)) {
826  session_read_lock.unlock();
827  VLOG(1) << "Received interrupt: "
828  << "User " << session_ptr->get_currentUser().userLoggable()
829  << ", Database " << dbname << std::endl;
830  executor->interrupt(query_request_info.sessionId(),
831  interrupt_request_info.sessionId());
832  }
833  } else {
834  for (auto& executor_id : target_executor_ids) {
835  VLOG(1) << "Received interrupt: "
836  << "Executor " << executor_id << ", User "
837  << session_ptr->get_currentUser().userLoggable() << ", Database "
838  << dbname << std::endl;
839  auto target_executor = Executor::getExecutor(executor_id);
840  target_executor->interrupt(query_request_info.sessionId(),
841  interrupt_request_info.sessionId());
842  }
843  }
844 
845  LOG(INFO) << "User " << session_ptr->get_currentUser().userName
846  << " interrupted session with database " << dbname << std::endl;
847  }
848 }
849 
851  if (g_cluster) {
852  if (leaf_aggregator_.leafCount() > 0) {
853  return TRole::type::AGGREGATOR;
854  }
855  return TRole::type::LEAF;
856  }
857  return TRole::type::SERVER;
858 }
859 void DBHandler::get_server_status(TServerStatus& _return,
860  const TSessionId& session_id_or_json) {
861  heavyai::RequestInfo const request_info(session_id_or_json);
862  SET_REQUEST_ID(request_info.requestId());
863  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
864  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
865  const auto rendering_enabled = bool(render_handler_);
866  _return.read_only = read_only_;
867  _return.version = MAPD_RELEASE;
868  _return.rendering_enabled = rendering_enabled;
869  _return.start_time = start_time_;
870  _return.edition = MAPD_EDITION;
871  _return.host_name = heavyai::get_hostname();
872  _return.poly_rendering_enabled = rendering_enabled;
873  _return.role = getServerRole();
874  _return.renderer_status_json =
875  render_handler_ ? render_handler_->get_renderer_status_json() : "";
876 }
877 
878 void DBHandler::get_status(std::vector<TServerStatus>& _return,
879  const TSessionId& session_id_or_json) {
880  //
881  // get_status() is now called locally at startup on the aggregator
882  // in order to validate that all nodes of a cluster are running the
883  // same software version and the same renderer status
884  //
885  // In that context, it is called with the InvalidSessionID, and
886  // with the local super-user flag set.
887  //
888  // Hence, we allow this session-less mode only in distributed mode, and
889  // then on a leaf (always), or on the aggregator (only in super-user mode)
890  //
891  heavyai::RequestInfo const request_info(session_id_or_json);
892  SET_REQUEST_ID(request_info.requestId());
893  auto const allow_invalid_session = g_cluster && (!isAggregator() || super_user_rights_);
894 
895  if (!allow_invalid_session || request_info.sessionId() != getInvalidSessionId()) {
896  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
897  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
898  } else {
899  LOG(INFO) << "get_status() called in session-less mode";
900  }
901  const auto rendering_enabled = bool(render_handler_);
902  TServerStatus ret;
903  ret.read_only = read_only_;
904  ret.version = MAPD_RELEASE;
905  ret.rendering_enabled = rendering_enabled;
906  ret.start_time = start_time_;
907  ret.edition = MAPD_EDITION;
908  ret.host_name = heavyai::get_hostname();
909  ret.poly_rendering_enabled = rendering_enabled;
910  ret.role = getServerRole();
911  ret.renderer_status_json =
912  render_handler_ ? render_handler_->get_renderer_status_json() : "";
913  ret.host_id = "";
914 
915  _return.push_back(ret);
916  if (leaf_aggregator_.leafCount() > 0) {
917  std::vector<TServerStatus> leaf_status =
918  leaf_aggregator_.getLeafStatus(request_info.sessionId());
919  _return.insert(_return.end(), leaf_status.begin(), leaf_status.end());
920  }
921 }
922 
923 void DBHandler::get_hardware_info(TClusterHardwareInfo& _return,
924  const TSessionId& session_id_or_json) {
925  heavyai::RequestInfo const request_info(session_id_or_json);
926  SET_REQUEST_ID(request_info.requestId());
927  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
928  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
929  THardwareInfo ret;
930  const auto cuda_mgr = data_mgr_->getCudaMgr();
931  if (cuda_mgr) {
932  ret.num_gpu_hw = cuda_mgr->getDeviceCount();
933  ret.start_gpu = cuda_mgr->getStartGpu();
934  if (ret.start_gpu >= 0) {
935  ret.num_gpu_allocated = cuda_mgr->getDeviceCount() - cuda_mgr->getStartGpu();
936  // ^ This will break as soon as we allow non contiguous GPU allocations to MapD
937  }
938  for (int16_t device_id = 0; device_id < ret.num_gpu_hw; device_id++) {
939  TGpuSpecification gpu_spec;
940  auto deviceProperties = cuda_mgr->getDeviceProperties(device_id);
941  gpu_spec.num_sm = deviceProperties->numMPs;
942  gpu_spec.clock_frequency_kHz = deviceProperties->clockKhz;
943  gpu_spec.memory = deviceProperties->globalMem;
944  gpu_spec.compute_capability_major = deviceProperties->computeMajor;
945  gpu_spec.compute_capability_minor = deviceProperties->computeMinor;
946  ret.gpu_info.push_back(gpu_spec);
947  }
948  }
949 
950  // start hardware/OS dependent code
951  ret.num_cpu_hw = std::thread::hardware_concurrency();
952  // ^ This might return diffrent results in case of hyper threading
953  // end hardware/OS dependent code
954 
955  _return.hardware_info.push_back(ret);
956 }
957 
958 void DBHandler::get_session_info(TSessionInfo& _return,
959  const TSessionId& session_id_or_json) {
960  heavyai::RequestInfo const request_info(session_id_or_json);
961  SET_REQUEST_ID(request_info.requestId());
962  auto session_ptr = get_session_ptr(request_info.sessionId());
963  CHECK(session_ptr);
964  auto stdlog = STDLOG(session_ptr);
965  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
966  auto user_metadata = session_ptr->get_currentUser();
967  _return.user = user_metadata.userName;
968  _return.database = session_ptr->getCatalog().getCurrentDB().dbName;
969  _return.start_time = session_ptr->get_start_time();
970  _return.is_super = user_metadata.isSuper;
971 }
972 
973 void DBHandler::set_leaf_info(const TSessionId& session, const TLeafInfo& info) {
974  g_distributed_leaf_idx = info.leaf_id;
975  g_distributed_num_leaves = info.num_leaves;
976 }
977 
// value_to_thrift_column: appends one TargetValue `tv` of type `ti` to the
// column-wise `column`. The value is pushed onto one of column.data.arr_col,
// str_col, int_col or real_col, and a matching flag is pushed onto
// column.nulls. A null flag is only ever true when `ti` is nullable
// (!ti.get_notnull()).
// NOTE(review): the first signature line and the array-element CHECK condition
// are not present in this listing; only the visible code is described.
979  const SQLTypeInfo& ti,
980  TColumn& column) {
// Arrays: every element is converted recursively into a nested TColumn.
981  if (ti.is_array()) {
983  << "element types of arrays should always be nullable";
984  TColumn tColumn;
985  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
986  CHECK(array_tv);
987  bool is_null = !array_tv->is_initialized();
988  if (!is_null) {
989  const auto& vec = array_tv->get();
990  for (const auto& elem_tv : vec) {
991  value_to_thrift_column(elem_tv, ti.get_elem_type(), tColumn);
992  }
993  }
// An uninitialized array still contributes an (empty) nested column.
994  column.data.arr_col.push_back(tColumn);
995  column.nulls.push_back(is_null && !ti.get_notnull());
// Geometry: the value arrives either as a scalar string or as an array.
996  } else if (ti.is_geometry()) {
997  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
998  if (scalar_tv) {
999  auto s_n = boost::get<NullableString>(scalar_tv);
1000  auto s = boost::get<std::string>(s_n);
1001  if (s) {
1002  column.data.str_col.push_back(*s);
1003  } else {
1004  column.data.str_col.emplace_back(""); // null string
1005  auto null_p = boost::get<void*>(s_n);
1006  CHECK(null_p && !*null_p);
1007  }
1008  column.nulls.push_back(!s && !ti.get_notnull());
1009  } else {
1010  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
1011  CHECK(array_tv);
1012  bool is_null = !array_tv->is_initialized();
1013  if (!is_null) {
// Array-form geometry: elements are converted using a kDOUBLE type info.
1014  auto elem_type = SQLTypeInfo(kDOUBLE, false);
1015  TColumn tColumn;
1016  const auto& vec = array_tv->get();
1017  for (const auto& elem_tv : vec) {
1018  value_to_thrift_column(elem_tv, elem_type, tColumn);
1019  }
1020  column.data.arr_col.push_back(tColumn);
1021  column.nulls.push_back(false);
1022  } else {
1023  TColumn tColumn;
1024  column.data.arr_col.push_back(tColumn);
1025  column.nulls.push_back(is_null && !ti.get_notnull());
1026  }
1027  }
// Everything else must be a scalar: int64_t, double, float or NullableString.
1028  } else {
1029  CHECK(!ti.is_column());
1030  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
1031  CHECK(scalar_tv);
1032  if (boost::get<int64_t>(scalar_tv)) {
1033  int64_t data = *(boost::get<int64_t>(scalar_tv));
1034 
// Decimals are emitted as doubles, divided by 10^scale when scale > 0.
1035  if (ti.is_decimal()) {
1036  double val = static_cast<double>(data);
1037  if (ti.get_scale() > 0) {
1038  val /= pow(10.0, std::abs(ti.get_scale()));
1039  }
1040  column.data.real_col.push_back(val);
1041  } else {
1042  column.data.int_col.push_back(data);
1043  }
1044 
// The null flag compares the raw integer against the sentinel for its type.
1045  switch (ti.get_type()) {
1046  case kBOOLEAN:
1047  column.nulls.push_back(data == NULL_BOOLEAN && !ti.get_notnull());
1048  break;
1049  case kTINYINT:
1050  column.nulls.push_back(data == NULL_TINYINT && !ti.get_notnull());
1051  break;
1052  case kSMALLINT:
1053  column.nulls.push_back(data == NULL_SMALLINT && !ti.get_notnull());
1054  break;
1055  case kINT:
1056  column.nulls.push_back(data == NULL_INT && !ti.get_notnull());
1057  break;
1058  case kNUMERIC:
1059  case kDECIMAL:
1060  case kBIGINT:
1061  column.nulls.push_back(data == NULL_BIGINT && !ti.get_notnull());
1062  break;
1063  case kTIME:
1064  case kTIMESTAMP:
1065  case kDATE:
1066  case kINTERVAL_DAY_TIME:
1067  case kINTERVAL_YEAR_MONTH:
1068  column.nulls.push_back(data == NULL_BIGINT && !ti.get_notnull());
1069  break;
1070  default:
1071  column.nulls.push_back(false);
1072  }
1073  } else if (boost::get<double>(scalar_tv)) {
1074  double data = *(boost::get<double>(scalar_tv));
1075  column.data.real_col.push_back(data);
// A double may carry a kFLOAT value, so the sentinel depends on ti's type.
1076  if (ti.get_type() == kFLOAT) {
1077  column.nulls.push_back(data == NULL_FLOAT && !ti.get_notnull());
1078  } else {
1079  column.nulls.push_back(data == NULL_DOUBLE && !ti.get_notnull());
1080  }
1081  } else if (boost::get<float>(scalar_tv)) {
1082  CHECK_EQ(kFLOAT, ti.get_type());
1083  float data = *(boost::get<float>(scalar_tv));
1084  column.data.real_col.push_back(data);
1085  column.nulls.push_back(data == NULL_FLOAT && !ti.get_notnull());
1086  } else if (boost::get<NullableString>(scalar_tv)) {
1087  auto s_n = boost::get<NullableString>(scalar_tv);
1088  auto s = boost::get<std::string>(s_n);
1089  if (s) {
1090  column.data.str_col.push_back(*s);
1091  } else {
1092  column.data.str_col.emplace_back(""); // null string
1093  auto null_p = boost::get<void*>(s_n);
1094  CHECK(null_p && !*null_p);
1095  }
1096  column.nulls.push_back(!s && !ti.get_notnull());
1097  } else {
// No other scalar alternative is expected.
1098  CHECK(false);
1099  }
1100  }
1101 }
1102 
// value_to_thrift: converts one TargetValue `tv` of type `ti` into a row-wise
// TDatum and returns it. Non-scalar values must be arrays; their elements are
// converted recursively into datum.val.arr_val. Scalars fill int_val, real_val
// or str_val, and datum.is_null is derived from the per-type null sentinel.
// NOTE(review): the signature line and the array-element CHECK condition are
// not present in this listing; only the visible code is described.
1104  TDatum datum;
1105  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
1106  if (!scalar_tv) {
1107  CHECK(ti.is_array());
1109  << "element types of arrays should always be nullable";
1110  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
1111  CHECK(array_tv);
1112  if (array_tv->is_initialized()) {
1113  const auto& vec = array_tv->get();
1114  for (const auto& elem_tv : vec) {
1115  const auto scalar_col_val = value_to_thrift(elem_tv, ti.get_elem_type());
1116  datum.val.arr_val.push_back(scalar_col_val);
1117  }
1118  // Datum is not null, at worst it's an empty array Datum
1119  datum.is_null = false;
1120  } else {
1121  datum.is_null = true;
1122  }
1123  return datum;
1124  }
1125  if (boost::get<int64_t>(scalar_tv)) {
1126  int64_t data = *(boost::get<int64_t>(scalar_tv));
1127 
// Decimals are emitted as doubles, divided by 10^scale when scale > 0.
1128  if (ti.is_decimal()) {
1129  double val = static_cast<double>(data);
1130  if (ti.get_scale() > 0) {
1131  val /= pow(10.0, std::abs(ti.get_scale()));
1132  }
1133  datum.val.real_val = val;
1134  } else {
1135  datum.val.int_val = data;
1136  }
1137 
// NOTE(review): on the decimal path above only real_val is assigned, yet the
// kDECIMAL/kNUMERIC case below tests datum.val.int_val (not `data`) against
// NULL_BIGINT. Presumably a null decimal is therefore never flagged - confirm.
1138  switch (ti.get_type()) {
1139  case kBOOLEAN:
1140  datum.is_null = (datum.val.int_val == NULL_BOOLEAN);
1141  break;
1142  case kTINYINT:
1143  datum.is_null = (datum.val.int_val == NULL_TINYINT);
1144  break;
1145  case kSMALLINT:
1146  datum.is_null = (datum.val.int_val == NULL_SMALLINT);
1147  break;
1148  case kINT:
1149  datum.is_null = (datum.val.int_val == NULL_INT);
1150  break;
1151  case kDECIMAL:
1152  case kNUMERIC:
1153  case kBIGINT:
1154  datum.is_null = (datum.val.int_val == NULL_BIGINT);
1155  break;
1156  case kTIME:
1157  case kTIMESTAMP:
1158  case kDATE:
1159  case kINTERVAL_DAY_TIME:
1160  case kINTERVAL_YEAR_MONTH:
1161  datum.is_null = (datum.val.int_val == NULL_BIGINT);
1162  break;
1163  default:
1164  datum.is_null = false;
1165  }
1166  } else if (boost::get<double>(scalar_tv)) {
1167  datum.val.real_val = *(boost::get<double>(scalar_tv));
// A double may carry a kFLOAT value, so the sentinel depends on ti's type.
1168  if (ti.get_type() == kFLOAT) {
1169  datum.is_null = (datum.val.real_val == NULL_FLOAT);
1170  } else {
1171  datum.is_null = (datum.val.real_val == NULL_DOUBLE);
1172  }
1173  } else if (boost::get<float>(scalar_tv)) {
1174  CHECK_EQ(kFLOAT, ti.get_type());
1175  datum.val.real_val = *(boost::get<float>(scalar_tv));
1176  datum.is_null = (datum.val.real_val == NULL_FLOAT);
1177  } else if (boost::get<NullableString>(scalar_tv)) {
1178  auto s_n = boost::get<NullableString>(scalar_tv);
1179  auto s = boost::get<std::string>(s_n);
1180  if (s) {
1181  datum.val.str_val = *s;
1182  } else {
// The non-string alternative must be a null void* marker.
1183  auto null_p = boost::get<void*>(s_n);
1184  CHECK(null_p && !*null_p);
1185  }
1186  datum.is_null = !s;
1187  } else {
// No other scalar alternative is expected.
1188  CHECK(false);
1189  }
1190  return datum;
1191 }
1192 
// sql_execute_local: runs `query_str` on this server and fills `_return`.
// It resets _return.total_time_ms, copies `nonce`, classifies the statement
// through ParserWrapper into _return.query_type, and then adds the measured
// duration of the execution/conversion lambda to _return.total_time_ms.
// NOTE(review): the signature's first line, the switch case labels and parts
// of the timed lambda are not present in this listing.
1194  TQueryResult& _return,
1195  const QueryStateProxy& query_state_proxy,
1196  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
1197  const std::string& query_str,
1198  const bool column_format,
1199  const std::string& nonce,
1200  const int32_t first_n,
1201  const int32_t at_most_n,
1202  const bool use_calcite) {
1203  _return.total_time_ms = 0;
1204  _return.nonce = nonce;
1205  ParserWrapper pw{query_str};
// Map the parser's query type onto the Thrift TQueryType (logged at VLOG(1)).
1206  switch (pw.getQueryType()) {
1208  _return.query_type = TQueryType::READ;
1209  VLOG(1) << "query type: READ";
1210  break;
1211  }
1213  _return.query_type = TQueryType::WRITE;
1214  VLOG(1) << "query type: WRITE";
1215  break;
1216  }
1218  _return.query_type = TQueryType::SCHEMA_READ;
1219  VLOG(1) << "query type: SCHEMA READ";
1220  break;
1221  }
1223  _return.query_type = TQueryType::SCHEMA_WRITE;
1224  VLOG(1) << "query type: SCHEMA WRITE";
1225  break;
1226  }
// Unrecognized statements are reported as UNKNOWN and logged as a warning.
1227  default: {
1228  _return.query_type = TQueryType::UNKNOWN;
1229  LOG(WARNING) << "query type: UNKNOWN";
1230  break;
1231  }
1232  }
1233 
// Everything inside the lambda is timed and added to total_time_ms.
1236  _return.total_time_ms += measure<>::execution([&]() {
1238  query_state_proxy,
1239  column_format,
1240  session_ptr->get_executor_device_type(),
1241  first_n,
1242  at_most_n,
1243  use_calcite,
1244  locks);
1246  _return, result, query_state_proxy, column_format, first_n, at_most_n);
1247  });
1248 }
1249 
// Converts an execution result into the Thrift TQueryResult `_return`.
// The result's execution time is added to _return.execution_time_ms; an empty
// result leaves the rest of `_return` untouched. Otherwise the conversion
// routine (convertRows, convertResult or convertExplain) is picked from
// result.getResultType().
// NOTE(review): the `result` parameter line and the switch case labels are not
// present in this listing.
1250 void DBHandler::convertData(TQueryResult& _return,
1252  const QueryStateProxy& query_state_proxy,
1253  const bool column_format,
1254  const int32_t first_n,
1255  const int32_t at_most_n) {
1256  _return.execution_time_ms += result.getExecutionTime();
1257  if (result.empty()) {
1258  return;
1259  }
1260 
1261  switch (result.getResultType()) {
// This case honors the caller's first_n / at_most_n limits.
1263  convertRows(_return,
1264  query_state_proxy,
1265  result.getTargetsMeta(),
1266  *result.getRows(),
1267  column_format,
1268  first_n,
1269  at_most_n);
1270  break;
1272  convertResult(_return, *result.getRows(), true);
1273  break;
1275  convertExplain(_return, *result.getRows(), true);
1276  break;
// This case converts rows with both limits passed as -1.
1278  convertRows(_return,
1279  query_state_proxy,
1280  result.getTargetsMeta(),
1281  *result.getRows(),
1282  column_format,
1283  -1,
1284  -1);
1285  break;
1286  }
1287 }
1288 
// Executes `query_str` for the given session and fills `_return`.
//
// - A query starting with "execute relalg" bypasses Calcite; the text after
//   the prefix, trimmed, becomes the query that is executed.
// - first_n and at_most_n must not both be non-negative.
// - When leaf_aggregator_ reports leaves, the query goes through
//   agg_handler_->cluster_execute(); otherwise through sql_execute_local().
// - The time taken by any deferred COPY FROM is added to total_time_ms.
// - Every std::exception is rethrown through THROW_DB_EXCEPTION; three known
//   message fragments are replaced by more descriptive text.
// NOTE(review): one argument line of the cluster_execute() call is not present
// in this listing.
1289 void DBHandler::sql_execute(TQueryResult& _return,
1290  const TSessionId& session_id_or_json,
1291  const std::string& query_str,
1292  const bool column_format,
1293  const std::string& nonce,
1294  const int32_t first_n,
1295  const int32_t at_most_n) {
1296  heavyai::RequestInfo const request_info(session_id_or_json);
1297  SET_REQUEST_ID(request_info.requestId());
1298  const std::string exec_ra_prefix = "execute relalg";
1299  const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1300  auto actual_query =
1301  use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1302  auto session_ptr = get_session_ptr(request_info.sessionId());
1303  auto query_state = create_query_state(session_ptr, actual_query);
1304  auto stdlog = STDLOG(session_ptr, query_state);
1305  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
1306  stdlog.appendNameValuePairs("nonce", nonce);
1307  auto timer = DEBUG_TIMER(__func__);
1308  try {
// Removes this session's deferred COPY FROM entry when the guard is destroyed.
1309  ScopeGuard reset_was_deferred_copy_from = [this, &session_ptr] {
1310  deferred_copy_from_sessions.remove(session_ptr->get_session_id());
1311  };
1312 
1313  if (first_n >= 0 && at_most_n >= 0) {
1314  THROW_DB_EXCEPTION(std::string("At most one of first_n and at_most_n can be set"));
1315  }
1316 
1317  if (leaf_aggregator_.leafCount() > 0) {
1318  if (!agg_handler_) {
1319  THROW_DB_EXCEPTION("Distributed support is disabled.");
1320  }
1321  _return.total_time_ms = measure<>::execution([&]() {
1322  agg_handler_->cluster_execute(_return,
1323  query_state->createQueryStateProxy(),
1324  query_state->getQueryStr(),
1325  column_format,
1326  nonce,
1327  first_n,
1328  at_most_n,
1330  });
1331  _return.nonce = nonce;
1332  } else {
1333  sql_execute_local(_return,
1334  query_state->createQueryStateProxy(),
1335  session_ptr,
1336  actual_query,
1337  column_format,
1338  nonce,
1339  first_n,
1340  at_most_n,
1341  use_calcite);
1342  }
1343  _return.total_time_ms += process_deferred_copy_from(request_info.sessionId());
// Debug timing JSON is attached to the result only when it is non-empty.
1344  std::string debug_json = timer.stopAndGetJson();
1345  if (!debug_json.empty()) {
1346  _return.__set_debug(std::move(debug_json));
1347  }
1348  stdlog.appendNameValuePairs(
1349  "execution_time_ms",
1350  _return.execution_time_ms,
1351  "total_time_ms", // BE-3420 - Redundant with duration field
1352  stdlog.duration<std::chrono::milliseconds>());
1353  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
1354  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
1355  } catch (const std::exception& e) {
// Translate known low-level error texts into clearer client-facing messages.
1356  if (strstr(e.what(), "java.lang.NullPointerException")) {
1357  THROW_DB_EXCEPTION("query failed from broken view or other schema related issue");
1358  } else if (strstr(e.what(), "SQL Error: Encountered \";\"")) {
1359  THROW_DB_EXCEPTION("multiple SQL statements not allowed");
1360  } else if (strstr(e.what(), "SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1361  THROW_DB_EXCEPTION("empty SQL statment not allowed");
1362  } else {
1363  THROW_DB_EXCEPTION(e.what());
1364  }
1365  }
1366 }
1367 
// Executes `query_str` for the given session and records the elapsed time on
// `_return` through setExecutionTime(); the time spent in any deferred
// COPY FROM is included.
//
// - A query starting with "execute relalg" bypasses Calcite; the text after
//   the prefix, trimmed, becomes the query that is executed.
// - first_n and at_most_n must not both be non-negative.
// - Every std::exception is rethrown through THROW_DB_EXCEPTION; three known
//   message fragments are replaced by more descriptive text.
// NOTE(review): the signature's first and last lines and the first line of the
// timed call are not present in this listing.
1369  const TSessionId& session_id_or_json,
1370  const std::string& query_str,
1371  const bool column_format,
1372  const int32_t first_n,
1373  const int32_t at_most_n,
1375  heavyai::RequestInfo const request_info(session_id_or_json);
1376  SET_REQUEST_ID(request_info.requestId());
1377  const std::string exec_ra_prefix = "execute relalg";
1378  const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1379  auto actual_query =
1380  use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1381 
1382  auto session_ptr = get_session_ptr(request_info.sessionId());
1383  CHECK(session_ptr);
1384  auto query_state = create_query_state(session_ptr, actual_query);
1385  auto stdlog = STDLOG(session_ptr, query_state);
1386  auto timer = DEBUG_TIMER(__func__);
1387 
1388  try {
// Removes this session's deferred COPY FROM entry when the guard is destroyed.
1389  ScopeGuard reset_was_deferred_copy_from = [this, &session_ptr] {
1390  deferred_copy_from_sessions.remove(session_ptr->get_session_id());
1391  };
1392 
1393  if (first_n >= 0 && at_most_n >= 0) {
1394  THROW_DB_EXCEPTION(std::string("At most one of first_n and at_most_n can be set"));
1395  }
1396  auto total_time_ms = measure<>::execution([&]() {
1398  query_state->createQueryStateProxy(),
1399  column_format,
1400  session_ptr->get_executor_device_type(),
1401  first_n,
1402  at_most_n,
1403  use_calcite,
1404  locks);
1405  });
1406 
1407  _return.setExecutionTime(total_time_ms +
1408  process_deferred_copy_from(request_info.sessionId()));
1409 
1410  stdlog.appendNameValuePairs(
1411  "execution_time_ms",
1412  _return.getExecutionTime(),
1413  "total_time_ms", // BE-3420 - Redundant with duration field
1414  stdlog.duration<std::chrono::milliseconds>());
1415  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
1416  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
1417  } catch (const std::exception& e) {
// Translate known low-level error texts into clearer client-facing messages.
1418  if (strstr(e.what(), "java.lang.NullPointerException")) {
1419  THROW_DB_EXCEPTION("query failed from broken view or other schema related issue");
1420  } else if (strstr(e.what(), "SQL Error: Encountered \";\"")) {
1421  THROW_DB_EXCEPTION("multiple SQL statements not allowed");
1422  } else if (strstr(e.what(), "SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1423  THROW_DB_EXCEPTION("empty SQL statment not allowed");
1424  } else {
1425  THROW_DB_EXCEPTION(e.what());
1426  }
1427  }
1428 }
1429 
1430 int64_t DBHandler::process_deferred_copy_from(const TSessionId& session_id) {
1431  int64_t total_time_ms(0);
1432  // if the SQL statement we just executed was a geo COPY FROM, the import
1433  // parameters were captured, and this flag set, so we do the actual import here
1434  if (auto deferred_copy_from_state = deferred_copy_from_sessions(session_id)) {
1435  // import_geo_table() calls create_table() which calls this function to
1436  // do the work, so reset the flag now to avoid executing this part a
1437  // second time at the end of that, which would fail as the table was
1438  // already created! Also reset the flag with a ScopeGuard on exiting
1439  // this function any other way, such as an exception from the code above!
1440  deferred_copy_from_sessions.remove(session_id);
1441 
1442  // create table as replicated?
1443  TCreateParams create_params;
1444  if (deferred_copy_from_state->partitions == "REPLICATED") {
1445  create_params.is_replicated = true;
1446  }
1447 
1448  // now do (and time) the import
1449  total_time_ms = measure<>::execution([&]() {
1450  importGeoTableGlobFilterSort(session_id,
1451  deferred_copy_from_state->table,
1452  deferred_copy_from_state->file_name,
1453  deferred_copy_from_state->copy_params,
1454  TRowDescriptor(),
1455  create_params);
1456  });
1457  }
1458  return total_time_ms;
1459 }
1460 
// Executes a read-only query and returns the result as an Arrow data frame
// description in `_return` (shared-memory and data-frame handles, buffer and
// sizes, plus timing fields).
//
// Rejected up front through THROW_DB_EXCEPTION:
// - GPU results when the session's executor device type is not GPU, when no
//   GPU is present, or when device_id is out of range;
// - anything that ParserWrapper does not classify as a Read query;
// - Calcite EXPLAIN statements.
// For GPU results the serialized CUDA handle is remembered in
// ipc_handle_to_dev_ptr_ under the returned df_handle.
// NOTE(review): several lines (the THROW for an invalid device id, the lock
// declaration, and part of the device-type selection) are not present in this
// listing.
1461 void DBHandler::sql_execute_df(TDataFrame& _return,
1462  const TSessionId& session_id_or_json,
1463  const std::string& query_str,
1464  const TDeviceType::type results_device_type,
1465  const int32_t device_id,
1466  const int32_t first_n,
1467  const TArrowTransport::type transport_method) {
1468  heavyai::RequestInfo const request_info(session_id_or_json);
1469  SET_REQUEST_ID(request_info.requestId());
1470  auto session_ptr = get_session_ptr(request_info.sessionId());
1471  CHECK(session_ptr);
1472  auto query_state = create_query_state(session_ptr, query_str);
1473  auto stdlog = STDLOG(session_ptr, query_state);
1474 
1475  const auto executor_device_type = session_ptr->get_executor_device_type();
1476 
// GPU results are only allowed when the session and the server support them.
1477  if (results_device_type == TDeviceType::GPU) {
1478  if (executor_device_type != ExecutorDeviceType::GPU) {
1479  THROW_DB_EXCEPTION(std::string("GPU mode is not allowed in this session"));
1480  }
1481  if (!data_mgr_->gpusPresent()) {
1482  THROW_DB_EXCEPTION(std::string("No GPU is available in this server"));
1483  }
1484  if (device_id < 0 || device_id >= data_mgr_->getCudaMgr()->getDeviceCount()) {
1486  std::string("Invalid device_id or unavailable GPU with this ID"));
1487  }
1488  }
1489  ParserWrapper pw{query_str};
1490  if (pw.getQueryType() != ParserWrapper::QueryType::Read) {
1491  THROW_DB_EXCEPTION(std::string(
1492  "Only read queries supported for the Arrow sql_execute_df endpoint."));
1493  }
1494  if (ExplainInfo(query_str).isCalciteExplain()) {
1495  THROW_DB_EXCEPTION(std::string(
1496  "Explain is currently unsupported by the Arrow sql_execute_df endpoint."));
1497  }
1498 
1499  ExecutionResult execution_result;
1501  sql_execute_impl(execution_result,
1502  query_state->createQueryStateProxy(),
1503  true, /* column_format - does this do anything? */
1504  executor_device_type,
1505  first_n,
1506  -1, /* at_most_n */
1507  true,
1508  locks);
1509 
1510  const auto result_set = execution_result.getRows();
1511  const auto executor_results_device_type = results_device_type == TDeviceType::CPU
// Reported execution time excludes the result set's queue time.
1514  _return.execution_time_ms =
1515  execution_result.getExecutionTime() - result_set->getQueueTime();
1516  const auto converter = std::make_unique<ArrowResultSetConverter>(
1517  result_set,
1518  data_mgr_,
1519  executor_results_device_type,
1520  device_id,
1521  getTargetNames(execution_result.getTargetsMeta()),
1522  first_n,
1523  ArrowTransport(transport_method));
1524  ArrowResult arrow_result;
1525  _return.arrow_conversion_time_ms +=
1526  measure<>::execution([&] { arrow_result = converter->getArrowResult(); });
// Handles and buffer are copied into std::string fields of the Thrift struct.
1527  _return.sm_handle =
1528  std::string(arrow_result.sm_handle.begin(), arrow_result.sm_handle.end());
1529  _return.sm_size = arrow_result.sm_size;
1530  _return.df_handle =
1531  std::string(arrow_result.df_handle.begin(), arrow_result.df_handle.end());
1532  _return.df_buffer =
1533  std::string(arrow_result.df_buffer.begin(), arrow_result.df_buffer.end());
// Remember the CUDA handle for GPU frames; each df_handle must be new to the map.
1534  if (executor_results_device_type == ExecutorDeviceType::GPU) {
1535  std::lock_guard<std::mutex> map_lock(handle_to_dev_ptr_mutex_);
1536  CHECK(!ipc_handle_to_dev_ptr_.count(_return.df_handle));
1537  ipc_handle_to_dev_ptr_.insert(
1538  std::make_pair(_return.df_handle, arrow_result.serialized_cuda_handle));
1539  }
1540  _return.df_size = arrow_result.df_size;
1541 }
1542 
1543 void DBHandler::sql_execute_gdf(TDataFrame& _return,
1544  const TSessionId& session_id_or_json,
1545  const std::string& query_str,
1546  const int32_t device_id,
1547  const int32_t first_n) {
1548  heavyai::RequestInfo request_info(session_id_or_json);
1549  SET_REQUEST_ID(request_info.requestId());
1550  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1551  request_info.setRequestId(logger::request_id());
1552  sql_execute_df(_return,
1553  request_info.json(),
1554  query_str,
1555  TDeviceType::GPU,
1556  device_id,
1557  first_n,
1558  TArrowTransport::SHARED_MEMORY);
1559 }
1560 
1561 // For now we have only one user of a data frame in all cases.
// Hands a previously returned data frame back to the server.
// For GPU frames the serialized CUDA handle stored under df.df_handle is looked
// up and erased from ipc_handle_to_dev_ptr_ while holding
// handle_to_dev_ptr_mutex_; a TDBException is thrown unless exactly one entry
// exists for that handle.
// NOTE(review): the declaration that consumes sm_handle/df_handle and the call
// that receives `result` are not present in this listing; presumably they
// build an ArrowResult and release its buffers - confirm against the full file.
1562 void DBHandler::deallocate_df(const TSessionId& session_id_or_json,
1563  const TDataFrame& df,
1564  const TDeviceType::type device_type,
1565  const int32_t device_id) {
1566  heavyai::RequestInfo const request_info(session_id_or_json);
1567  SET_REQUEST_ID(request_info.requestId());
1568  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1569  std::string serialized_cuda_handle = "";
1570  if (device_type == TDeviceType::GPU) {
1571  std::lock_guard<std::mutex> map_lock(handle_to_dev_ptr_mutex_);
1572  if (ipc_handle_to_dev_ptr_.count(df.df_handle) != size_t(1)) {
1573  TDBException ex;
1574  ex.error_msg = std::string(
1575  "Current data frame handle is not bookkept or been inserted "
1576  "twice");
1577  LOG(ERROR) << ex.error_msg;
1578  throw ex;
1579  }
// Take the stored handle and drop the bookkeeping entry.
1580  serialized_cuda_handle = ipc_handle_to_dev_ptr_[df.df_handle];
1581  ipc_handle_to_dev_ptr_.erase(df.df_handle);
1582  }
// Copy the string handles from the Thrift struct into char vectors.
1583  std::vector<char> sm_handle(df.sm_handle.begin(), df.sm_handle.end());
1584  std::vector<char> df_handle(df.df_handle.begin(), df.df_handle.end());
1586  sm_handle, df.sm_size, df_handle, df.df_size, serialized_cuda_handle};
1588  result,
1589  device_type == TDeviceType::CPU ? ExecutorDeviceType::CPU : ExecutorDeviceType::GPU,
1590  device_id,
1591  data_mgr_);
1592 }
1593 
// Validates `query_str` and returns the row descriptor of its result in
// `_return`, as produced by validateRelAlg().
// EXPLAIN, DDL and update-DML statements are rejected. The query is parsed to
// relational algebra with parse_to_ra() while holding the legacy execute read
// lock. Every std::exception, including the rejection above, is rethrown
// through THROW_DB_EXCEPTION.
// NOTE(review): the declaration of `locks` and one parse_to_ra() argument line
// are not present in this listing.
1594 void DBHandler::sql_validate(TRowDescriptor& _return,
1595  const TSessionId& session_id_or_json,
1596  const std::string& query_str) {
1597  heavyai::RequestInfo const request_info(session_id_or_json);
1598  SET_REQUEST_ID(request_info.requestId());
1599  try {
1600  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1601  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
1602  auto query_state = create_query_state(stdlog.getSessionInfo(), query_str);
1603  stdlog.setQueryState(query_state);
1604 
1605  ParserWrapper pw{query_str};
1606  if (ExplainInfo(query_str).isExplain() || pw.is_ddl || pw.is_update_dml) {
1607  throw std::runtime_error("Can only validate SELECT statements.");
1608  }
1609 
1610  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
1611 
1612  TPlanResult parse_result;
1614  std::tie(parse_result, locks) = parse_to_ra(query_state->createQueryStateProxy(),
1615  query_state->getQueryStr(),
1616  {},
1617  true,
1619  /*check_privileges=*/true);
// The plan text from parsing is what gets validated.
1620  const auto query_ra = parse_result.plan_result;
1621  _return = validateRelAlg(query_ra, query_state->createQueryStateProxy());
1622  } catch (const std::exception& e) {
1623  THROW_DB_EXCEPTION(std::string(e.what()));
1624  }
1625 }
1626 
1627 namespace {
1628 
// Result of scanning a partial query: upper-cased identifier tokens and
// upper-cased table qualifiers.
// NOTE(review): the struct's opening line and the function's signature line
// are not present in this listing.
1630  std::unordered_set<std::string> uc_column_names;
1631  std::unordered_set<std::string> uc_column_table_qualifiers;
1632 };
1633 
1634 // Extract what looks like a (qualified) identifier from the partial query.
1635 // The results will be used to rank the auto-completion results: tables which
1636 // contain at least one of the identifiers first.
1638  const std::string& sql) {
// A token is a maximal run of alphanumerics, '_' and '.'.
1639  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1640  boost::regex::extended | boost::regex::icase};
1641  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1642  boost::sregex_token_iterator end;
1643  std::unordered_set<std::string> uc_column_names;
1644  std::unordered_set<std::string> uc_column_table_qualifiers;
1645  for (; tok_it != end; ++tok_it) {
1646  std::string column_name = *tok_it;
1647  std::vector<std::string> column_tokens;
1648  boost::split(column_tokens, column_name, boost::is_any_of("."));
// Exactly two dot-separated parts: only the first part is recorded, as a
// qualifier. Any other token is recorded whole as a column name.
1649  if (column_tokens.size() == 2) {
1650  // If the column name is qualified, take user's word.
1651  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1652  } else {
1653  uc_column_names.insert(to_upper(column_name));
1654  }
1655  }
1656  return {uc_column_names, uc_column_table_qualifiers};
1657 }
1658 
1659 } // namespace
1660 
1661 void DBHandler::get_completion_hints(std::vector<TCompletionHint>& hints,
1662  const TSessionId& session_id_or_json,
1663  const std::string& sql,
1664  const int cursor) {
1665  heavyai::RequestInfo const request_info(session_id_or_json);
1666  SET_REQUEST_ID(request_info.requestId());
1667  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1668  std::vector<std::string> visible_tables; // Tables allowed for the given session.
1669  get_completion_hints_unsorted(hints, visible_tables, stdlog, sql, cursor);
1670  const auto proj_tokens = extract_projection_tokens_for_completion(sql);
1671  auto compatible_table_names = get_uc_compatible_table_names_by_column(
1672  proj_tokens.uc_column_names, visible_tables, stdlog);
1673  // Add the table qualifiers explicitly specified by the user.
1674  compatible_table_names.insert(proj_tokens.uc_column_table_qualifiers.begin(),
1675  proj_tokens.uc_column_table_qualifiers.end());
1676  // Sort the hints by category, from COLUMN (most specific) to KEYWORD.
1677  std::sort(
1678  hints.begin(),
1679  hints.end(),
1680  [&compatible_table_names](const TCompletionHint& lhs, const TCompletionHint& rhs) {
1681  if (lhs.type == TCompletionHintType::TABLE &&
1682  rhs.type == TCompletionHintType::TABLE) {
1683  // Between two tables, one which is compatible with the specified
1684  // projections and one which isn't, pick the one which is compatible.
1685  if (compatible_table_names.find(to_upper(lhs.hints.back())) !=
1686  compatible_table_names.end() &&
1687  compatible_table_names.find(to_upper(rhs.hints.back())) ==
1688  compatible_table_names.end()) {
1689  return true;
1690  }
1691  }
1692  return lhs.type < rhs.type;
1693  });
1694 }
1695 
// Collects completion hints for `sql` without ordering them.
// `visible_tables` is filled with the session's physical tables and views, and
// Calcite is asked for hints at `cursor`. If the text up to the cursor already
// contains a FROM keyword, the Calcite hints are returned as they are;
// otherwise token-based completions are added on top.
// A std::exception from the table/Calcite step is logged and rethrown as a
// TDBException.
// NOTE(review): the line that consumes the Calcite result (the start of the
// statement ending at listing line 1707) is not present in this listing.
1696 void DBHandler::get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
1697  std::vector<std::string>& visible_tables,
1698  query_state::StdLog& stdlog,
1699  const std::string& sql,
1700  const int cursor) {
1701  const auto& session_info = *stdlog.getConstSessionInfo();
1702  try {
1703  get_tables_impl(visible_tables, session_info, GET_PHYSICAL_TABLES_AND_VIEWS);
1704 
1705  // Filter out keywords suggested by Calcite which we don't support.
1707  calcite_->getCompletionHints(session_info, visible_tables, sql, cursor));
1708  } catch (const std::exception& e) {
1709  TDBException ex;
1710  ex.error_msg = std::string(e.what());
1711  LOG(ERROR) << ex.error_msg;
1712  throw ex;
1713  }
1714  boost::regex from_expr{R"(\s+from\s+)", boost::regex::extended | boost::regex::icase};
// A negative cursor means "end of text"; a cursor past the end is clamped.
1715  const size_t length_to_cursor =
1716  cursor < 0 ? sql.size() : std::min(sql.size(), static_cast<size_t>(cursor));
1717  // Trust hints from Calcite after the FROM keyword.
1718  if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, from_expr)) {
1719  return;
1720  }
1721  // Before FROM, the query is too incomplete for context-sensitive completions.
1722  get_token_based_completions(hints, stdlog, visible_tables, sql, cursor);
1723 }
1724 
1725 void DBHandler::get_token_based_completions(std::vector<TCompletionHint>& hints,
1726  query_state::StdLog& stdlog,
1727  std::vector<std::string>& visible_tables,
1728  const std::string& sql,
1729  const int cursor) {
1730  const auto last_word =
1731  find_last_word_from_cursor(sql, cursor < 0 ? sql.size() : cursor);
1732  boost::regex select_expr{R"(\s*select\s+)",
1733  boost::regex::extended | boost::regex::icase};
1734  const size_t length_to_cursor =
1735  cursor < 0 ? sql.size() : std::min(sql.size(), static_cast<size_t>(cursor));
1736  // After SELECT but before FROM, look for all columns in all tables which match the
1737  // prefix.
1738  if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, select_expr)) {
1739  const auto column_names_by_table = fill_column_names_by_table(visible_tables, stdlog);
1740  // Trust the fully qualified columns the most.
1741  if (get_qualified_column_hints(hints, last_word, column_names_by_table)) {
1742  return;
1743  }
1744  // Not much information to use, just retrieve column names which match the prefix.
1745  if (should_suggest_column_hints(sql)) {
1746  get_column_hints(hints, last_word, column_names_by_table);
1747  return;
1748  }
1749  const std::string kFromKeyword{"FROM"};
1750  if (boost::istarts_with(kFromKeyword, last_word)) {
1751  TCompletionHint keyword_hint;
1752  keyword_hint.type = TCompletionHintType::KEYWORD;
1753  keyword_hint.replaced = last_word;
1754  keyword_hint.hints.emplace_back(kFromKeyword);
1755  hints.push_back(keyword_hint);
1756  }
1757  } else {
1758  const std::string kSelectKeyword{"SELECT"};
1759  if (boost::istarts_with(kSelectKeyword, last_word)) {
1760  TCompletionHint keyword_hint;
1761  keyword_hint.type = TCompletionHintType::KEYWORD;
1762  keyword_hint.replaced = last_word;
1763  keyword_hint.hints.emplace_back(kSelectKeyword);
1764  hints.push_back(keyword_hint);
1765  }
1766  }
1767 }
1768 
1769 std::unordered_map<std::string, std::unordered_set<std::string>>
1770 DBHandler::fill_column_names_by_table(std::vector<std::string>& table_names,
1771  query_state::StdLog& stdlog) {
1772  std::unordered_map<std::string, std::unordered_set<std::string>> column_names_by_table;
1773  for (auto it = table_names.begin(); it != table_names.end();) {
1774  TTableDetails table_details;
1775  try {
1776  get_table_details_impl(table_details, stdlog, *it, false, false);
1777  } catch (const TDBException& e) {
1778  // Remove the corrupted Table/View name from the list for further processing.
1779  it = table_names.erase(it);
1780  continue;
1781  }
1782  for (const auto& column_type : table_details.row_desc) {
1783  column_names_by_table[*it].emplace(column_type.col_name);
1784  }
1785  ++it;
1786  }
1787  return column_names_by_table;
1788 }
1789 
1793 }
1794 
// get_uc_compatible_table_names_by_column: returns the upper-cased names of the
// tables in `table_names` that have at least one column whose upper-cased name
// is in `uc_column_names`. Tables whose details cannot be fetched
// (TDBException) are erased from `table_names`.
// NOTE(review): the first signature line is not present in this listing.
1796  const std::unordered_set<std::string>& uc_column_names,
1797  std::vector<std::string>& table_names,
1798  query_state::StdLog& stdlog) {
1799  std::unordered_set<std::string> compatible_table_names_by_column;
// The iterator is advanced manually because erase() hands back the next one.
1800  for (auto it = table_names.begin(); it != table_names.end();) {
1801  TTableDetails table_details;
1802  try {
1803  get_table_details_impl(table_details, stdlog, *it, false, false);
1804  } catch (const TDBException& e) {
1805  // Remove the corrupted Table/View name from the list for further processing.
1806  it = table_names.erase(it);
1807  continue;
1808  }
1809  for (const auto& column_type : table_details.row_desc) {
1810  if (uc_column_names.find(to_upper(column_type.col_name)) != uc_column_names.end()) {
1811  compatible_table_names_by_column.emplace(to_upper(*it));
// One matching column is enough; stop scanning this table.
1812  break;
1813  }
1814  }
1815  ++it;
1816  }
1817  return compatible_table_names_by_column;
1818 }
1819 
// Submits query_task to dispatch_queue_, forwarding the is_update_delete flag.
// NOTE(review): listing line 1822 is absent from this view (numbers jump from 1821
// to 1823), so one statement of the body cannot be seen here.
1820 void DBHandler::dispatch_query_task(std::shared_ptr<QueryDispatchQueue::Task> query_task,
1821  const bool is_update_delete) {
1823  dispatch_queue_->submit(std::move(query_task), is_update_delete);
1824 }
1825 
// Validates a relational-algebra plan and returns the row descriptor of its result.
//
// The plan is run through execute_rel_alg() with just_validate=true on the dispatch
// queue; the result is converted with convertData() and, for dictionary-encoded
// columns with a positive comp_param, comp_param is replaced by the dictionary's
// dictNBits.
// NOTE(review): listing lines 1842 and 1868 are absent from this view (one
// execute_rel_alg() argument and the statement that declares `cat`).
1826 TRowDescriptor DBHandler::validateRelAlg(const std::string& query_ra,
1827  QueryStateProxy query_state_proxy) {
1828  TQueryResult query_result;
1829  ExecutionResult execution_result;
      // execution_result and query_ra are captured by reference; this function
      // blocks on result_future.get() below before using or leaving them.
1830  auto execute_rel_alg_task = std::make_shared<QueryDispatchQueue::Task>(
1831  [this,
1832  &execution_result,
1833  query_state_proxy,
1834  &query_ra,
1835  parent_thread_local_ids =
1836  logger::thread_local_ids()](const size_t executor_index) {
1837  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
1838  execute_rel_alg(execution_result,
1839  query_state_proxy,
1840  query_ra,
1841  true,
1843  -1,
1844  -1,
1845  /*just_validate=*/true,
1846  /*find_filter_push_down_candidates=*/false,
1847  ExplainInfo(),
1848  executor_index);
1849  });
1850  dispatch_query_task(execute_rel_alg_task, /*is_update_delete=*/false);
1851  auto result_future = execute_rel_alg_task->get_future();
      // Wait for the dispatched task to finish.
1852  result_future.get();
1853  DBHandler::convertData(query_result, execution_result, query_state_proxy, true, -1, -1);
1854 
1855  const auto& row_desc = query_result.row_set.row_desc;
1856  const auto& targets_meta = execution_result.getTargetsMeta();
      // The fixup loop below indexes both containers with the same i.
1857  CHECK_EQ(row_desc.size(), targets_meta.size());
1858 
1859  // TODO: Below fixup logic should no longer be needed after the comp_param refactor
1860  TRowDescriptor fixedup_row_desc;
1861  for (size_t i = 0; i < row_desc.size(); i++) {
1862  const auto& col_desc = row_desc[i];
1863  auto fixedup_col_desc = col_desc;
1864  if (col_desc.col_type.encoding == TEncodingType::DICT &&
1865  col_desc.col_type.comp_param > 0) {
1866  const auto& type_info = targets_meta[i].get_type_info();
1867  CHECK_EQ(type_info.get_compression(), kENCODING_DICT);
1869  type_info.getStringDictKey().db_id);
      // Here comp_param is used as the dictionary id for the lookup and is then
      // overwritten with that dictionary's dictNBits.
1870  const auto dd = cat->getMetadataForDict(col_desc.col_type.comp_param, false);
1871  CHECK(dd);
1872  fixedup_col_desc.col_type.comp_param = dd->dictNBits;
1873  }
1874  fixedup_row_desc.push_back(fixedup_col_desc);
1875  }
1876  return fixedup_row_desc;
1877 }
1878 
1879 void DBHandler::get_roles(std::vector<std::string>& roles,
1880  const TSessionId& session_id_or_json) {
1881  heavyai::RequestInfo const request_info(session_id_or_json);
1882  SET_REQUEST_ID(request_info.requestId());
1883  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1884  auto session_ptr = stdlog.getConstSessionInfo();
1885  if (!session_ptr->get_currentUser().isSuper) {
1886  // WARNING: This appears to not include roles a user is a member of,
1887  // if the role has no permissions granted to it.
1888  roles =
1889  SysCatalog::instance().getRoles(session_ptr->get_currentUser().userName,
1890  session_ptr->getCatalog().getCurrentDB().dbId);
1891  } else {
1892  roles = SysCatalog::instance().getRoles(
1893  false, true, session_ptr->get_currentUser().userName);
1894  }
1895 }
1896 
// Reports whether roleName is granted to granteeName, as answered by
// SysCatalog::isRoleGrantedToGrantee(granteeName, roleName, false).
//
// Non-superusers are restricted before that lookup:
//  - if granteeName resolves to a user grantee other than the caller, a DB
//    exception is raised;
//  - otherwise the second branch checks isRoleGrantedToGrantee(caller, granteeName,
//    true) and carries the "not directly granted" message.
// NOTE(review): listing line 1911 is absent from this view; it presumably opens the
// THROW_DB_EXCEPTION( call that the two string literals below belong to — confirm.
1897 bool DBHandler::has_role(const TSessionId& session_id_or_json,
1898  const std::string& granteeName,
1899  const std::string& roleName) {
1900  heavyai::RequestInfo const request_info(session_id_or_json);
1901  SET_REQUEST_ID(request_info.requestId());
1902  const auto session_ptr = get_session_ptr(request_info.sessionId());
1903  const auto stdlog = STDLOG(session_ptr);
1904  const auto current_user = session_ptr->get_currentUser();
1905  if (!current_user.isSuper) {
1906  if (const auto* user = SysCatalog::instance().getUserGrantee(granteeName);
1907  user && current_user.userName != granteeName) {
1908  THROW_DB_EXCEPTION("Only super users can check other user's roles.");
1909  } else if (!SysCatalog::instance().isRoleGrantedToGrantee(
1910  current_user.userName, granteeName, true)) {
1912  "Only super users can check roles assignment that have not been directly "
1913  "granted to a user.");
1914  }
1915  }
1916  return SysCatalog::instance().isRoleGrantedToGrantee(granteeName, roleName, false);
1917 }
1918 
1919 static TDBObject serialize_db_object(const std::string& roleName,
1920  const DBObject& inObject) {
1921  TDBObject outObject;
1922  outObject.objectName = inObject.getName();
1923  outObject.grantee = roleName;
1924  outObject.objectId = inObject.getObjectKey().objectId;
1925  const auto ap = inObject.getPrivileges();
1926  switch (inObject.getObjectKey().permissionType) {
1927  case DatabaseDBObjectType:
1928  outObject.privilegeObjectType = TDBObjectType::DatabaseDBObjectType;
1929  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::CREATE_DATABASE));
1930  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::DROP_DATABASE));
1931  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::VIEW_SQL_EDITOR));
1932  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::ACCESS));
1933 
1934  break;
1935  case TableDBObjectType:
1936  outObject.privilegeObjectType = TDBObjectType::TableDBObjectType;
1937  outObject.privs.push_back(ap.hasPermission(TablePrivileges::CREATE_TABLE));
1938  outObject.privs.push_back(ap.hasPermission(TablePrivileges::DROP_TABLE));
1939  outObject.privs.push_back(ap.hasPermission(TablePrivileges::SELECT_FROM_TABLE));
1940  outObject.privs.push_back(ap.hasPermission(TablePrivileges::INSERT_INTO_TABLE));
1941  outObject.privs.push_back(ap.hasPermission(TablePrivileges::UPDATE_IN_TABLE));
1942  outObject.privs.push_back(ap.hasPermission(TablePrivileges::DELETE_FROM_TABLE));
1943  outObject.privs.push_back(ap.hasPermission(TablePrivileges::TRUNCATE_TABLE));
1944  outObject.privs.push_back(ap.hasPermission(TablePrivileges::ALTER_TABLE));
1945 
1946  break;
1947  case DashboardDBObjectType:
1948  outObject.privilegeObjectType = TDBObjectType::DashboardDBObjectType;
1949  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::CREATE_DASHBOARD));
1950  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::DELETE_DASHBOARD));
1951  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::VIEW_DASHBOARD));
1952  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::EDIT_DASHBOARD));
1953 
1954  break;
1955  case ViewDBObjectType:
1956  outObject.privilegeObjectType = TDBObjectType::ViewDBObjectType;
1957  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::CREATE_VIEW));
1958  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::DROP_VIEW));
1959  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::SELECT_FROM_VIEW));
1960  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::INSERT_INTO_VIEW));
1961  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::UPDATE_IN_VIEW));
1962  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::DELETE_FROM_VIEW));
1963 
1964  break;
1965  case ServerDBObjectType:
1966  outObject.privilegeObjectType = TDBObjectType::ServerDBObjectType;
1967  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::CREATE_SERVER));
1968  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::DROP_SERVER));
1969  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::ALTER_SERVER));
1970  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::SERVER_USAGE));
1971 
1972  break;
1973  default:
1974  CHECK(false);
1975  }
1976  const int type_val = static_cast<int>(inObject.getType());
1977  CHECK(type_val >= 0 && type_val < 6);
1978  outObject.objectType = static_cast<TDBObjectType::type>(type_val);
1979  return outObject;
1980 }
1981 
// NOTE(review): listing lines 1982 (start of the signature, including the `privs`
// parameter used below) and 1991 (the second half of the view_sql_editor_ clause)
// are absent from this view.
//
// Returns true only when every flag requested in permissions.database_permissions_
// is also held in privs; a single requested-but-missing privilege yields false.
// Raises a DB exception when database_permissions_ is not set on the request.
1983  const TDBObjectPermissions& permissions) {
1984  if (!permissions.__isset.database_permissions_) {
1985  THROW_DB_EXCEPTION("Database permissions not set for check.")
1986  }
1987  auto perms = permissions.database_permissions_;
1988  if ((perms.create_ && !privs.hasPermission(DatabasePrivileges::CREATE_DATABASE)) ||
1989  (perms.delete_ && !privs.hasPermission(DatabasePrivileges::DROP_DATABASE)) ||
1990  (perms.view_sql_editor_ &&
1992  (perms.access_ && !privs.hasPermission(DatabasePrivileges::ACCESS))) {
1993  return false;
1994  } else {
1995  return true;
1996  }
1997 }
1998 
// NOTE(review): listing line 1999 (start of the signature, including the `privs`
// parameter used below) is absent from this view.
//
// Returns true only when every flag requested in permissions.table_permissions_ is
// also held in privs; a single requested-but-missing privilege yields false.
// Raises a DB exception when table_permissions_ is not set on the request.
2000  const TDBObjectPermissions& permissions) {
2001  if (!permissions.__isset.table_permissions_) {
2002  THROW_DB_EXCEPTION("Table permissions not set for check.")
2003  }
2004  auto perms = permissions.table_permissions_;
2005  if ((perms.create_ && !privs.hasPermission(TablePrivileges::CREATE_TABLE)) ||
2006  (perms.drop_ && !privs.hasPermission(TablePrivileges::DROP_TABLE)) ||
2007  (perms.select_ && !privs.hasPermission(TablePrivileges::SELECT_FROM_TABLE)) ||
2008  (perms.insert_ && !privs.hasPermission(TablePrivileges::INSERT_INTO_TABLE)) ||
2009  (perms.update_ && !privs.hasPermission(TablePrivileges::UPDATE_IN_TABLE)) ||
2010  (perms.delete_ && !privs.hasPermission(TablePrivileges::DELETE_FROM_TABLE)) ||
2011  (perms.truncate_ && !privs.hasPermission(TablePrivileges::TRUNCATE_TABLE)) ||
2012  (perms.alter_ && !privs.hasPermission(TablePrivileges::ALTER_TABLE))) {
2013  return false;
2014  } else {
2015  return true;
2016  }
2017 }
2018 
// NOTE(review): listing line 2019 (start of the signature, including the `privs`
// parameter used below) is absent from this view.
//
// Returns true only when every flag requested in permissions.dashboard_permissions_
// is also held in privs; a single requested-but-missing privilege yields false.
// Raises a DB exception when dashboard_permissions_ is not set on the request.
2020  const TDBObjectPermissions& permissions) {
2021  if (!permissions.__isset.dashboard_permissions_) {
2022  THROW_DB_EXCEPTION("Dashboard permissions not set for check.")
2023  }
2024  auto perms = permissions.dashboard_permissions_;
2025  if ((perms.create_ && !privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD)) ||
2026  (perms.delete_ && !privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD)) ||
2027  (perms.view_ && !privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD)) ||
2028  (perms.edit_ && !privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD))) {
2029  return false;
2030  } else {
2031  return true;
2032  }
2033 }
2034 
// NOTE(review): listing line 2035 (start of the signature, including the `privs`
// parameter used below) is absent from this view.
//
// Returns true only when every flag requested in permissions.view_permissions_ is
// also held in privs; a single requested-but-missing privilege yields false.
// Raises a DB exception when view_permissions_ is not set on the request.
2036  const TDBObjectPermissions& permissions) {
2037  if (!permissions.__isset.view_permissions_) {
2038  THROW_DB_EXCEPTION("View permissions not set for check.")
2039  }
2040  auto perms = permissions.view_permissions_;
2041  if ((perms.create_ && !privs.hasPermission(ViewPrivileges::CREATE_VIEW)) ||
2042  (perms.drop_ && !privs.hasPermission(ViewPrivileges::DROP_VIEW)) ||
2043  (perms.select_ && !privs.hasPermission(ViewPrivileges::SELECT_FROM_VIEW)) ||
2044  (perms.insert_ && !privs.hasPermission(ViewPrivileges::INSERT_INTO_VIEW)) ||
2045  (perms.update_ && !privs.hasPermission(ViewPrivileges::UPDATE_IN_VIEW)) ||
2046  (perms.delete_ && !privs.hasPermission(ViewPrivileges::DELETE_FROM_VIEW))) {
2047  return false;
2048  } else {
2049  return true;
2050  }
2051 }
2052 
// NOTE(review): listing line 2053 (start of the signature, including the `privs`
// parameter used below) is absent from this view.
//
// Returns true only when every flag requested in permissions.server_permissions_ is
// also held in privs; a single requested-but-missing privilege yields false.
//
// NOTE(review): unlike the database/table/dashboard/view checkers in this file,
// which call THROW_DB_EXCEPTION when their permissions field is unset, this one
// uses CHECK. If `permissions` can come from a client request, an unset field
// would trip the CHECK rather than produce a DB exception — confirm whether that
// difference is intended.
2054  const TDBObjectPermissions& permissions) {
2055  CHECK(permissions.__isset.server_permissions_);
2056  auto perms = permissions.server_permissions_;
2057  if ((perms.create_ && !privs.hasPermission(ServerPrivileges::CREATE_SERVER)) ||
2058  (perms.drop_ && !privs.hasPermission(ServerPrivileges::DROP_SERVER)) ||
2059  (perms.alter_ && !privs.hasPermission(ServerPrivileges::ALTER_SERVER)) ||
2060  (perms.usage_ && !privs.hasPermission(ServerPrivileges::SERVER_USAGE))) {
2061  return false;
2062  } else {
2063  return true;
2064  }
2065 }
2066 
// Reports whether granteeName holds all of the requested `permissions` on the
// object objectName of kind objectType.
//
// Flow visible here:
//  - a non-superuser caller may only query grantees for which
//    isRoleGrantedToGrantee(caller, granteeName, false) is true;
//  - if granteeName is a user flagged isSuper, the answer is true immediately;
//  - an unknown grantee or an unsupported objectType raises a DB exception;
//  - otherwise the grantee's entry for the object is looked up and evaluated by the
//    checker registered in permissionFuncMap_ under func_name; no entry means false.
// NOTE(review): several listing lines are absent from this view: 2080 (presumably
// the THROW_DB_EXCEPTION( opener for the message below), 2085 (declaration of
// user_meta), 2094 (declaration of `type`) and every `case` label together with its
// `type = ...` assignment in the switch (2097-2098, 2101-2102, 2105-2106,
// 2109-2110, 2113-2114).
2067 bool DBHandler::has_object_privilege(const TSessionId& session_id_or_json,
2068  const std::string& granteeName,
2069  const std::string& objectName,
2070  const TDBObjectType::type objectType,
2071  const TDBObjectPermissions& permissions) {
2072  heavyai::RequestInfo const request_info(session_id_or_json);
2073  SET_REQUEST_ID(request_info.requestId());
2074  auto session_ptr = get_session_ptr(request_info.sessionId());
2075  auto stdlog = STDLOG(session_ptr);
2076  auto const& cat = session_ptr->getCatalog();
2077  auto const& current_user = session_ptr->get_currentUser();
2078  if (!current_user.isSuper && !SysCatalog::instance().isRoleGrantedToGrantee(
2079  current_user.userName, granteeName, false)) {
2081  "Users except superusers can only check privileges for self or roles granted "
2082  "to "
2083  "them.")
2084  }
      // A superuser grantee is treated as holding every privilege.
2086  if (SysCatalog::instance().getMetadataForUser(granteeName, user_meta) &&
2087  user_meta.isSuper) {
2088  return true;
2089  }
2090  Grantee* grnt = SysCatalog::instance().getGrantee(granteeName);
2091  if (!grnt) {
2092  THROW_DB_EXCEPTION("User or Role " + granteeName + " does not exist.")
2093  }
      // func_name selects the per-object-kind checker in permissionFuncMap_ below.
2095  std::string func_name;
2096  switch (objectType) {
2099  func_name = "database";
2100  break;
2103  func_name = "table";
2104  break;
2107  func_name = "dashboard";
2108  break;
2111  func_name = "view";
2112  break;
2115  func_name = "server";
2116  break;
2117  default:
2118  THROW_DB_EXCEPTION("Invalid object type (" + std::to_string(objectType) + ").");
2119  }
2120  DBObject req_object(objectName, type);
2121  req_object.loadKey(cat);
2122 
2123  auto grantee_object = grnt->findDbObject(req_object.getObjectKey(), false);
2124  if (grantee_object) {
2125  // if grantee has privs on the object
2126  return permissionFuncMap_[func_name](grantee_object->getPrivileges(), permissions);
2127  } else {
2128  // no privileges on that object
2129  return false;
2130  }
2131 }
2132 
2133 void DBHandler::get_db_objects_for_grantee(std::vector<TDBObject>& TDBObjectsForRole,
2134  const TSessionId& session_id_or_json,
2135  const std::string& roleName) {
2136  heavyai::RequestInfo const request_info(session_id_or_json);
2137  SET_REQUEST_ID(request_info.requestId());
2138  auto session_ptr = get_session_ptr(request_info.sessionId());
2139  auto stdlog = STDLOG(session_ptr);
2140  auto const& user = session_ptr->get_currentUser();
2141  if (!user.isSuper &&
2142  !SysCatalog::instance().isRoleGrantedToGrantee(user.userName, roleName, false)) {
2143  return;
2144  }
2145  auto* rl = SysCatalog::instance().getGrantee(roleName);
2146  if (rl) {
2147  auto dbId = session_ptr->getCatalog().getCurrentDB().dbId;
2148  for (auto& dbObject : *rl->getDbObjects(true)) {
2149  if (dbObject.first.dbId != dbId) {
2150  // TODO (max): it doesn't scale well in case we have many DBs (not a typical
2151  // usecase for now, though)
2152  continue;
2153  }
2154  TDBObject tdbObject = serialize_db_object(roleName, *dbObject.second);
2155  TDBObjectsForRole.push_back(tdbObject);
2156  }
2157  } else {
2158  THROW_DB_EXCEPTION("User or role " + roleName + " does not exist.");
2159  }
2160 }
2161 
// Appends to TDBObjects the serialized privileges that grantees hold on the object
// objectName of Thrift kind `type`.
//
// Steps visible here:
//  1. Map the Thrift type to a DBObjectType; unknown types raise a DB exception.
//  2. Build the lookup key: dashboards are addressed by numeric id (an empty name
//     maps to -1); for a non-empty table/view name the catalog decides whether the
//     object is a table or a view. Any std::exception during this step is reported
//     as "does not exist".
//  3. For a superuser session, push one entry named "super" under the session
//     user's name.
//  4. For every grantee returned by SysCatalog::getRoles(), push its entry for the
//     object itself and its entry for the database-level object of the same type,
//     when present.
// NOTE(review): several listing lines are absent from this view: the `case` labels
// of the switch (2173, 2176, 2179, 2182, 2185), the dashboard assignment (2180)
// and one constructor argument of dbObj (2223).
2162 void DBHandler::get_db_object_privs(std::vector<TDBObject>& TDBObjects,
2163  const TSessionId& session_id_or_json,
2164  const std::string& objectName,
2165  const TDBObjectType::type type) {
2166  heavyai::RequestInfo const request_info(session_id_or_json);
2167  SET_REQUEST_ID(request_info.requestId());
2168  auto session_ptr = get_session_ptr(request_info.sessionId());
2169  auto stdlog = STDLOG(session_ptr);
2170  const auto& cat = session_ptr->getCatalog();
2171  DBObjectType object_type;
2172  switch (type) {
2174  object_type = DBObjectType::DatabaseDBObjectType;
2175  break;
2177  object_type = DBObjectType::TableDBObjectType;
2178  break;
2181  break;
2183  object_type = DBObjectType::ViewDBObjectType;
2184  break;
2186  object_type = DBObjectType::ServerDBObjectType;
2187  break;
2188  default:
2189  THROW_DB_EXCEPTION("Failed to get object privileges for " + objectName +
2190  ": unknown object type (" + std::to_string(type) + ").");
2191  }
2192  DBObject object_to_find(objectName, object_type);
2193 
2194  // TODO(adb): Use DatabaseLock to protect method
2195  try {
2196  if (object_type == DashboardDBObjectType) {
2197  if (objectName == "") {
2198  object_to_find = DBObject(-1, object_type);
2199  } else {
      // std::stoi throws on a non-numeric name; the catch below turns that into
      // the "does not exist" error.
2200  object_to_find = DBObject(std::stoi(objectName), object_type);
2201  }
2202  } else if ((object_type == TableDBObjectType || object_type == ViewDBObjectType) &&
2203  !objectName.empty()) {
2204  // special handling for view / table
2205  auto td = cat.getMetadataForTable(objectName, false);
2206  if (td) {
      // The catalog's isView flag overrides the kind the caller asked for.
2207  object_type = td->isView ? ViewDBObjectType : TableDBObjectType;
2208  object_to_find = DBObject(objectName, object_type);
2209  }
2210  }
2211  object_to_find.loadKey(cat);
2212  } catch (const std::exception&) {
2213  THROW_DB_EXCEPTION("Object with name " + objectName + " does not exist.");
2214  }
2215 
2216  // object type on database level
2217  DBObject object_to_find_dblevel("", object_type);
2218  object_to_find_dblevel.loadKey(cat);
2219  // if user is superuser respond with a full priv
2220  if (session_ptr->get_currentUser().isSuper) {
2221  // using ALL_TABLE here to set max permissions
2222  DBObject dbObj{object_to_find.getObjectKey(),
2224  session_ptr->get_currentUser().userId};
2225  dbObj.setName("super");
2226  TDBObjects.push_back(
2227  serialize_db_object(session_ptr->get_currentUser().userName, dbObj));
2228  };
2229 
2230  std::vector<std::string> grantees =
2231  SysCatalog::instance().getRoles(true,
2232  session_ptr->get_currentUser().isSuper,
2233  session_ptr->get_currentUser().userName);
2234  for (const auto& grantee : grantees) {
2235  DBObject* object_found;
2236  auto* gr = SysCatalog::instance().getGrantee(grantee);
      // object_found is assigned inside each condition and only read when the
      // lookup returned non-null.
2237  if (gr && (object_found = gr->findDbObject(object_to_find.getObjectKey(), true))) {
2238  TDBObjects.push_back(serialize_db_object(grantee, *object_found));
2239  }
2240  // check object permissions on Database level
2241  if (gr &&
2242  (object_found = gr->findDbObject(object_to_find_dblevel.getObjectKey(), true))) {
2243  TDBObjects.push_back(serialize_db_object(grantee, *object_found));
2244  }
2245  }
2246 }
2247 
// NOTE(review): listing lines 2248 (start of the signature) and 2261 (presumably
// the THROW_DB_EXCEPTION( opener for the message inside the user branch) are absent
// from this view.
//
// Fills `roles` with the roles of granteeName. `effective` is negated and passed as
// only_direct to Grantee::getRoles(), so effective=false requests directly granted
// roles only.
//
// Who may ask:
//  - a superuser session may query any grantee;
//  - a user grantee may only be queried by that same user;
//  - a role grantee may only be queried when the role is granted to the session
//    user (isRoleGrantedToGrantee(..., false)).
// An unknown grantee raises a DB exception.
2249  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
2250  std::vector<std::string>& roles,
2251  const std::string& granteeName,
2252  bool effective) {
2253  auto* grantee = SysCatalog::instance().getGrantee(granteeName);
2254  if (grantee) {
2255  if (session_ptr->get_currentUser().isSuper) {
2256  roles = grantee->getRoles(/*only_direct=*/!effective);
2257  } else if (grantee->isUser()) {
2258  if (session_ptr->get_currentUser().userName == granteeName) {
2259  roles = grantee->getRoles(/*only_direct=*/!effective);
2260  } else {
2262  "Only a superuser is authorized to request list of roles granted to another "
2263  "user.");
2264  }
2265  } else {
2266  CHECK(!grantee->isUser());
2267  // granteeName is actually a roleName here and we can check a role
2268  // only if it is granted to us
2269  if (SysCatalog::instance().isRoleGrantedToGrantee(
2270  session_ptr->get_currentUser().userName, granteeName, false)) {
2271  roles = grantee->getRoles(/*only_direct=*/!effective);
2272  } else {
2273  THROW_DB_EXCEPTION("A user can check only roles granted to him.");
2274  }
2275  }
2276  } else {
2277  THROW_DB_EXCEPTION("Grantee " + granteeName + " does not exist.");
2278  }
2279 }
2280 
2281 void DBHandler::get_all_roles_for_user(std::vector<std::string>& roles,
2282  const TSessionId& session_id_or_json,
2283  const std::string& granteeName) {
2284  // WARNING: This function only returns directly granted roles.
2285  // See also: get_all_effective_roles_for_user() for all of a user's roles.
2286  heavyai::RequestInfo const request_info(session_id_or_json);
2287  SET_REQUEST_ID(request_info.requestId());
2288  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2289  auto session_ptr = stdlog.getConstSessionInfo();
2290  getAllRolesForUserImpl(session_ptr, roles, granteeName, /*effective=*/false);
2291 }
2292 
2293 void DBHandler::get_all_effective_roles_for_user(std::vector<std::string>& roles,
2294  const TSessionId& session_id_or_json,
2295  const std::string& granteeName) {
2296  heavyai::RequestInfo const request_info(session_id_or_json);
2297  SET_REQUEST_ID(request_info.requestId());
2298  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2299  auto session_ptr = stdlog.getConstSessionInfo();
2300  getAllRolesForUserImpl(session_ptr, roles, granteeName, /*effective=*/true);
2301 }
2302 
2303 namespace {
// NOTE(review): listing line 2304 (return type and function name) is absent from
// this view.
//
// Flattens the table -> column-names map into one string: each table contributes
// ":" followed by its name, then "," plus the column name for each of its columns.
// Tables appear in the std::map's key order.
2305  const std::map<std::string, std::vector<std::string>>& table_col_names) {
2306  std::ostringstream oss;
2307  for (const auto& [table_name, col_names] : table_col_names) {
2308  oss << ":" << table_name;
2309  for (const auto& col_name : col_names) {
2310  oss << "," << col_name;
2311  }
2312  }
2313  return oss.str();
2314 }
2315 } // namespace
2316 
// NOTE(review): listing line 2317 (start of the signature) is absent from this
// view.
//
// Logs the request parameters and forwards the call unchanged to
// render_handler_->get_result_row_for_pixel(), which fills _return.
// Raises a DB exception when render_handler_ is null ("Backend rendering is
// disabled.") and re-reports any std::exception from the render handler as a DB
// exception carrying e.what().
2318  TPixelTableRowResult& _return,
2319  const TSessionId& session_id_or_json,
2320  const int64_t widget_id,
2321  const TPixel& pixel,
2322  const std::map<std::string, std::vector<std::string>>& table_col_names,
2323  const bool column_format,
2324  const int32_t pixel_radius,
2325  const std::string& nonce) {
2326  heavyai::RequestInfo const request_info(session_id_or_json);
2327  SET_REQUEST_ID(request_info.requestId());
2328  auto session_ptr = get_session_ptr(request_info.sessionId());
      // Name/value pairs recorded with the log entry for this request.
2329  auto stdlog = STDLOG(session_ptr,
2330  "widget_id",
2331  widget_id,
2332  "pixel.x",
2333  pixel.x,
2334  "pixel.y",
2335  pixel.y,
2336  "column_format",
2337  column_format,
2338  "pixel_radius",
2339  pixel_radius,
2340  "table_col_names",
2341  dump_table_col_names(table_col_names),
2342  "nonce",
2343  nonce);
2344  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2345  if (!render_handler_) {
2346  THROW_DB_EXCEPTION("Backend rendering is disabled.");
2347  }
2348 
2349  try {
2350  render_handler_->get_result_row_for_pixel(_return,
2351  session_ptr,
2352  widget_id,
2353  pixel,
2354  table_col_names,
2355  column_format,
2356  pixel_radius,
2357  nonce);
2358  } catch (std::exception& e) {
2359  THROW_DB_EXCEPTION(e.what());
2360  }
2361 }
2362 
// NOTE(review): listing lines 2363 (start of the signature, including the `cat`
// parameter used below), 2377 (the call the geo arguments belong to) and 2384 (the
// first half of the condition that ends in `cat != nullptr`) are absent from this
// view.
//
// Builds the Thrift TColumnType for the column descriptor cd: name, source name,
// id, type, encoding, nullability, array flag, size (arrays and dates only),
// precision/scale for non-geo columns, system/reserved-keyword flags, comp_param
// and, when present, the default value literal.
//
// NOTE(review): in the branch guarded by `cat != nullptr`, getMetadataForDict() is
// called twice with the same arguments. When the first call yields null the
// function returns early with comp_param = 0, so the later `if (!dd)` throw looks
// unreachable unless the two calls can disagree. That early return also leaves
// is_reserved_keyword and default_value unset — confirm both are intended.
2364  const ColumnDescriptor* cd) {
2365  TColumnType col_type;
2366  col_type.col_name = cd->columnName;
2367  col_type.src_name = cd->sourceName;
2368  col_type.col_id = cd->columnId;
2369  col_type.col_type.type = type_to_thrift(cd->columnType);
2370  col_type.col_type.encoding = encoding_to_thrift(cd->columnType);
2371  col_type.col_type.nullable = !cd->columnType.get_notnull();
2372  col_type.col_type.is_array = cd->columnType.get_type() == kARRAY;
2373  if (col_type.col_type.is_array || cd->columnType.get_type() == kDATE) {
2374  col_type.col_type.size = cd->columnType.get_size(); // only for arrays and dates
2375  }
2376  if (IS_GEO(cd->columnType.get_type())) {
2378  col_type, cd->columnType.get_subtype(), cd->columnType.get_output_srid());
2379  } else {
2380  col_type.col_type.precision = cd->columnType.get_precision();
2381  col_type.col_type.scale = cd->columnType.get_scale();
2382  }
2383  col_type.is_system = cd->isSystemCol;
2385  cat != nullptr) {
2386  // have to get the actual size of the encoding from the dictionary definition
2387  const int dict_id = cd->columnType.get_comp_param();
2388  if (!cat->getMetadataForDict(dict_id, false)) {
2389  col_type.col_type.comp_param = 0;
2390  return col_type;
2391  }
2392  auto dd = cat->getMetadataForDict(dict_id, false);
2393  if (!dd) {
2394  THROW_DB_EXCEPTION("Dictionary doesn't exist");
2395  }
2396  col_type.col_type.comp_param = dd->dictNBits;
2397  } else {
      // Date-in-days columns with comp_param 0 are reported as 32; every other
      // column reports its stored comp_param.
2398  col_type.col_type.comp_param =
2399  (cd->columnType.is_date_in_days() && cd->columnType.get_comp_param() == 0)
2400  ? 32
2401  : cd->columnType.get_comp_param();
2402  }
2403  col_type.is_reserved_keyword = ImportHelpers::is_reserved_name(col_type.col_name);
2404  if (cd->default_value.has_value()) {
2405  col_type.__set_default_value(cd->getDefaultValueLiteral());
2406  }
2407  return col_type;
2408 }
2409 
2410 void DBHandler::get_internal_table_details(TTableDetails& _return,
2411  const TSessionId& session_id_or_json,
2412  const std::string& table_name,
2413  const bool include_system_columns) {
2414  heavyai::RequestInfo const request_info(session_id_or_json);
2415  SET_REQUEST_ID(request_info.requestId());
2416  auto stdlog =
2417  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2418  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2419  get_table_details_impl(_return, stdlog, table_name, include_system_columns, false);
2420 }
2421 
// NOTE(review): listing line 2422 (start of the signature) is absent from this
// view.
//
// Fills _return with the details of table_name in database_name by delegating to
// get_table_details_impl() with get_system=true and get_physical=false.
2423  TTableDetails& _return,
2424  const TSessionId& session_id_or_json,
2425  const std::string& table_name,
2426  const std::string& database_name) {
2427  heavyai::RequestInfo const request_info(session_id_or_json);
2428  SET_REQUEST_ID(request_info.requestId());
2429  auto stdlog =
2430  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2431  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2432  get_table_details_impl(_return, stdlog, table_name, true, false, database_name);
2433 }
2434 
2435 void DBHandler::get_table_details(TTableDetails& _return,
2436  const TSessionId& session_id_or_json,
2437  const std::string& table_name) {
2438  heavyai::RequestInfo const request_info(session_id_or_json);
2439  SET_REQUEST_ID(request_info.requestId());
2440  auto stdlog =
2441  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2442  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2443 
2444  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2445  get_table_details_impl(_return, stdlog, table_name, false, false);
2446 }
2447 
2448 void DBHandler::get_table_details_for_database(TTableDetails& _return,
2449  const TSessionId& session_id_or_json,
2450  const std::string& table_name,
2451  const std::string& database_name) {
2452  heavyai::RequestInfo const request_info(session_id_or_json);
2453  SET_REQUEST_ID(request_info.requestId());
2454  auto stdlog =
2455  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2456  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2457 
2458  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2459  get_table_details_impl(_return, stdlog, table_name, false, false, database_name);
2460 }
2461 
2462 namespace {
// Builds the Thrift refresh description of a foreign table from its options.
//
// td must be a foreign table (CHECKed). The update type maps to ALL or APPEND. The
// timing type maps to MANUAL (interval_count = -1) or SCHEDULED; for SCHEDULED the
// start date/time is re-emitted in ISO format and the interval option is split
// into a count (everything but the last character) and a unit (last character,
// upper-cased: H -> HOUR, D -> DAY, S -> NONE). last/next refresh times are emitted
// in ISO format when they differ from a sentinel value.
// NOTE(review): listing lines 2469, 2474, 2481, 2487, 2490, 2516 and 2521 are
// absent from this view (the getOption() calls for update/timing type, the
// right-hand sides of two comparisons, the start-date option key and the two
// sentinel values).
2463 TTableRefreshInfo get_refresh_info(const TableDescriptor* td) {
2464  CHECK(td->isForeignTable());
2465  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2466  CHECK(foreign_table);
2467  TTableRefreshInfo refresh_info;
2468  const auto& update_type =
2470  CHECK(update_type.has_value());
2471  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2472  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2473  } else if (update_type.value() ==
2475  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2476  } else {
2477  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2478  }
2479 
2480  const auto& timing_type =
2482  CHECK(timing_type.has_value());
2483  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2484  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2485  refresh_info.interval_count = -1;
2486  } else if (timing_type.value() ==
2488  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2489  const auto& start_date_time = foreign_table->getOption(
2491  CHECK(start_date_time.has_value());
2492  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2493  refresh_info.start_date_time =
2494  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2495  const auto& interval =
2496  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2497  CHECK(interval.has_value());
2498  const auto& interval_str = interval.value();
      // NOTE(review): nothing visible here guards against an empty interval
      // string, in which case length() - 1 would wrap around; presumably the
      // option is validated before it is stored — confirm.
2499  refresh_info.interval_count =
2500  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2501  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2502  if (interval_type == 'H') {
2503  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2504  } else if (interval_type == 'D') {
2505  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2506  } else if (interval_type == 'S') {
2507  // This use case is for development only.
2508  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2509  } else {
2510  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2511  }
2512  } else {
2513  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2514  }
2515  if (foreign_table->last_refresh_time !=
2517  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2518  {kTIMESTAMP}, foreign_table->last_refresh_time);
2519  }
2520  if (foreign_table->next_refresh_time !=
2522  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2523  {kTIMESTAMP}, foreign_table->next_refresh_time);
2524  }
2525  return refresh_info;
2526 }
2527 } // namespace
2528 
// Fills _return with the description of table_name.
//
// The catalog is the session's own when database_name is empty, otherwise the one
// SysCatalog returns for database_name (a null result raises a DB exception).
//
// Views: if the session has access to the view, its SQL is parsed to relational
// algebra and the row descriptor is obtained from validateRelAlg(). A
// std::runtime_error from checkAccessedObjectsPrivileges() only clears
// have_privileges_on_view_sources, which later replaces view_sql with a
// placeholder. Any other std::exception in this path is rethrown as a
// std::runtime_error that names the view and includes its SQL.
//
// Tables: if the session has access, every column except the catalog's "deleted"
// column is converted with populateThriftColumnType().
//
// Afterwards the storage/partitioning attributes and the table type are copied
// from the table descriptor; foreign tables also get refresh_info.
// Every std::runtime_error leaving the body is converted into a DB exception.
//
// NOTE(review): listing lines 2544 (the call that produces td_with_lock) and 2559
// (one parse_to_ra() argument) are absent from this view.
2529 void DBHandler::get_table_details_impl(TTableDetails& _return,
2530  query_state::StdLog& stdlog,
2531  const std::string& table_name,
2532  const bool get_system,
2533  const bool get_physical,
2534  const std::string& database_name) {
2535  try {
2536  auto session_info = stdlog.getSessionInfo();
2537  auto cat = (database_name.empty())
2538  ? &session_info->getCatalog()
2539  : SysCatalog::instance().getCatalog(database_name).get();
2540  if (!cat) {
2541  THROW_DB_EXCEPTION("Database " + database_name + " does not exist.");
2542  }
2543  const auto td_with_lock =
2545  *cat, table_name, false);
2546  const auto td = td_with_lock();
2547  CHECK(td);
2548 
2549  bool have_privileges_on_view_sources = true;
2550  if (td->isView) {
2551  auto query_state = create_query_state(session_info, td->viewSQL);
2552  stdlog.setQueryState(query_state);
2553  try {
2554  if (hasTableAccessPrivileges(td, *session_info)) {
2555  const auto [query_ra, locks] = parse_to_ra(query_state->createQueryStateProxy(),
2556  query_state->getQueryStr(),
2557  {},
2558  true,
2560  false);
2561  try {
2562  calcite_->checkAccessedObjectsPrivileges(query_state->createQueryStateProxy(),
2563  query_ra);
2564  } catch (const std::runtime_error&) {
      // Not fatal: the row descriptor is still produced, only view_sql is
      // replaced by a placeholder further down.
2565  have_privileges_on_view_sources = false;
2566  }
2567 
2568  _return.row_desc =
2569  validateRelAlg(query_ra.plan_result, query_state->createQueryStateProxy());
2570  } else {
2571  throw std::runtime_error(
2572  "Unable to access view " + table_name +
2573  ". The view may not exist, or the logged in user may not "
2574  "have permission to access the view.");
2575  }
2576  } catch (const std::exception& e) {
      // This also wraps the "Unable to access view" error thrown just above.
2577  throw std::runtime_error("View '" + table_name +
2578  "' query has failed with an error: '" +
2579  std::string(e.what()) +
2580  "'.\nThe view must be dropped and re-created to "
2581  "resolve the error. \nQuery:\n" +
2582  query_state->getQueryStr());
2583  }
2584  } else {
2585  if (hasTableAccessPrivileges(td, *session_info)) {
2586  const auto col_descriptors = cat->getAllColumnMetadataForTable(
2587  td->tableId, get_system, true, get_physical);
2588  const auto deleted_cd = cat->getDeletedColumn(td);
2589  for (const auto cd : col_descriptors) {
      // Skip the column the catalog reports via getDeletedColumn().
2590  if (cd == deleted_cd) {
2591  continue;
2592  }
2593  _return.row_desc.push_back(populateThriftColumnType(cat, cd));
2594  }
2595  } else {
2596  throw std::runtime_error(
2597  "Unable to access table " + table_name +
2598  ". The table may not exist, or the logged in user may not "
2599  "have permission to access the table.");
2600  }
2601  }
2602  _return.fragment_size = td->maxFragRows;
2603  _return.page_size = td->fragPageSize;
2604  _return.max_rows = td->maxRows;
2605  _return.view_sql =
2606  (have_privileges_on_view_sources ? td->viewSQL
2607  : "[Not enough privileges to see the view SQL]");
      // nShards is scaled by g_leaf_count, with a floor of 1 on the multiplier.
2608  _return.shard_count = td->nShards * std::max(g_leaf_count, size_t(1));
2609  if (td->nShards > 0) {
2610  auto cd = cat->getMetadataForColumn(td->tableId, td->shardedColumnId);
2611  CHECK(cd);
2612  _return.sharded_column_name = cd->columnName;
2613  }
2614  _return.key_metainfo = td->keyMetainfo;
2615  _return.is_temporary = td->persistenceLevel == Data_Namespace::MemoryLevel::CPU_LEVEL;
      // Empty partitions -> DEFAULT; otherwise REPLICATED, SHARDED or OTHER.
2616  _return.partition_detail =
2617  td->partitions.empty()
2618  ? TPartitionDetail::DEFAULT
2619  : (table_is_replicated(td)
2620  ? TPartitionDetail::REPLICATED
2621  : (td->partitions == "SHARDED" ? TPartitionDetail::SHARDED
2622  : TPartitionDetail::OTHER));
2623  if (td->isView) {
2624  _return.table_type = TTableType::VIEW;
2625  } else if (td->isTemporaryTable()) {
2626  _return.table_type = TTableType::TEMPORARY;
2627  } else if (td->isForeignTable()) {
2628  _return.table_type = TTableType::FOREIGN;
2629  _return.refresh_info = get_refresh_info(td);
2630  } else {
2631  _return.table_type = TTableType::DEFAULT;
2632  }
2633 
2634  } catch (const std::runtime_error& e) {
2635  THROW_DB_EXCEPTION(std::string(e.what()));
2636  }
2637 }
2638 
2639 void DBHandler::get_link_view(TFrontendView& _return,
2640  const TSessionId& session_id_or_json,
2641  const std::string& link) {
2642  heavyai::RequestInfo const request_info(session_id_or_json);
2643  SET_REQUEST_ID(request_info.requestId());
2644  auto session_ptr = get_session_ptr(request_info.sessionId());
2645  auto stdlog = STDLOG(session_ptr);
2646  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2647  auto const& cat = session_ptr->getCatalog();
2648  auto ld = cat.getMetadataForLink(std::to_string(cat.getCurrentDB().dbId) + link);
2649  if (!ld) {
2650  THROW_DB_EXCEPTION("Link " + link + " is not valid.");
2651  }
2652  _return.view_state = ld->viewState;
2653  _return.view_name = ld->link;
2654  _return.update_time = ld->updateTime;
2655  _return.view_metadata = ld->viewMetadata;
2656 }
2657 
// NOTE(review): the line carrying this function's return type and name is missing
// from this listing. The parameter list matches the
// hasTableAccessPrivileges(td, session_info) calls made elsewhere in this file, so
// this is presumably that function -- confirm against the real source.
//
// Returns true immediately when the session's current user is a superuser.
// Otherwise returns the result of SysCatalog::hasAnyPrivileges() for that user on a
// single-element list holding `dbObject`.
2659  const TableDescriptor* td,
2660  const Catalog_Namespace::SessionInfo& session_info) {
2661  auto& cat = session_info.getCatalog();
2662  auto user_metadata = session_info.get_currentUser();
2663 
2664  if (user_metadata.isSuper) {
2665  return true;
2666  }
2667 
  // NOTE(review): the declaration of `dbObject` is on a line missing from this
  // listing; presumably it is built from `td` -- confirm.
2669  dbObject.loadKey(cat);
2670  std::vector<DBObject> privObjects = {dbObject};
2671 
2672  return SysCatalog::instance().hasAnyPrivileges(user_metadata, privObjects);
2673 }
2674 
2675 void DBHandler::get_tables_impl(std::vector<std::string>& table_names,
2676  const Catalog_Namespace::SessionInfo& session_info,
2677  const GetTablesType get_tables_type,
2678  const std::string& database_name) {
2679  if (database_name.empty()) {
2680  table_names = session_info.getCatalog().getTableNamesForUser(
2681  session_info.get_currentUser(), get_tables_type);
2682  } else {
2683  auto request_cat = SysCatalog::instance().getCatalog(database_name);
2684  if (!request_cat) {
2685  THROW_DB_EXCEPTION("Database " + database_name + " does not exist.");
2686  }
2687  table_names = request_cat->getTableNamesForUser(session_info.get_currentUser(),
2688  get_tables_type);
2689  }
2690 }
2691 
// Thrift entry point: returns table names for the session using the
// GET_PHYSICAL_TABLES_AND_VIEWS filter.
//
// Sets the request id, resolves the session, and records the client connection info
// in the request log before producing the result.
2692 void DBHandler::get_tables(std::vector<std::string>& table_names,
2693  const TSessionId& session_id_or_json) {
2694  heavyai::RequestInfo const request_info(session_id_or_json);
2695  SET_REQUEST_ID(request_info.requestId());
2696  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2697  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
  // NOTE(review): the line opening the call that receives the arguments below is
  // missing from this listing; presumably it is get_tables_impl( -- confirm.
2699  table_names, *stdlog.getConstSessionInfo(), GET_PHYSICAL_TABLES_AND_VIEWS);
2700 }
2701 
// Thrift entry point: returns table names from the database called `database_name`
// by delegating to get_tables_impl() with the caller's session.
//
// Sets the request id, resolves the session, and records the client connection info
// in the request log before delegating.
2702 void DBHandler::get_tables_for_database(std::vector<std::string>& table_names,
2703  const TSessionId& session_id_or_json,
2704  const std::string& database_name) {
2705  heavyai::RequestInfo const request_info(session_id_or_json);
2706  SET_REQUEST_ID(request_info.requestId());
2707  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2708  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2709 
2710  get_tables_impl(table_names,
2711  *stdlog.getConstSessionInfo(),
  // NOTE(review): the table-type filter argument is on a line missing from this
  // listing -- confirm which GetTablesType value is passed.
2713  database_name);
2714 }
2715 
2716 void DBHandler::get_physical_tables(std::vector<std::string>& table_names,
2717  const TSessionId& session_id_or_json) {
2718  heavyai::RequestInfo const request_info(session_id_or_json);
2719  SET_REQUEST_ID(request_info.requestId());
2720  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2721  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2722  get_tables_impl(table_names, *stdlog.getConstSessionInfo(), GET_PHYSICAL_TABLES);
2723 }
2724 
2725 void DBHandler::get_views(std::vector<std::string>& table_names,
2726  const TSessionId& session_id_or_json) {
2727  heavyai::RequestInfo const request_info(session_id_or_json);
2728  SET_REQUEST_ID(request_info.requestId());
2729  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2730  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2731  get_tables_impl(table_names, *stdlog.getConstSessionInfo(), GET_VIEWS);
2732 }
2733 
// Appends one TTableMeta entry to `_return` for every table or view in the session's
// catalog that the session is allowed to access.
//
// Shard tables (td.shard >= 0) and tables failing hasTableAccessPrivileges() are
// skipped. For views, the view SQL is parsed and run through execute_rel_alg() with
// just_validate=true to obtain the column names and types; physical columns are left
// out. For other tables, column metadata comes from the catalog, leaving out the
// deleted column.
//
// `with_table_locks` is forwarded to parse_to_ra() when parsing view SQL.
// A std::runtime_error raised while reading table columns is rethrown as a DB
// exception.
2734 void DBHandler::get_tables_meta_impl(std::vector<TTableMeta>& _return,
2735  QueryStateProxy query_state_proxy,
2736  const Catalog_Namespace::SessionInfo& session_info,
2737  const bool with_table_locks) {
2738  const auto& cat = session_info.getCatalog();
2739  // Get copies of table descriptors here in order to avoid possible use of dangling
2740  // pointers, if tables are concurrently dropped.
2741  const auto tables = cat.getAllTableMetadataCopy();
2742  _return.reserve(tables.size());
2743 
2744  for (const auto& td : tables) {
2745  if (td.shard >= 0) {
2746  // skip shards, they're not standalone tables
2747  continue;
2748  }
2749  if (!hasTableAccessPrivileges(&td, session_info)) {
2750  // skip table, as there are no privileges to access it
2751  continue;
2752  }
2753 
2754  TTableMeta ret;
2755  ret.table_name = td.tableName;
2756  ret.is_view = td.isView;
2757  ret.is_replicated = table_is_replicated(&td);
2758  ret.shard_count = td.nShards;
2759  ret.max_rows = td.maxRows;
2760  ret.table_id = td.tableId;
2761 
2762  std::vector<TTypeInfo> col_types;
2763  std::vector<std::string> col_names;
2764  size_t num_cols = 0;
2765  if (td.isView) {
2766  try {
2767  TPlanResult parse_result;
  // NOTE(review): the declaration of `locks` is on a line missing from this listing.
2769  std::tie(parse_result, locks) = parse_to_ra(
2770  query_state_proxy, td.viewSQL, {}, with_table_locks, system_parameters_);
2771  const auto query_ra = parse_result.plan_result;
2772 
2773  ExecutionResult ex_result;
2774  execute_rel_alg(ex_result,
2775  query_state_proxy,
2776  query_ra,
2777  true,
  // NOTE(review): one argument of this call is on a line missing from this listing.
2779  -1,
2780  -1,
2781  /*just_validate=*/true,
2782  /*find_push_down_candidates=*/false,
2783  ExplainInfo());
2784  TQueryResult result;
2785  DBHandler::convertData(result, ex_result, query_state_proxy, true, -1, -1);
  // Start from the full descriptor count and subtract each physical column skipped.
2786  num_cols = result.row_set.row_desc.size();
2787  for (const auto& col : result.row_set.row_desc) {
2788  if (col.is_physical) {
2789  num_cols--;
2790  continue;
2791  }
2792  col_types.push_back(col.col_type);
2793  col_names.push_back(col.col_name);
2794  }
2795  } catch (std::exception& e) {
  // A failing view is only logged; its entry is still appended below with whatever
  // column info was gathered before the failure.
2796  LOG(WARNING) << "get_tables_meta: Ignoring broken view: " << td.tableName;
2797  }
2798  } else {
2799  try {
  // NOTE(review): this repeats the privilege check made at the top of the loop body,
  // so the `else continue` branch looks unreachable unless the result can change
  // between the two calls -- confirm.
2800  if (hasTableAccessPrivileges(&td, session_info)) {
2801  const auto col_descriptors =
2802  cat.getAllColumnMetadataForTable(td.tableId, false, true, false);
2803  const auto deleted_cd = cat.getDeletedColumn(&td);
2804  for (const auto cd : col_descriptors) {
2805  if (cd == deleted_cd) {
2806  continue;
2807  }
2808  col_types.push_back(ThriftSerializers::type_info_to_thrift(cd->columnType));
2809  col_names.push_back(cd->columnName);
2810  }
  // num_cols is the size of the full descriptor list, i.e. it is not reduced when the
  // deleted column is skipped in the loop above.
2811  num_cols = col_descriptors.size();
2812  } else {
2813  continue;
2814  }
2815  } catch (const std::runtime_error& e) {
2816  THROW_DB_EXCEPTION(e.what());
2817  }
2818  }
2819 
2820  ret.num_cols = num_cols;
2821  std::copy(col_types.begin(), col_types.end(), std::back_inserter(ret.col_types));
2822  std::copy(col_names.begin(), col_names.end(), std::back_inserter(ret.col_names));
2823 
2824  _return.push_back(ret);
2825  }
2826 }
2827 
2828 void DBHandler::get_tables_meta(std::vector<TTableMeta>& _return,
2829  const TSessionId& session_id_or_json) {
2830  heavyai::RequestInfo const request_info(session_id_or_json);
2831  SET_REQUEST_ID(request_info.requestId());
2832  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2833  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2834  auto session_ptr = stdlog.getConstSessionInfo();
2835  auto query_state = create_query_state(session_ptr, "");
2836  stdlog.setQueryState(query_state);
2837 
2838  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2839 
2840  try {
2841  get_tables_meta_impl(_return, query_state->createQueryStateProxy(), *session_ptr);
2842  } catch (const std::exception& e) {
2843  THROW_DB_EXCEPTION(e.what());
2844  }
2845 }
2846 
2847 void DBHandler::get_users(std::vector<std::string>& user_names,
2848  const TSessionId& session_id_or_json) {
2849  heavyai::RequestInfo const request_info(session_id_or_json);
2850  SET_REQUEST_ID(request_info.requestId());
2851  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2852  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2853  auto session_ptr = stdlog.getConstSessionInfo();
2854  std::list<Catalog_Namespace::UserMetadata> user_list;
2855 
2856  if (!session_ptr->get_currentUser().isSuper) {
2857  user_list = SysCatalog::instance().getAllUserMetadata(
2858  session_ptr->getCatalog().getCurrentDB().dbId);
2859  } else {
2860  user_list = SysCatalog::instance().getAllUserMetadata();
2861  }
2862  for (auto u : user_list) {
2863  user_names.push_back(u.userName);
2864  }
2865 }
2866 
2867 void DBHandler::get_version(std::string& version) {
2868  version = MAPD_RELEASE;
2869 }
2870 
// File-local helper that returns a callable.
// NOTE(review): the helper's signature, the condition guarding the first return, and
// the body of the first lambda are on lines missing from this listing. What is
// visible: one branch returns a lambda whose comment says it resumes the "erm" queue,
// and the fall-through path returns a lambda that does nothing.
2871 namespace {
2872 
2876  return [] {
2877  // we need to resume erm queue if we throw any exception
2878  // that heavydb server can handle w/o shutting it down
2880  };
2881  }
2882  return [] {};
2883 }
2884 
2885 } // namespace
2886 
// Thrift entry point: clears GPU memory. Requires a superuser session; otherwise a
// DB exception is thrown.
//
// If a render handler is present its clear_gpu_memory() is called first, then the
// query-engine side is cleared inside the try block. Any std::exception from that
// step is rethrown as a DB exception.
2887 void DBHandler::clear_gpu_memory(const TSessionId& session_id_or_json) {
2888  heavyai::RequestInfo const request_info(session_id_or_json);
2889  SET_REQUEST_ID(request_info.requestId());
2890  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2891  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2892  auto session_ptr = stdlog.getConstSessionInfo();
2893  if (!session_ptr->get_currentUser().isSuper) {
2894  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_gpu_memory");
2895  }
  // NOTE(review): one statement here is on a line missing from this listing.
2897  // clear renderer memory first
2898  // this will block until any running render finishes
2899  if (render_handler_) {
2900  render_handler_->clear_gpu_memory();
2901  }
2902  // then clear the QE memory
2903  // the renderer will have disconnected from any QE memory
2904  try {
  // NOTE(review): the statement that clears the query-engine memory is on a line
  // missing from this listing.
2906  } catch (const std::exception& e) {
2907  THROW_DB_EXCEPTION(e.what());
2908  }
2909 }
2910 
// Thrift entry point: clears CPU memory. Requires a superuser session; otherwise a
// DB exception is thrown.
//
// If a render handler is present its clear_cpu_memory() is called first, then the
// query-engine side is cleared inside the try block. Any std::exception from that
// step is rethrown as a DB exception.
//
// NOTE(review): unlike clear_gpu_memory(), this function does not append the
// "client" connection info to the request log -- confirm whether that is intended.
2911 void DBHandler::clear_cpu_memory(const TSessionId& session_id_or_json) {
2912  heavyai::RequestInfo const request_info(session_id_or_json);
2913  SET_REQUEST_ID(request_info.requestId());
2914  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2915  auto session_ptr = stdlog.getConstSessionInfo();
2916  if (!session_ptr->get_currentUser().isSuper) {
2917  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_cpu_memory");
2918  }
  // NOTE(review): one statement here is on a line missing from this listing.
2920  // clear renderer memory first
2921  // this will block until any running render finishes
2922  if (render_handler_) {
2923  render_handler_->clear_cpu_memory();
2924  }
2925  // then clear the QE memory
2926  // the renderer will have disconnected from any QE memory
2927  try {
  // NOTE(review): the statement that clears the query-engine memory is on a line
  // missing from this listing.
2929  } catch (const std::exception& e) {
2930  THROW_DB_EXCEPTION(e.what());
2931  }
2932 }
2933 
// Clears the render handler's CPU and GPU memory, in that order, when a render
// handler is present. Requires a superuser session; otherwise a DB exception is
// thrown.
2934 void DBHandler::clearRenderMemory(const TSessionId& session_id_or_json) {
2935  heavyai::RequestInfo const request_info(session_id_or_json);
2936  SET_REQUEST_ID(request_info.requestId());
2937  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2938  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2939  auto session_ptr = stdlog.getConstSessionInfo();
2940  if (!session_ptr->get_currentUser().isSuper) {
2941  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_render_memory");
2942  }
2943  if (render_handler_) {
  // NOTE(review): one statement here is on a line missing from this listing.
2945  render_handler_->clear_cpu_memory();
2946  render_handler_->clear_gpu_memory();
2947  }
2948 }
2949 
// Thrift entry point for PAUSE EXECUTOR QUEUE. Requires a superuser session;
// otherwise a DB exception is thrown. Any std::exception raised inside the try block
// is rethrown as a DB exception.
//
// Unlike most handlers in this file, `session` is passed straight to
// get_session_ptr() without going through heavyai::RequestInfo.
2950 void DBHandler::pause_executor_queue(const TSessionId& session) {
2951  auto stdlog = STDLOG(get_session_ptr(session));
2952  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2953  auto session_ptr = stdlog.getConstSessionInfo();
2954  if (!session_ptr->get_currentUser().isSuper) {
2955  THROW_DB_EXCEPTION("Superuser privilege is required to run PAUSE EXECUTOR QUEUE");
2956  }
2957  try {
  // NOTE(review): the statement that pauses the queue is on a line missing from this
  // listing.
2959  } catch (const std::exception& e) {
2960  THROW_DB_EXCEPTION(e.what());
2961  }
2962 }
2963 
// Thrift entry point for RESUME EXECUTOR QUEUE. Requires a superuser session;
// otherwise a DB exception is thrown. Any std::exception raised inside the try block
// is rethrown as a DB exception.
//
// Unlike most handlers in this file, `session` is passed straight to
// get_session_ptr() without going through heavyai::RequestInfo.
2964 void DBHandler::resume_executor_queue(const TSessionId& session) {
2965  auto stdlog = STDLOG(get_session_ptr(session));
2966  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2967  auto session_ptr = stdlog.getConstSessionInfo();
2968  if (!session_ptr->get_currentUser().isSuper) {
2969  THROW_DB_EXCEPTION("Superuser privilege is required to run RESUME EXECUTOR QUEUE");
2970  }
2971  try {
  // NOTE(review): the statement that resumes the queue is on a line missing from this
  // listing.
2973  } catch (const std::exception& e) {
2974  THROW_DB_EXCEPTION(e.what());
2975  }
2976 }
2977 
// Enrolls the parent session's query with an executor under the given label and
// start time. The enrolled status is RUNNING_QUERY_KERNEL when
// `for_running_query_kernel` is true and RUNNING_IMPORTER otherwise.
//
// The leaf session id is used for request-id setup, session lookup and logging; the
// parent session id is the one passed to enrollQuerySession().
2978 void DBHandler::set_cur_session(const TSessionId& parent_session_id_or_json,
2979  const TSessionId& leaf_session_id_or_json,
2980  const std::string& start_time_str,
2981  const std::string& label,
2982  bool for_running_query_kernel) {
2983  // internal API to manage query interruption in distributed mode
2984  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
2985  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
2986  SET_REQUEST_ID(leaf_request_info.requestId());
2987  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
2988  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2989  auto session_ptr = stdlog.getConstSessionInfo();
2990 
  // NOTE(review): the declaration of `executor` is on a line missing from this
  // listing.
2992  executor->enrollQuerySession(parent_request_info.sessionId(),
2993  label,
2994  start_time_str,
  // NOTE(review): one argument of this call is on a line missing from this listing.
2996  for_running_query_kernel
2997  ? QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL
2998  : QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
2999 }
3000 
// Clears the query session status that an executor holds for the parent session and
// start time.
//
// The leaf session id is used for request-id setup, session lookup and logging.
// `label` and `for_running_query_kernel` are not used in the visible body.
3001 void DBHandler::invalidate_cur_session(const TSessionId& parent_session_id_or_json,
3002  const TSessionId& leaf_session_id_or_json,
3003  const std::string& start_time_str,
3004  const std::string& label,
3005  bool for_running_query_kernel) {
3006  // internal API to manage query interruption in distributed mode
3007  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
3008  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
3009  SET_REQUEST_ID(leaf_request_info.requestId());
3010  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
3011  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
  // NOTE(review): the declaration of `executor` is on a line missing from this
  // listing.
3013  executor->clearQuerySessionStatus(parent_request_info.sessionId(), start_time_str);
3014 }
3015 
// NOTE(review): this function's signature is on a line missing from this listing.
// The visible body simply returns the INVALID_SESSION_ID constant.
3017  return INVALID_SESSION_ID;
3018 }
3019 
3020 void DBHandler::get_memory(std::vector<TNodeMemoryInfo>& _return,
3021  const TSessionId& session_id_or_json,
3022  const std::string& memory_level) {
3023  heavyai::RequestInfo const request_info(session_id_or_json);
3024  SET_REQUEST_ID(request_info.requestId());
3025  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
3026  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3027  std::vector<Data_Namespace::MemoryInfo> internal_memory;
3028  if (!memory_level.compare("gpu")) {
3029  internal_memory =
3030  SysCatalog::instance().getDataMgr().getMemoryInfo(MemoryLevel::GPU_LEVEL);
3031  } else {
3032  internal_memory =
3033  SysCatalog::instance().getDataMgr().getMemoryInfo(MemoryLevel::CPU_LEVEL);
3034  }
3035 
3036  for (auto memInfo : internal_memory) {
3037  TNodeMemoryInfo nodeInfo;
3038  nodeInfo.page_size = memInfo.pageSize;
3039  nodeInfo.max_num_pages = memInfo.maxNumPages;
3040  nodeInfo.num_pages_allocated = memInfo.numPageAllocated;
3041  nodeInfo.is_allocation_capped = memInfo.isAllocationCapped;
3042  for (auto gpu : memInfo.nodeMemoryData) {
3043  TMemoryData md;
3044  md.slab = gpu.slabNum;
3045  md.start_page = gpu.startPage;
3046  md.num_pages = gpu.numPages;
3047  md.touch = gpu.touch;
3048  md.chunk_key.insert(md.chunk_key.end(), gpu.chunk_key.begin(), gpu.chunk_key.end());
3049  md.is_free = gpu.memStatus == Buffer_Namespace::MemStatus::FREE;
3050  nodeInfo.node_memory_data.push_back(md);
3051  }
3052  _return.push_back(nodeInfo);
3053  }
3054 }
3055 
// Thrift entry point: appends a TDBInfo (name and owner name) to `dbinfos` for each
// entry SysCatalog::getDatabaseListForUser() returns for the session's current user.
3056 void DBHandler::get_databases(std::vector<TDBInfo>& dbinfos,
3057  const TSessionId& session_id_or_json) {
3058  heavyai::RequestInfo const request_info(session_id_or_json);
3059  SET_REQUEST_ID(request_info.requestId());
3060  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
3061  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3062  auto session_ptr = stdlog.getConstSessionInfo();
3063  const auto& user = session_ptr->get_currentUser();
  // NOTE(review): the line declaring `dbs` (the left-hand side of the initialisation
  // below) is missing from this listing.
3065  SysCatalog::instance().getDatabaseListForUser(user);
  // `dbs` is a local list, so its strings can be moved out instead of copied.
3066  for (auto& db : dbs) {
3067  TDBInfo dbinfo;
3068  dbinfo.db_name = std::move(db.dbName);
3069  dbinfo.db_owner = std::move(db.dbOwnerName);
3070  dbinfos.push_back(std::move(dbinfo));
3071  }
3072 }
3073 
// Maps the session's executor device type to the corresponding TExecuteMode value
// (CPU or GPU). Any other device type hits UNREACHABLE().
3074 TExecuteMode::type DBHandler::getExecutionMode(const TSessionId& session_id) {
3075  auto executor = get_session_ptr(session_id)->get_executor_device_type();
3076  switch (executor) {
  // NOTE(review): both `case` labels of this switch are on lines missing from this
  // listing.
3078  return TExecuteMode::CPU;
3080  return TExecuteMode::GPU;
3081  default:
3082  UNREACHABLE();
3083  }
  // Not expected to be reached; the trailing return presumably exists to satisfy the
  // compiler's return-path analysis.
3084  UNREACHABLE();
3085  return TExecuteMode::CPU;
3086 }
3087 void DBHandler::set_execution_mode(const TSessionId& session_id_or_json,
3088  const TExecuteMode::type mode) {
3089  heavyai::RequestInfo const request_info(session_id_or_json);
3090  SET_REQUEST_ID(request_info.requestId());
3091  auto session_ptr = get_session_ptr(request_info.sessionId());
3092  auto stdlog = STDLOG(session_ptr);
3093  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3094  DBHandler::set_execution_mode_nolock(session_ptr.get(), mode);
3095 }
3096 
3097 namespace {
3098 
// NOTE(review): this helper's signature is on a line missing from this listing.
// The visible body throws std::runtime_error when `td` is non-null and describes a
// table with a non-zero shard count; otherwise it does nothing.
3100  if (td && td->nShards) {
3101  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
3102  }
3103 }
3104 
3105 void check_valid_column_names(const std::list<const ColumnDescriptor*>& descs,
3106  const std::vector<std::string>& column_names) {
3107  std::unordered_set<std::string> unique_names;
3108  for (const auto& name : column_names) {
3109  auto lower_name = to_lower(name);
3110  if (unique_names.find(lower_name) != unique_names.end()) {
3111  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
3112  } else {
3113  unique_names.insert(lower_name);
3114  }
3115  }
3116  for (const auto& cd : descs) {
3117  auto iter = unique_names.find(to_lower(cd->columnName));
3118  if (iter != unique_names.end()) {
3119  unique_names.erase(iter);
3120  }
3121  }
3122  if (!unique_names.empty()) {
3123  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
3124  }
3125 }
3126 
3127 // Return vector of IDs mapping column descriptors to the list of comumn names.
3128 // The size of the vector is the number of actual columns (geophisical columns excluded).
3129 // ID is either a position in column_names matching the descriptor, or -1 if the column
3130 // is missing from the column_names
3131 std::vector<int> column_ids_by_names(const std::list<const ColumnDescriptor*>& descs,
3132  const std::vector<std::string>& column_names) {
3133  std::vector<int> desc_to_column_ids;
3134  if (column_names.empty()) {
3135  int col_idx = 0;
3136  for (const auto& cd : descs) {
3137  if (!cd->isGeoPhyCol) {
3138  desc_to_column_ids.push_back(col_idx);
3139  ++col_idx;
3140  }
3141  }
3142  } else {
3143  for (const auto& cd : descs) {
3144  if (!cd->isGeoPhyCol) {
3145  bool found = false;
3146  for (size_t j = 0; j < column_names.size(); ++j) {
3147  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
3148  found = true;
3149  desc_to_column_ids.push_back(j);
3150  break;
3151  }
3152  }
3153  if (!found) {
3154  if (!cd->columnType.get_notnull()) {
3155  desc_to_column_ids.push_back(-1);
3156  } else {
3157  THROW_DB_EXCEPTION("Column '" + cd->columnName +
3158  "' cannot be omitted due to NOT NULL constraint");
3159  }
3160  }
3161  }
3162  }
3163  }
3164  return desc_to_column_ids;
3165 }
3166 
// NOTE(review): this function's signature is on a line missing from this listing; it
// is invoked as log_cache_size(cat) by log_system_cpu_memory_status() in this file.
//
// Builds a single "Cache size information {...}" line and writes it at INFO level.
// The sections appended are: query result sets (only when a size is available), the
// sum of four hash-table cache sizes, chunk metadata, the query plan DAG, compiled
// GPU code, and string dictionaries.
3168  std::ostringstream oss;
3169  oss << "Cache size information {";
  // NOTE(review): the declaration of `executor` is on a line missing from this
  // listing.
3171  // 1. Data recycler
3172  // 1.a Resultset Recycler
3173  auto resultset_cache_size =
3174  executor->getResultSetRecyclerHolder()
3175  .getResultSetRecycler()
3176  ->getResultSetRecyclerMetricTracker()
3177  .getCurrentCacheSize(DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3178  if (resultset_cache_size) {
3179  oss << "\"query_resultset\": " << *resultset_cache_size << " bytes, ";
3180  }
3181 
3182  // 1.b Join Hash Table Recycler
  // NOTE(review): the initialiser expressions of the four sizes below are on lines
  // missing from this listing.
3183  auto perfect_join_ht_cache_size =
3186  auto baseline_join_ht_cache_size =
3189  auto bbox_intersect_ht_cache_size =
3193  auto bbox_intersect_ht_tuner_cache_size =
3197  auto sum_hash_table_cache_size =
3198  perfect_join_ht_cache_size + baseline_join_ht_cache_size +
3199  bbox_intersect_ht_cache_size + bbox_intersect_ht_tuner_cache_size;
3200  oss << "\"hash_tables\": " << sum_hash_table_cache_size << " bytes, ";
3201 
3202  // 1.c Chunk Metadata Recycler
3203  auto chunk_metadata_cache_size =
3204  executor->getResultSetRecyclerHolder()
3205  .getChunkMetadataRecycler()
3206  ->getCurrentCacheSizeForDevice(CacheItemType::CHUNK_METADATA,
  // NOTE(review): the second argument of this call is on a line missing from this
  // listing.
3208  oss << "\"chunk_metadata\": " << chunk_metadata_cache_size << " bytes, ";
3209 
3210  // 2. Query Plan Dag
3211  auto query_plan_dag_cache_size =
3212  executor->getQueryPlanDagCache().getCurrentNodeMapSize();
3213  oss << "\"query_plan_dag\": " << query_plan_dag_cache_size << " bytes, ";
3214 
3215  // 3. Compiled (GPU) Code
3216  oss << "\"compiled_GPU code\": "
3217  << QueryEngine::getInstance()->gpu_code_accessor->getCacheSize() << " bytes, ";
3218 
3219  // 4. String Dictionary
3220  oss << "\"string_dictionary\": " << cat.getTotalMemorySizeForDictionariesForDatabase()
3221  << " bytes";
3222  oss << "}";
3223  LOG(INFO) << oss.str();
3224 }
3225 
// Logs `query` followed by the data manager's system memory usage at INFO level,
// then logs the cache sizes via log_cache_size(cat).
// NOTE(review): the second parameter (`cat`) and the condition that guards the body
// are on lines missing from this listing -- confirm when logging is enabled.
3226 void log_system_cpu_memory_status(std::string const& query,
3229  std::ostringstream oss;
3230  oss << query << "\n" << cat.getDataMgr().getSystemMemoryUsage();
3231  LOG(INFO) << oss.str();
3232  log_cache_size(cat);
3233  }
3234 }
3235 } // namespace
3236 
// NOTE(review): the line carrying this function's return type and name is missing
// from this listing; load_table_binary_columnar() in this file calls
// fillGeoColumns(...) with a matching argument list, so this is presumably that
// function -- confirm.
//
// Reads the geo strings buffered for the geometry column at position `col_idx - 1`
// and converts them with GeoTypesFactory::getGeoColumns() into coords, bounds, ring
// sizes and poly rings. The results are handed to a call (its opening line is also
// missing) that, per the existing comment, populates the physical columns and
// advances `col_idx`.
//
// Throws a DB exception ("Invalid geometry in column ...") when the number of
// buffered strings differs from `num_rows` or the conversion reports failure.
3238  const TSessionId& session_id,
3239  const Catalog& catalog,
3240  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
3241  const ColumnDescriptor* cd,
3242  size_t& col_idx,
3243  size_t num_rows,
3244  const std::string& table_name) {
  // On entry `col_idx` has already moved past the geometry column itself.
3245  auto geo_col_idx = col_idx - 1;
3246  const auto wkt_or_wkb_hex_column = import_buffers[geo_col_idx]->getGeoStringBuffer();
3247  std::vector<std::vector<double>> coords_column, bounds_column;
3248  std::vector<std::vector<int>> ring_sizes_column, poly_rings_column;
3249  SQLTypeInfo ti = cd->columnType;
3250  const bool validate_with_geos_if_available = false;
3251  if (num_rows != wkt_or_wkb_hex_column->size() ||
3252  !Geospatial::GeoTypesFactory::getGeoColumns(wkt_or_wkb_hex_column,
3253  ti,
3254  coords_column,
3255  bounds_column,
3256  ring_sizes_column,
3257  poly_rings_column,
3258  validate_with_geos_if_available)) {
3259  std::ostringstream oss;
3260  oss << "Invalid geometry in column " << cd->columnName;
3261  THROW_DB_EXCEPTION(oss.str());
3262  }
3263 
3264  // Populate physical columns, advance col_idx
  // NOTE(review): the line opening this call (callee name and first argument) is
  // missing from this listing.
3266  cd,
3267  import_buffers,
3268  col_idx,
3269  coords_column,
3270  bounds_column,
3271  ring_sizes_column,
3272  poly_rings_column);
3273 }
3274 
// NOTE(review): the line carrying this function's return type and name is missing
// from this listing; load_table_binary() in this file calls fillMissingBuffers(...)
// with a matching argument list, so this is presumably that function -- confirm.
//
// Walks the column descriptors and, for every column whose entry in
// `desc_id_to_column_id` is -1 (not supplied by the caller), appends `num_rows`
// default values to that column's import buffer. Geo physical columns that follow a
// geometry column are skipped by the walk itself.
//
// `col_idx` indexes `import_buffers`; `import_idx` indexes `desc_id_to_column_id`,
// which has no entries for the skipped geo physical columns.
3276  const TSessionId& session_id,
3277  const Catalog& catalog,
3278  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
3279  const std::list<const ColumnDescriptor*>& cds,
3280  const std::vector<int>& desc_id_to_column_id,
3281  size_t num_rows,
3282  const std::string& table_name) {
3283  size_t skip_physical_cols = 0;
3284  size_t col_idx = 0, import_idx = 0;
3285  for (const auto& cd : cds) {
3286  if (skip_physical_cols > 0) {
3287  CHECK(cd->isGeoPhyCol);
3288  skip_physical_cols--;
3289  continue;
3290  } else if (cd->columnType.is_geometry()) {
3291  skip_physical_cols = cd->columnType.get_physical_cols();
3292  }
3293  if (desc_id_to_column_id[import_idx] == -1) {
3294  import_buffers[col_idx]->addDefaultValues(cd, num_rows);
3295  col_idx++;
3296  if (cd->columnType.is_geometry()) {
  // NOTE(review): the line opening this call is missing from this listing; the
  // argument list matches the geo column filler defined just above -- confirm.
3298  session_id, catalog, import_buffers, cd, col_idx, num_rows, table_name);
3299  }
3300  } else {
  // Column was supplied by the caller: step over it and its physical columns.
3301  col_idx++;
3302  col_idx += skip_physical_cols;
3303  }
3304  import_idx++;
3305  }
3306 }
3307 
namespace {

// Builds the tag "<load_tag>(<table_name>)".
std::string get_load_tag(const std::string& load_tag, const std::string& table_name) {
  return load_tag + "(" + table_name + ")";
}

// Builds the tag "<import_tag>(<table_name>, file_path:<file_path>)".
std::string get_import_tag(const std::string& import_tag,
                           const std::string& table_name,
                           const std::string& file_path) {
  return import_tag + "(" + table_name + ", file_path:" + file_path + ")";
}

}  // namespace
3323 
3324 void DBHandler::load_table_binary(const TSessionId& session_id_or_json,
3325  const std::string& table_name,
3326  const std::vector<TRow>& rows,
3327  const std::vector<std::string>& column_names) {
3328  try {
3329  heavyai::RequestInfo const request_info(session_id_or_json);
3330  SET_REQUEST_ID(request_info.requestId());
3331  auto stdlog =
3332  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3333  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3334  auto session_ptr = stdlog.getConstSessionInfo();
3335 
3336  if (rows.empty()) {
3337  THROW_DB_EXCEPTION("No rows to insert");
3338  }
3339 
3340  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3341  std::unique_ptr<import_export::Loader> loader;
3342  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3343  auto schema_read_lock = prepare_loader_generic(*session_ptr,
3344  table_name,
3345  rows.front().cols.size(),
3346  &loader,
3347  &import_buffers,
3348  column_names,
3349  "load_table_binary");
3350 
3351  auto col_descs = loader->get_column_descs();
3352  auto desc_id_to_column_id = column_ids_by_names(col_descs, column_names);
3353 
3354  size_t rows_completed = 0;
3355  auto const load_tag = get_load_tag("load_table_binary", table_name);
3356  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
3357  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3358  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3359  };
3360  for (auto const& row : rows) {
3361  size_t col_idx = 0;
3362  try {
3363  for (auto cd : col_descs) {
3364  auto mapped_idx = desc_id_to_column_id[col_idx];
3365  if (mapped_idx != -1) {
3366  import_buffers[col_idx]->add_value(
3367  cd, row.cols[mapped_idx], row.cols[mapped_idx].is_null);
3368  }
3369  col_idx++;
3370  }
3371  rows_completed++;
3372  } catch (const std::exception& e) {
3373  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
3374  import_buffers[col_idx_to_pop]->pop_value();
3375  }
3376  LOG(ERROR) << "Input exception thrown: " << e.what()
3377  << ". Row discarded, issue at column : " << (col_idx + 1)
3378  << " data :" << row;
3379  }
3380  }
3381  fillMissingBuffers(request_info.sessionId(),
3382  session_ptr->getCatalog(),
3383  import_buffers,
3384  col_descs,
3385  desc_id_to_column_id,
3386  rows_completed,
3387  table_name);
3388  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3389  session_ptr->getCatalog(), table_name);
3390  if (!loader->load(import_buffers, rows.size(), session_ptr.get())) {
3391  THROW_DB_EXCEPTION(loader->getErrorMessage());
3392  }
3393  } catch (const std::exception& e) {
3394  THROW_DB_EXCEPTION(std::string(e.what()));
3395  }
3396 }
3397 
// NOTE(review): the line carrying this function's name is missing from this listing;
// load_table_binary() and load_table_binary_columnar() in this file call
// prepare_loader_generic(...) with a matching argument list -- confirm.
//
// Validates a load request and sets up the objects needed to perform it:
//  - rejects requests with zero columns and calls check_read_only(load_type);
//  - obtains a schema lock container for `table_name` and CHECKs that the table
//    descriptor is present;
//  - calls check_table_load_privileges() for the session;
//  - creates the Loader in `*loader` and the per-column buffers in `*import_buffers`;
//  - validates `column_names` against the table, or, when it is empty, validates
//    `num_cols` against the table's column count.
// Returns the lock container so the caller keeps the lock for the duration of the
// load.
3398 std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
3400  const Catalog_Namespace::SessionInfo& session_info,
3401  const std::string& table_name,
3402  size_t num_cols,
3403  std::unique_ptr<import_export::Loader>* loader,
3404  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
3405  const std::vector<std::string>& column_names,
3406  std::string load_type) {
3407  if (num_cols == 0) {
3408  THROW_DB_EXCEPTION("No columns to insert");
3409  }
3410  check_read_only(load_type);
3411  auto& cat = session_info.getCatalog();
3412  auto td_with_lock =
3413  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
  // NOTE(review): the callee that receives the arguments below is on a line missing
  // from this listing.
3415  cat, table_name, true));
3416  const auto td = (*td_with_lock)();
3417  CHECK(td);
3418 
3419  if (g_cluster && !leaf_aggregator_.leafCount()) {
3420  // Sharded table rows need to be routed to the leaf by an aggregator.
  // NOTE(review): the statement executed in this branch is on a line missing from
  // this listing.
3422  }
3423  check_table_load_privileges(session_info, table_name);
3424 
3425  loader->reset(new import_export::Loader(cat, td));
3426 
3427  auto col_descs = (*loader)->get_column_descs();
3428  check_valid_column_names(col_descs, column_names);
3429  if (column_names.empty()) {
3430  // TODO(andrew): nColumns should be number of non-virtual/non-system columns.
3431  // Subtracting 1 (rowid) until TableDescriptor is updated.
3432  auto geo_physical_cols = std::count_if(
3433  col_descs.begin(), col_descs.end(), [](auto cd) { return cd->isGeoPhyCol; });
  // Subtracts 2 instead of 1 when the table also has a deleted column.
3434  const auto num_table_cols = static_cast<size_t>(td->nColumns) - geo_physical_cols -
3435  (td->hasDeletedCol ? 2 : 1);
3436  if (num_cols != num_table_cols) {
3437  throw std::runtime_error("Number of columns to load (" + std::to_string(num_cols) +
3438  ") does not match number of columns in table " +
3439  td->tableName + " (" + std::to_string(num_table_cols) +
3440  ")");
3441  }
3442  } else if (num_cols != column_names.size()) {
  // NOTE(review): the line opening the throw/exception expression for the message
  // below is missing from this listing.
3444  "Number of columns specified does not match the "
3445  "number of columns given (" +
3446  std::to_string(num_cols) + " vs " + std::to_string(column_names.size()) + ")");
3447  }
3448 
3449  *import_buffers = import_export::setup_column_loaders(td, loader->get());
3450  return std::move(td_with_lock);
3451 }
3452 namespace {
3453 
3454 size_t get_column_size(const TColumn& column) {
3455  if (!column.nulls.empty()) {
3456  return column.nulls.size();
3457  } else {
3458  // it is a very bold estimate but later we check it against REAL data
3459  // and if this function returns a wrong result (e.g. both int and string
3460  // vectors are filled with values), we get an error
3461  return column.data.int_col.size() + column.data.arr_col.size() +
3462  column.data.real_col.size() + column.data.str_col.size();
3463  }
3464 }
3465 
3466 } // namespace
3467 
// Loads column-oriented Thrift data into an existing table.
//
//   session_id_or_json  parsed by heavyai::RequestInfo to obtain session and request ids
//   table_name          destination table
//   cols                the input columns; cols.front() supplies the expected row count
//   column_names        forwarded to prepare_loader_generic() and column_ids_by_names()
//                       to relate the input columns to the table's column descriptors
//
// Raises a DB exception (THROW_DB_EXCEPTION) when a column's row count differs from
// the expected one, when any std::exception surfaces while filling the import
// buffers, or when loader->load() reports failure.
3468 void DBHandler::load_table_binary_columnar(const TSessionId& session_id_or_json,
3469  const std::string& table_name,
3470  const std::vector<TColumn>& cols,
3471  const std::vector<std::string>& column_names) {
3472  heavyai::RequestInfo const request_info(session_id_or_json);
3473  SET_REQUEST_ID(request_info.requestId());
3474  auto stdlog =
3475  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3476  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3477  auto session_ptr = stdlog.getConstSessionInfo();
3478 
3479  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3480  std::unique_ptr<import_export::Loader> loader;
3481  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
  // prepare_loader_generic() fills in `loader` and `import_buffers`; its return
  // value is held for the rest of the function.
3482  auto schema_read_lock = prepare_loader_generic(*session_ptr,
3483  table_name,
3484  cols.size(),
3485  &loader,
3486  &import_buffers,
3487  column_names,
3488  "load_table_binary_columnar");
3489 
3490  auto desc_id_to_column_id =
3491  column_ids_by_names(loader->get_column_descs(), column_names);
  // Expected row count comes from the first input column; every mapped column is
  // checked against it below.
  // NOTE(review): cols.front() assumes cols is non-empty; presumably
  // prepare_loader_generic() rejects a zero-column request - confirm.
3492  size_t num_rows = get_column_size(cols.front());
3493  size_t import_idx = 0; // index into the TColumn vector being loaded
3494  size_t col_idx = 0; // index into column description vector
3495  auto const load_tag = get_load_tag("load_table_binary_columnar", table_name);
3496  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
  // Logs the matching "finish_" status on every exit path, including exceptions.
3497  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3498  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3499  };
3500  try {
  // Number of physical geo columns still to be skipped after a geometry column.
3501  size_t skip_physical_cols = 0;
3502  for (auto cd : loader->get_column_descs()) {
3503  if (skip_physical_cols > 0) {
3504  CHECK(cd->isGeoPhyCol);
3505  skip_physical_cols--;
3506  continue;
3507  }
  // -1 means no input column was mapped to this table column; such columns are
  // stepped over here (presumably populated later by fillMissingBuffers - confirm).
3508  auto mapped_idx = desc_id_to_column_id[import_idx];
3509  if (mapped_idx != -1) {
3510  size_t col_rows = import_buffers[col_idx]->add_values(cd, cols[mapped_idx]);
3511  if (col_rows != num_rows) {
3512  std::ostringstream oss;
3513  oss << "load_table_binary_columnar: Inconsistent number of rows in column "
3514  << cd->columnName << " , expecting " << num_rows << " rows, column "
3515  << col_idx << " has " << col_rows << " rows";
3516  THROW_DB_EXCEPTION(oss.str());
3517  }
3518  // Advance to the next column in the table
3519  col_idx++;
3520  // For geometry columns: process WKT strings and fill physical columns
3521  if (cd->columnType.is_geometry()) {
  // col_idx is passed to fillGeoColumns() and is not advanced past the physical
  // columns here, unlike the unmapped branch below
  // (NOTE(review): presumably fillGeoColumns advances it itself - confirm).
3522  fillGeoColumns(request_info.sessionId(),
3523  session_ptr->getCatalog(),
3524  import_buffers,
3525  cd,
3526  col_idx,
3527  num_rows,
3528  table_name);
3529  skip_physical_cols = cd->columnType.get_physical_cols();
3530  }
3531  } else {
  // Unmapped column: step over its buffer and, for geometry, over the buffers of
  // its physical columns as well.
3532  col_idx++;
3533  if (cd->columnType.is_geometry()) {
3534  skip_physical_cols = cd->columnType.get_physical_cols();
3535  col_idx += skip_physical_cols;
3536  }
3537  }
3538  // Advance to the next column of values being loaded
3539  import_idx++;
3540  }
3541  } catch (const std::exception& e) {
  // Any failure above is re-reported as a DB exception naming the 1-based column.
3542  std::ostringstream oss;
3543  oss << "load_table_binary_columnar: Input exception thrown: " << e.what()
3544  << ". Issue at column : " << (col_idx + 1) << ". Import aborted";
3545  THROW_DB_EXCEPTION(oss.str());
3546  }
3547  fillMissingBuffers(request_info.sessionId(),
3548  session_ptr->getCatalog(),
3549  import_buffers,
3550  loader->get_column_descs(),
3551  desc_id_to_column_id,
3552  num_rows,
3553  table_name);
  // The table's insert-data write lock is taken only now, right before the load.
3554  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3555  session_ptr->getCatalog(), table_name);
3556  if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3557  THROW_DB_EXCEPTION(loader->getErrorMessage());
3558  }
3559 }
3560 
// Ordered collection of shared Arrow record batches.
3561 using RecordBatchVector = std::vector<std::shared_ptr<arrow::RecordBatch>>;
3562 
// Evaluates the Arrow status expression `s` exactly once. If the status is not
// OK, its text is logged at ERROR level and thrown as a TDBException.
// The message is taken from the cached `_s`, never from `s`, so an argument with
// side effects (e.g. reader->ReadNext(&batch)) is not executed a second time.
#define ARROW_THRIFT_THROW_NOT_OK(s) \
  do {                               \
    ::arrow::Status _s = (s);        \
    if (UNLIKELY(!_s.ok())) {        \
      TDBException ex;               \
      ex.error_msg = _s.ToString();  \
      LOG(ERROR) << ex.error_msg;    \
      throw ex;                      \
    }                                \
  } while (0)
3573 
3574 namespace {
3575 
3576 RecordBatchVector loadArrowStream(const std::string& stream) {
3577  RecordBatchVector batches;
3578  try {
3579  // TODO(wesm): Make this simpler in general, see ARROW-1600
3580  auto stream_buffer =
3581  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3582  static_cast<int64_t>(stream.size()));
3583 
3584  arrow::io::BufferReader buf_reader(stream_buffer);
3585  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3586  ARROW_ASSIGN_OR_THROW(batch_reader,
3587  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3588 
3589  while (true) {
3590  std::shared_ptr<arrow::RecordBatch> batch;
3591  // Read batch (zero-copy) from the stream
3592  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3593  if (batch == nullptr) {
3594  break;
3595  }
3596  batches.emplace_back(std::move(batch));
3597  }
3598  } catch (const std::exception& e) {
3599  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3600  }
3601  return batches;
3602 }
3603 
3604 } // namespace
3605 
// Loads a serialized Arrow stream into an existing table.
//
//   session_id_or_json  parsed by heavyai::RequestInfo to obtain session and request ids
//   table_name          destination table
//   arrow_stream        serialized Arrow stream; it must decode to exactly one record batch
//   use_column_names    when true, the batch's schema field names are used as the
//                       column-name list; otherwise that list stays empty
//
// Raises a DB exception (THROW_DB_EXCEPTION) when the stream does not yield exactly
// one batch, when any std::exception surfaces while filling the import buffers, or
// when loader->load() reports failure.
3606 void DBHandler::load_table_binary_arrow(const TSessionId& session_id_or_json,
3607  const std::string& table_name,
3608  const std::string& arrow_stream,
3609  const bool use_column_names) {
3610  heavyai::RequestInfo const request_info(session_id_or_json);
3611  SET_REQUEST_ID(request_info.requestId());
3612  auto stdlog =
3613  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3614  auto session_ptr = stdlog.getConstSessionInfo();
3615 
3616  RecordBatchVector batches = loadArrowStream(arrow_stream);
3617  // Assuming have one batch for now
3618  if (batches.size() != 1) {
3619  THROW_DB_EXCEPTION("Expected a single Arrow record batch. Import aborted");
3620  }
3621 
3622  std::shared_ptr<arrow::RecordBatch> batch = batches[0];
3623  std::unique_ptr<import_export::Loader> loader;
3624  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3625  std::vector<std::string> column_names;
3626  if (use_column_names) {
3627  column_names = batch->schema()->field_names();
3628  }
3629  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
  // prepare_loader_generic() fills in `loader` and `import_buffers`; its return
  // value is held for the rest of the function.
3630  auto schema_read_lock =
3631  prepare_loader_generic(*session_ptr,
3632  table_name,
3633  static_cast<size_t>(batch->num_columns()),
3634  &loader,
3635  &import_buffers,
3636  column_names,
3637  "load_table_binary_arrow");
3638 
3639  auto desc_id_to_column_id =
3640  column_ids_by_names(loader->get_column_descs(), column_names);
  // Overwritten by each mapped column's add_arrow_values() result, so after the
  // loop it holds the value for the last mapped column (0 if none was mapped).
3641  size_t num_rows = 0;
3642 
3643  // col_idx indexes "desc_id_to_column_id"
3644  size_t col_idx = 0;
3645  auto const load_tag = get_load_tag("load_table_binary_arrow", table_name);
3646  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
  // Logs the matching "finish_" status on every exit path, including exceptions.
3647  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3648  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3649  };
3650  try {
3651  for (auto cd : loader->get_column_descs()) {
3652  if (cd->isGeoPhyCol) {
3653  // Skip in the case of "cd" being a physical cols, as they are generated
3654  // in fillGeoColumns:
3655  // * Point: coords col
3656  // * MultiPoint/LineString: coords/bounds cols
3657  // etc...
3658  continue;
3659  }
  // -1 means no batch column was mapped to this table column; it is left untouched
  // here (presumably populated later by fillMissingBuffers - confirm).
3660  auto mapped_idx = desc_id_to_column_id[col_idx];
3661  if (mapped_idx != -1) {
3662  auto& array = *batch->column(mapped_idx);
  // The slice covers the whole array: [0, array.length()).
3663  import_export::ArraySliceRange row_slice(0, array.length());
3664 
3665  // col_id indexes "import_buffers"
3666  size_t col_id = cd->columnId;
3667 
3668  // When importing a buffer with "add_arrow_values", the index in
3669  // "importing_buffers" is given by the "columnId" attribute of a ColumnDescriptor.
3670  // This index will differ from "col_idx" if any of the importing columns is a
3671  // geometry column as they have physical columns for other properties (i.e. a
3672  // LineString also has "coords" and "bounds").
  // The buffer is addressed as col_id - 1 (NOTE(review): implies columnId is
  // 1-based - confirm).
3673  num_rows = import_buffers[col_id - 1]->add_arrow_values(
3674  cd, array, true, row_slice, nullptr);
3675  // For geometry columns: process WKT strings and fill physical columns
3676  if (cd->columnType.is_geometry()) {
3677  fillGeoColumns(request_info.sessionId(),
3678  session_ptr->getCatalog(),
3679  import_buffers,
3680  cd,
3681  col_id,
3682  num_rows,
3683  table_name);
3684  }
3685  }
3686  // Advance to the next column in the table
3687  col_idx++;
3688  }
3689  } catch (const std::exception& e) {
  // The column position goes to the server log only; the DB exception carries
  // just e.what().
3690  LOG(ERROR) << "Input exception thrown: " << e.what()
3691  << ". Issue at column : " << (col_idx + 1) << ". Import aborted";
3692  // TODO(tmostak): Go row-wise on binary columnar import to be consistent with our
3693  // other import paths
3694  THROW_DB_EXCEPTION(e.what());
3695  }
3696  fillMissingBuffers(request_info.sessionId(),
3697  session_ptr->getCatalog(),
3698  import_buffers,
3699  loader->get_column_descs(),
3700  desc_id_to_column_id,
3701  num_rows,
3702  table_name);
  // The table's insert-data write lock is taken only now, right before the load.
3703  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3704  session_ptr->getCatalog(), table_name);
3705  if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3706  THROW_DB_EXCEPTION(loader->getErrorMessage());
3707  }
3708 }
3709 
// Loads row-oriented string data into an existing table.
//
//   session_id_or_json  parsed by heavyai::RequestInfo to obtain session and request ids
//   table_name          destination table
//   rows                rows to insert; must be non-empty. rows.front().cols.size() is
//                       the column count handed to prepare_loader_generic()
//   column_names        forwarded to prepare_loader_generic() and column_ids_by_names()
//
// Works in two passes: first every row's values are appended to the import buffers,
// then the physical columns of geometry columns are filled in one batch.
// Any std::exception escaping the body is re-raised through THROW_DB_EXCEPTION with
// its what() text.
3710 void DBHandler::load_table(const TSessionId& session_id_or_json,
3711  const std::string& table_name,
3712  const std::vector<TStringRow>& rows,
3713  const std::vector<std::string>& column_names) {
3714  try {
3715  heavyai::RequestInfo const request_info(session_id_or_json);
3716  SET_REQUEST_ID(request_info.requestId());
3717  auto stdlog =
3718  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3719  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3720  auto session_ptr = stdlog.getConstSessionInfo();
3721 
3722  if (rows.empty()) {
3723  THROW_DB_EXCEPTION("No rows to insert");
3724  }
3725  auto const load_tag = get_load_tag("load_table", table_name);
3726  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
  // Logs the matching "finish_" status on every exit path, including exceptions.
3727  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3728  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3729  };
3730  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3731  std::unique_ptr<import_export::Loader> loader;
3732  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3733  auto schema_read_lock =
3734  prepare_loader_generic(*session_ptr,
3735  table_name,
3736  static_cast<size_t>(rows.front().cols.size()),
3737  &loader,
3738  &import_buffers,
3739  column_names,
3740  "load_table");
3741 
3742  auto col_descs = loader->get_column_descs();
3743  auto desc_id_to_column_id = column_ids_by_names(col_descs, column_names);
  // Default-constructed copy parameters are used for every add_value() call.
3744  import_export::CopyParams copy_params;
3745  size_t rows_completed = 0;
  // Pass 1: append each row's values to the import buffers.
3746  for (auto const& row : rows) {
3747  size_t import_idx = 0; // index into the TStringRow being loaded
3748  size_t col_idx = 0; // index into column description vector
3749  try {
  // Number of physical geo columns still to be skipped after a geometry column.
3750  size_t skip_physical_cols = 0;
3751  for (auto cd : col_descs) {
3752  if (skip_physical_cols > 0) {
3753  CHECK(cd->isGeoPhyCol);
3754  skip_physical_cols--;
3755  continue;
3756  }
  // -1 means no input field was mapped to this table column; nothing is added for it here.
3757  auto mapped_idx = desc_id_to_column_id[import_idx];
3758  if (mapped_idx != -1) {
3759  import_buffers[col_idx]->add_value(cd,
3760  row.cols[mapped_idx].str_val,
3761  row.cols[mapped_idx].is_null,
3762  copy_params);
3763  }
3764  col_idx++;
3765  if (cd->columnType.is_geometry()) {
3766  // physical geo columns will be filled separately lately
3767  skip_physical_cols = cd->columnType.get_physical_cols();
3768  col_idx += skip_physical_cols;
3769  }
3770  // Advance to the next field within the row
3771  import_idx++;
3772  }
3773  rows_completed++;
3774  } catch (const std::exception& e) {
  // Although the log text says "Row discarded", the THROW_DB_EXCEPTION that
  // follows ends the whole load at the first failing row.
3775  LOG(ERROR) << "Input exception thrown: " << e.what()
3776  << ". Row discarded, issue at column : " << (col_idx + 1)
3777  << " data :" << row;
3778  THROW_DB_EXCEPTION(std::string("Exception: ") + e.what());
3779  }
3780  }
3781  // do batch filling of geo columns separately
  // Pass 2: one walk over the column descriptors, calling fillGeoColumns() for each
  // mapped geometry column with the number of rows completed in pass 1.
3782  if (rows.size() != 0) {
  // `row` is used only by the error log in the catch block below.
3783  const auto& row = rows[0];
3784  size_t col_idx = 0; // index into column description vector
3785  try {
3786  size_t import_idx = 0;
3787  size_t skip_physical_cols = 0;
3788  for (auto cd : col_descs) {
3789  if (skip_physical_cols > 0) {
3790  skip_physical_cols--;
3791  continue;
3792  }
3793  auto mapped_idx = desc_id_to_column_id[import_idx];
3794  col_idx++;
3795  if (cd->columnType.is_geometry()) {
3796  skip_physical_cols = cd->columnType.get_physical_cols();
3797  if (mapped_idx != -1) {
  // col_idx is not advanced past the physical columns in this branch
  // (NOTE(review): presumably fillGeoColumns advances it itself - confirm).
3798  fillGeoColumns(request_info.sessionId(),
3799  session_ptr->getCatalog(),
3800  import_buffers,
3801  cd,
3802  col_idx,
3803  rows_completed,
3804  table_name);
3805  } else {
  // Unmapped geometry column: step over the buffers of its physical columns.
3806  col_idx += skip_physical_cols;
3807  }
3808  }
3809  import_idx++;
3810  }
3811  } catch (const std::exception& e) {
3812  LOG(ERROR) << "Input exception thrown: " << e.what()
3813  << ". Row discarded, issue at column : " << (col_idx + 1)
3814  << " data :" << row;
3815  THROW_DB_EXCEPTION(e.what());
3816  }
3817  }
3818  fillMissingBuffers(request_info.sessionId(),
3819  session_ptr->getCatalog(),
3820  import_buffers,
3821  col_descs,
3822  desc_id_to_column_id,
3823  rows_completed,
3824  table_name);
  // The table's insert-data write lock is taken only now, right before the load.
3825  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3826  session_ptr->getCatalog(), table_name);
3827  if (!loader->load(import_buffers, rows_completed, session_ptr.get())) {
3828  THROW_DB_EXCEPTION(loader->getErrorMessage());
3829  }
3830 
3831  } catch (const std::exception& e) {
  // Outermost handler: re-raise anything derived from std::exception as a DB exception.
3832  THROW_DB_EXCEPTION(std::string(e.what()));
3833  }
3834 }
3835 
3836 char DBHandler::unescape_char(std::string str) {
3837  char out = str[0];
3838  if (str.size() == 2 && str[0] == '\\') {
3839  if (str[1] == 't') {
3840  out = '\t';
3841  } else if (str[1] == 'n') {
3842  out = '\n';
3843  } else if (str[1] == '0') {
3844  out = '\0';
3845  } else if (str[1] == '\'') {
3846  out = '\'';
3847  } else if (str[1] == '\\') {
3848  out = '\\';
3849  }
3850  }
3851  return out;
3852 }
3853 
// Body of the conversion from the Thrift-side parameters `cp` into an
// import_export::CopyParams. It starts from a default-constructed CopyParams,
// copies fields of `cp` into it (validating several of them) and returns it.
// NOTE(review): the signature line and the bodies of several `case` labels are not
// present in this listing; the comments below describe only the visible statements.
3855  import_export::CopyParams copy_params;
3856  switch (cp.has_header) {
3857  case TImportHeaderRow::AUTODETECT:
3859  break;
3860  case TImportHeaderRow::NO_HEADER:
3862  break;
3863  case TImportHeaderRow::HAS_HEADER:
3865  break;
3866  default:
3867  CHECK(false);
3868  }
3869  copy_params.quoted = cp.quoted;
  // Single-character settings arrive as strings and go through unescape_char().
  // An empty delimiter is stored as '\0'; the other empty settings keep the
  // CopyParams default.
3870  if (cp.delimiter.length() > 0) {
3871  copy_params.delimiter = unescape_char(cp.delimiter);
3872  } else {
3873  copy_params.delimiter = '\0';
3874  }
3875  if (cp.null_str.length() > 0) {
3876  copy_params.null_str = cp.null_str;
3877  }
3878  if (cp.quote.length() > 0) {
3879  copy_params.quote = unescape_char(cp.quote);
3880  }
3881  if (cp.escape.length() > 0) {
3882  copy_params.escape = unescape_char(cp.escape);
3883  }
3884  if (cp.line_delim.length() > 0) {
3885  copy_params.line_delim = unescape_char(cp.line_delim);
3886  }
3887  if (cp.array_delim.length() > 0) {
3888  copy_params.array_delim = unescape_char(cp.array_delim);
3889  }
3890  if (cp.array_begin.length() > 0) {
3891  copy_params.array_begin = unescape_char(cp.array_begin);
3892  }
3893  if (cp.array_end.length() > 0) {
3894  copy_params.array_end = unescape_char(cp.array_end);
3895  }
  // A thread count of 0 keeps the CopyParams default.
3896  if (cp.threads != 0) {
3897  copy_params.threads = cp.threads;
3898  }
  // S3 settings are copied only when the client supplied a non-empty value.
3899  if (cp.s3_access_key.length() > 0) {
3900  copy_params.s3_access_key = cp.s3_access_key;
3901  }
3902  if (cp.s3_secret_key.length() > 0) {
3903  copy_params.s3_secret_key = cp.s3_secret_key;
3904  }
3905  if (cp.s3_session_token.length() > 0) {
3906  copy_params.s3_session_token = cp.s3_session_token;
3907  }
3908  if (cp.s3_region.length() > 0) {
3909  copy_params.s3_region = cp.s3_region;
3910  }
3911  if (cp.s3_endpoint.length() > 0) {
3912  copy_params.s3_endpoint = cp.s3_endpoint;
3913  }
3914 #ifdef HAVE_AWS_S3
  // When server privileges are allowed and the client supplied no access key, secret
  // key or session token, use the credentials from the default AWS provider chain.
3915  if (g_allow_s3_server_privileges && cp.s3_access_key.length() == 0 &&
3916  cp.s3_secret_key.length() == 0 && cp.s3_session_token.length() == 0) {
3917  const auto& server_credentials =
3918  Aws::Auth::DefaultAWSCredentialsProviderChain().GetAWSCredentials();
3919  copy_params.s3_access_key = server_credentials.GetAWSAccessKeyId();
3920  copy_params.s3_secret_key = server_credentials.GetAWSSecretKey();
3921  copy_params.s3_session_token = server_credentials.GetSessionToken();
3922  }
3923 #endif
3924 
  // Source type: ODBC is always rejected; Parquet is rejected unless
  // ENABLE_IMPORT_PARQUET is defined.
3925  switch (cp.source_type) {
3926  case TSourceType::DELIMITED_FILE:
3928  break;
3929  case TSourceType::GEO_FILE:
3931  break;
3932  case TSourceType::PARQUET_FILE:
3933 #ifdef ENABLE_IMPORT_PARQUET
3935  break;
3936 #else
3937  THROW_DB_EXCEPTION("Parquet not supported");
3938 #endif
3939  case TSourceType::ODBC:
3940  THROW_DB_EXCEPTION("ODBC source not supported");
3941  case TSourceType::RASTER_FILE:
3943  break;
3944  default:
3945  CHECK(false);
3946  }
3947 
  // Geo coordinate encoding: only GEOINT and NONE are accepted.
3948  switch (cp.geo_coords_encoding) {
3949  case TEncodingType::GEOINT:
3950  copy_params.geo_coords_encoding = kENCODING_GEOINT;
3951  break;
3952  case TEncodingType::NONE:
3953  copy_params.geo_coords_encoding = kENCODING_NONE;
3954  break;
3955  default:
3956  THROW_DB_EXCEPTION("Invalid geo_coords_encoding in TCopyParams: " +
3957  std::to_string((int)cp.geo_coords_encoding));
3958  }
3959  copy_params.geo_coords_comp_param = cp.geo_coords_comp_param;
  // Geo coordinate type: only GEOGRAPHY and GEOMETRY are accepted.
3960  switch (cp.geo_coords_type) {
3961  case TDatumType::GEOGRAPHY:
3962  copy_params.geo_coords_type = kGEOGRAPHY;
3963  break;
3964  case TDatumType::GEOMETRY:
3965  copy_params.geo_coords_type = kGEOMETRY;
3966  break;
3967  default:
3968  THROW_DB_EXCEPTION("Invalid geo_coords_type in TCopyParams: " +
3969  std::to_string((int)cp.geo_coords_type));
3970  }
  // Only SRIDs 4326, 3857 and 900913 are accepted.
3971  switch (cp.geo_coords_srid) {
3972  case 4326:
3973  case 3857:
3974  case 900913:
3975  copy_params.geo_coords_srid = cp.geo_coords_srid;
3976  break;
3977  default:
3978  THROW_DB_EXCEPTION("Invalid geo_coords_srid in TCopyParams (" +
3979  std::to_string((int)cp.geo_coords_srid));
3980  }
3981  copy_params.sanitize_column_names = cp.sanitize_column_names;
3982  copy_params.geo_layer_name = cp.geo_layer_name;
3983  copy_params.geo_explode_collections = cp.geo_explode_collections;
3984  copy_params.source_srid = cp.source_srid;
3985  switch (cp.raster_point_type) {
3986  case TRasterPointType::NONE:
3988  break;
3989  case TRasterPointType::AUTO:
3991  break;
3992  case TRasterPointType::SMALLINT:
3994  break;
3995  case TRasterPointType::INT:
3997  break;
3998  case TRasterPointType::FLOAT:
4000  break;
4001  case TRasterPointType::DOUBLE:
4003  break;
4004  case TRasterPointType::POINT:
4006  break;
4007  default:
4008  CHECK(false);
4009  }
4010  copy_params.raster_import_bands = cp.raster_import_bands;
  // A negative scanline count is rejected.
4011  if (cp.raster_scanlines_per_thread < 0) {
4012  THROW_DB_EXCEPTION("Invalid raster_scanlines_per_thread in TCopyParams (" +
4013  std::to_string((int)cp.raster_scanlines_per_thread));
4014  } else {
4015  copy_params.raster_scanlines_per_thread = cp.raster_scanlines_per_thread;
4016  }
4017  switch (cp.raster_point_transform) {
4018  case TRasterPointTransform::NONE:
4020  break;
4021  case TRasterPointTransform::AUTO:
4023  break;
4024  case TRasterPointTransform::FILE:
4026  break;
4027  case TRasterPointTransform::WORLD:
4029  break;
4030  default:
4031  CHECK(false);
4032  }
4033  copy_params.raster_point_compute_angle = cp.raster_point_compute_angle;
4034  copy_params.raster_import_dimensions = cp.raster_import_dimensions;
  // The Thrift odbc_* fields map onto CopyParams members without the "odbc_" prefix.
4035  copy_params.dsn = cp.odbc_dsn;
4036  copy_params.connection_string = cp.odbc_connection_string;
4037  copy_params.sql_select = cp.odbc_sql_select;
4038  copy_params.sql_order_by = cp.odbc_sql_order_by;
4039  copy_params.username = cp.odbc_username;
4040  copy_params.password = cp.odbc_password;
4041  copy_params.credential_string = cp.odbc_credential_string;
4042  copy_params.add_metadata_columns = cp.add_metadata_columns;
4043  copy_params.trim_spaces = cp.trim_spaces;
4044  copy_params.geo_validate_geometry = cp.geo_validate_geometry;
4045  return copy_params;
4046 }
4047 
// Body of the reverse conversion: builds a TCopyParams from the server-side copy
// parameters `cp`, mostly by plain field-by-field assignment, and returns it.
// NOTE(review): the signature line and the `case` labels of several switches are
// not present in this listing; the comments below describe only the visible statements.
4049  TCopyParams copy_params;
4050  copy_params.delimiter = cp.delimiter;
4051  copy_params.null_str = cp.null_str;
4052  switch (cp.has_header) {
4054  copy_params.has_header = TImportHeaderRow::AUTODETECT;
4055  break;
4057  copy_params.has_header = TImportHeaderRow::NO_HEADER;
4058  break;
4060  copy_params.has_header = TImportHeaderRow::HAS_HEADER;
4061  break;
4062  default:
4063  CHECK(false);
4064  }
4065  copy_params.quoted = cp.quoted;
4066  copy_params.quote = cp.quote;
4067  copy_params.escape = cp.escape;
4068  copy_params.line_delim = cp.line_delim;
4069  copy_params.array_delim = cp.array_delim;
4070  copy_params.array_begin = cp.array_begin;
4071  copy_params.array_end = cp.array_end;
4072  copy_params.threads = cp.threads;
4073  copy_params.s3_access_key = cp.s3_access_key;
4074  copy_params.s3_secret_key = cp.s3_secret_key;
4075  copy_params.s3_session_token = cp.s3_session_token;
4076  copy_params.s3_region = cp.s3_region;
4077  copy_params.s3_endpoint = cp.s3_endpoint;
4078  switch (cp.source_type) {
4080  copy_params.source_type = TSourceType::DELIMITED_FILE;
4081  break;
4083  copy_params.source_type = TSourceType::GEO_FILE;
4084  break;
4086  copy_params.source_type = TSourceType::PARQUET_FILE;
4087  break;
4089  copy_params.source_type = TSourceType::RASTER_FILE;
4090  break;
4092  copy_params.source_type = TSourceType::ODBC;
4093  break;
4094  default:
4095  CHECK(false);
4096  }
  // Any encoding other than kENCODING_GEOINT is reported as NONE.
4097  switch (cp.geo_coords_encoding) {
4098  case kENCODING_GEOINT:
4099  copy_params.geo_coords_encoding = TEncodingType::GEOINT;
4100  break;
4101  default:
4102  copy_params.geo_coords_encoding = TEncodingType::NONE;
4103  break;
4104  }
4105  copy_params.geo_coords_comp_param = cp.geo_coords_comp_param;
  // Only kGEOGRAPHY and kGEOMETRY are expected here; anything else trips CHECK(false).
4106  switch (cp.geo_coords_type) {
4107  case kGEOGRAPHY:
4108  copy_params.geo_coords_type = TDatumType::GEOGRAPHY;
4109  break;
4110  case kGEOMETRY:
4111  copy_params.geo_coords_type = TDatumType::GEOMETRY;
4112  break;
4113  default:
4114  CHECK(false);
4115  }
4116  copy_params.geo_coords_srid = cp.geo_coords_srid;
4117  copy_params.sanitize_column_names = cp.sanitize_column_names;
4118  copy_params.geo_layer_name = cp.geo_layer_name;
  // Always reported as false; no field of `cp` is consulted for it.
4119  copy_params.geo_assign_render_groups = false;
4120  copy_params.geo_explode_collections = cp.geo_explode_collections;
4121  copy_params.source_srid = cp.source_srid;
4122  switch (cp.raster_point_type) {
4124  copy_params.raster_point_type = TRasterPointType::NONE;
4125  break;
4127  copy_params.raster_point_type = TRasterPointType::AUTO;
4128  break;
4130  copy_params.raster_point_type = TRasterPointType::SMALLINT;
4131  break;
4133  copy_params.raster_point_type = TRasterPointType::INT;
4134  break;
4136  copy_params.raster_point_type = TRasterPointType::FLOAT;
4137  break;
4139  copy_params.raster_point_type = TRasterPointType::DOUBLE;
4140  break;
4142  copy_params.raster_point_type = TRasterPointType::POINT;
4143  break;
4144  default:
4145  CHECK(false);
4146  }
4147  copy_params.raster_import_bands = cp.raster_import_bands;
4148  copy_params.raster_scanlines_per_thread = cp.raster_scanlines_per_thread;
4149  switch (cp.raster_point_transform) {
4151  copy_params.raster_point_transform = TRasterPointTransform::NONE;
4152  break;
4154  copy_params.raster_point_transform = TRasterPointTransform::AUTO;
4155  break;
4157  copy_params.raster_point_transform = TRasterPointTransform::FILE;
4158  break;
4160  copy_params.raster_point_transform = TRasterPointTransform::WORLD;
4161  break;
4162  default:
4163  CHECK(false);
4164  }
4165  copy_params.raster_point_compute_angle = cp.raster_point_compute_angle;
4166  copy_params.raster_import_dimensions = cp.raster_import_dimensions;
  // The server-side connection fields map onto the Thrift fields carrying an "odbc_" prefix.
4167  copy_params.odbc_dsn = cp.dsn;
4168  copy_params.odbc_connection_string = cp.connection_string;
4169  copy_params.odbc_sql_select = cp.sql_select;
4170  copy_params.odbc_sql_order_by = cp.sql_order_by;
4171  copy_params.odbc_username = cp.username;
4172  copy_params.odbc_password = cp.password;
4173  copy_params.odbc_credential_string = cp.credential_string;
4174  copy_params.add_metadata_columns = cp.add_metadata_columns;
4175  copy_params.trim_spaces = cp.trim_spaces;
4176  copy_params.geo_validate_geometry = cp.geo_validate_geometry;
4177  return copy_params;
4178 }
4179 
4180 namespace {
// Rewrites `path` in place so that GDAL reads it through a network virtual file
// system: "http://" / "https://" paths get "/vsicurl/" prepended, and a leading
// "s3://" is replaced by "/vsis3/". Other paths are left unchanged.
// When Geospatial::GDAL::supportsNetworkFileAccess() returns false, the messages
// below are reported for network paths (NOTE(review): the statement that raises
// them is not present in this listing - confirm).
4181 void add_vsi_network_prefix(std::string& path) {
4182  // do we support network file access?
4183  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
4184 
4185  // modify head of filename based on source location
4186  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
4187  if (!gdal_network) {
4189  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
4190  }
4191  // invoke GDAL CURL virtual file reader
4192  path = "/vsicurl/" + path;
4193  } else if (boost::istarts_with(path, "s3://")) {
4194  if (!gdal_network) {
4196  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
4197  }
4198  // invoke GDAL S3 virtual file reader
4199  boost::replace_first(path, "s3://", "/vsis3/");
4200  }
4201 }
4202 
4203 void add_vsi_geo_prefix(std::string& path) {
4204  // single gzip'd file (not an archive)?
4205  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
4206  path = "/vsigzip/" + path;
4207  }
4208 }
4209 
4210 void add_vsi_archive_prefix(std::string& path) {
4211  // check for compressed file or file bundle
4212  if (boost::iends_with(path, ".zip")) {
4213  // zip archive
4214  path = "/vsizip/" + path;
4215  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4216  boost::iends_with(path, ".tar.gz")) {
4217  // tar archive (compressed or uncompressed)
4218  path = "/vsitar/" + path;
4219  }
4220 }
4221 
4222 std::string remove_vsi_prefixes(const std::string& path_in) {
4223  std::string path(path_in);
4224 
4225  // these will be first
4226  if (boost::istarts_with(path, "/vsizip/")) {
4227  boost::replace_first(path, "/vsizip/", "");
4228  } else if (boost::istarts_with(path, "/vsitar/")) {
4229  boost::replace_first(path, "/vsitar/", "");
4230  } else if (boost::istarts_with(path, "/vsigzip/")) {
4231  boost::replace_first(path, "/vsigzip/", "");
4232  }
4233 
4234  // then these
4235  if (boost::istarts_with(path, "/vsicurl/")) {
4236  boost::replace_first(path, "/vsicurl/", "");
4237  } else if (boost::istarts_with(path, "/vsis3/")) {
4238  boost::replace_first(path, "/vsis3/", "s3://");
4239  }
4240 
4241  return path;
4242 }
4243 
4244 bool path_is_relative(const std::string& path) {
4245  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4246  boost::istarts_with(path, "https://")) {
4247  return false;
4248  }
4249  return !boost::filesystem::path(path).is_absolute();
4250 }
4251 
4252 bool path_has_valid_filename(const std::string& path) {
4253  auto filename = boost::filesystem::path(path).filename().string();
4254  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4255  return false;
4256  }
4257  return true;
4258 }
4259 
4260 bool is_a_supported_geo_file(const std::string& path) {
4261  if (!path_has_valid_filename(path)) {
4262  return false;
4263  }
4264  // this is now just for files that we want to recognize
4265  // as geo when inside an archive (see below)
4266  // @TODO(se) make this more flexible?
4267  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4268  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4269  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4270  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4271  return true;
4272  }
4273  return false;
4274 }
4275 
4276 bool is_a_supported_archive_file(const std::string& path) {
4277  if (!path_has_valid_filename(path)) {
4278  return false;
4279  }
4280  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4281  return true;
4282  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4283  boost::iends_with(path, ".tar.gz")) {
4284  return true;
4285  }
4286  return false;
4287 }
4288 
4289 std::string find_first_geo_file_in_archive(const std::string& archive_path,
4290  const import_export::CopyParams& copy_params) {
4291  // get the recursive list of all files in the archive
4292  std::vector<std::string> files =
4293  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4294 
4295  // report the list
4296  LOG(INFO) << "Found " << files.size() << " files in Archive "
4297  << remove_vsi_prefixes(archive_path);
4298  for (const auto& file : files) {
4299  LOG(INFO) << " " << file;
4300  }
4301 
4302  // scan the list for the first candidate file
4303  bool found_suitable_file = false;
4304  std::string file_name;
4305  for (const auto& file : files) {
4306  if (is_a_supported_geo_file(file)) {
4307  file_name = file;
4308  found_suitable_file = true;
4309  break;
4310  }
4311  }
4312 
4313  // if we didn't find anything
4314  if (!found_suitable_file) {
4315  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4316  remove_vsi_prefixes(archive_path);
4317  file_name.clear();
4318  }
4319 
4320  // done
4321  return file_name;
4322 }
4323 
4324 bool is_local_file(const std::string& file_path) {
4325  return (!boost::istarts_with(file_path, "s3://") &&
4326  !boost::istarts_with(file_path, "http://") &&
4327  !boost::istarts_with(file_path, "https://"));
4328 }
4329 
// For paths that is_local_file() accepts, runs a validation call that receives
// (file_path, ddl_utils::DataTransferType::IMPORT, true). Remote paths are not checked.
// NOTE(review): the name of the validation callee is not present in this listing - confirm.
4330 void validate_import_file_path_if_local(const std::string& file_path) {
4331  if (is_local_file(file_path)) {
4333  file_path, ddl_utils::DataTransferType::IMPORT, true);
4334  }
4335 }
4336 } // namespace
4337 
4338 void DBHandler::detect_column_types(TDetectResult& _return,
4339  const TSessionId& session_id_or_json,
4340  const std::string& file_name_in,
4341  const TCopyParams& cp) {
4342  heavyai::RequestInfo const request_info(session_id_or_json);
4343  SET_REQUEST_ID(request_info.requestId());
4344  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4345  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4346  check_read_only("detect_column_types");
4347 
4348  bool is_raster = false;
4349  boost::filesystem::path file_path;
4351  if (copy_params.source_type != import_export::SourceType::kOdbc) {
4352  std::string file_name{file_name_in};
4353  if (path_is_relative(file_name)) {
4354  // assume relative paths are relative to data_path / import / <session>
4355  auto temp_file_path = import_path_ /
4356  picosha2::hash256_hex_string(request_info.sessionId()) /
4357  boost::filesystem::path(file_name).filename();
4358  file_name = temp_file_path.string();
4359  }
4361 
4362  if ((copy_params.source_type == import_export::SourceType::kGeoFile ||
4364  is_local_file(file_name)) {
4365  const shared::FilePathOptions options{copy_params.regex_path_filter,
4366  copy_params.file_sort_order_by,
4367  copy_params.file_sort_regex};
4368  auto file_paths = shared::local_glob_filter_sort_files(file_name, options, false);
4369  // For geo and raster detect, pick the first file, if multiple files are provided
4370  // (e.g. through file globbing).
4371  CHECK(!file_paths.empty());
4372  file_name = file_paths[0];
4373  }
4374 
4375  // if it's a geo or raster import, handle alternative paths (S3, HTTP, archive etc.)
4376  if (copy_params.source_type == import_export::SourceType::kGeoFile) {
4377  if (is_a_supported_archive_file(file_name)) {
4378  // find the archive file
4379  add_vsi_network_prefix(file_name);
4380  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
4381  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
4382  }
4383  // find geo file in archive
4384  add_vsi_archive_prefix(file_name);
4385  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
4386  // prepare to detect that geo file
4387  if (geo_file.size()) {
4388  file_name = file_name + std::string("/") + geo_file;
4389  }
4390  } else {
4391  // prepare to detect geo file directly
4392  add_vsi_network_prefix(file_name);
4393  add_vsi_geo_prefix(file_name);
4394  }
4395  } else if (copy_params.source_type == import_export::SourceType::kRasterFile) {
4396  // prepare to detect raster file directly
4397  add_vsi_network_prefix(file_name);
4398  add_vsi_geo_prefix(file_name);
4399  is_raster = true;
4400  }
4401 
4402  file_path = boost::filesystem::path(file_name);
4403  // can be a s3 url
4404  if (!boost::istarts_with(file_name, "s3://")) {
4405  if (!boost::filesystem::path(file_name).is_absolute()) {
4406  file_path = import_path_ /
4407  picosha2::hash256_hex_string(request_info.sessionId()) /
4408  boost::filesystem::path(file_name).filename();
4409  file_name = file_path.string();
4410  }
4411 
4412  if (copy_params.source_type == import_export::SourceType::kGeoFile ||
4414  // check for geo or raster file
4415  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
4416  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
4417  "\" does not exist.")
4418  }
4419  } else {
4420  // check for regular file
4421  if (!shared::file_or_glob_path_exists(file_path.string())) {
4422  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
4423  "\" does not exist.");
4424  }
4425  }
4426  }
4427  }
4428 
4429  try {
4431 #ifdef ENABLE_IMPORT_PARQUET
4433 #endif
4434  ) {
4435  import_export::Detector detector(file_path, copy_params);
4436  auto best_types = detector.getBestColumnTypes();
4437  std::vector<std::string> headers = detector.get_headers();
4438  copy_params = detector.get_copy_params();
4439 
4440  _return.copy_params = copyparams_to_thrift(copy_params);
4441  _return.row_set.row_desc.resize(best_types.size());
4442  for (size_t col_idx = 0; col_idx < best_types.size(); col_idx++) {
4443  TColumnType col;
4444  auto& ti = best_types[col_idx];
4445  col.col_type.precision = ti.get_precision();
4446  col.col_type.scale = ti.get_scale();
4447  col.col_type.comp_param = ti.get_comp_param();
4448  if (ti.is_geometry()) {
4449  // set this so encoding_to_thrift does the right thing
4450  ti.set_compression(copy_params.geo_coords_encoding);
4451  // fill in these directly
4452  col.col_type.precision = static_cast<int>(copy_params.geo_coords_type);
4453  col.col_type.scale = copy_params.geo_coords_srid;
4454  col.col_type.comp_param = copy_params.geo_coords_comp_param;
4455  }
4456  col.col_type.type = type_to_thrift(ti);
4457  col.col_type.encoding = encoding_to_thrift(ti);
4458  if (ti.is_array()) {
4459  col.col_type.is_array = true;
4460  }
4461  if (copy_params.sanitize_column_names) {
4462  col.col_name = ImportHelpers::sanitize_name(headers[col_idx]);
4463  } else {
4464  col.col_name = headers[col_idx];
4465  }
4466  col.is_reserved_keyword = ImportHelpers::is_reserved_name(col.col_name);
4467  _return.row_set.row_desc[col_idx] = col;
4468  }
4469  auto sample_data = detector.get_sample_rows(shared::kDefaultSampleRowsCount);
4470 
4471  TRow sample_row;
4472  for (auto row : sample_data) {
4473  sample_row.cols.clear();
4474  for (const auto& s : row) {
4475  TDatum td;
4476  td.val.str_val = s;
4477  td.is_null = s.empty();
4478  sample_row.cols.push_back(td);
4479  }
4480  _return.row_set.rows.push_back(sample_row);
4481  }
4482  } else if (copy_params.source_type == import_export::SourceType::kGeoFile ||
4484  check_geospatial_files(file_path, copy_params);
4485  std::list<ColumnDescriptor> cds = import_export::Importer::gdalToColumnDescriptors(
4486  file_path.string(), is_raster, Geospatial::kGeoColumnName, copy_params);
4487  for (auto cd : cds) {
4488  if (copy_params.sanitize_column_names) {
4489  cd.columnName = ImportHelpers::sanitize_name(cd.columnName);
4490  }
4491  _return.row_set.row_desc.push_back(populateThriftColumnType(nullptr, &cd));
4492  }
4493  if (!is_raster) {
4494  // @TODO(se) support for raster?
4495  std::map<std::string, std::vector<std::string>> sample_data;
4498  sample_data,
4500  copy_params);
4501  if (sample_data.size() > 0) {
4502  for (size_t i = 0; i < sample_data.begin()->second.size(); i++) {
4503  TRow sample_row;
4504  for (auto cd : cds) {
4505  TDatum td;
4506  td.val.str_val = sample_data[cd.sourceName].at(i);
4507  td.is_null = td.val.str_val.empty();
4508  sample_row.cols.push_back(td);
4509  }
4510  _return.row_set.rows.push_back(sample_row);
4511  }
4512  }
4513  }
4514  _return.copy_params = copyparams_to_thrift(copy_params);
4515  }
4516  } catch (const std::exception& e) {
4517  THROW_DB_EXCEPTION("detect_column_types error: " + std::string(e.what()));
4518  }
4519 }
4520 
4521 void DBHandler::render_vega(TRenderResult& _return,
4522  const TSessionId& session_id_or_json,
4523  const int64_t widget_id,
4524  const std::string& vega_json,
4525  const int compression_level,
4526  const std::string& nonce) {
4527  heavyai::RequestInfo const request_info(session_id_or_json);
4528  SET_REQUEST_ID(request_info.requestId());
4529  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
4530  "widget_id",
4531  widget_id,
4532  "compression_level",
4533  compression_level,
4534  "vega_json",
4535  vega_json,
4536  "nonce",
4537  nonce);
4538  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4539  stdlog.appendNameValuePairs("nonce", nonce);
4540  if (!render_handler_) {
4541  THROW_DB_EXCEPTION("Backend rendering is disabled.");
4542  }
4543 
4544  // cast away const-ness of incoming Thrift string ref
4545  // to allow it to be passed down as an r-value and
4546  // ultimately std::moved into the RenderSession
4547  auto& non_const_vega_json = const_cast<std::string&>(vega_json);
4548 
4549  _return.total_time_ms = measure<>::execution([&]() {
4550  try {
4551  render_handler_->render_vega(_return,
4552  stdlog.getSessionInfo(),
4553  widget_id,
4554  std::move(non_const_vega_json),
4555  compression_level,
4556  nonce);
4557  } catch (std::exception& e) {
4558  THROW_DB_EXCEPTION(e.what());
4559  }
4560  });
4561 }
4562 
// Privilege check for a single dashboard: builds a DBObject for `dashboard_id`, loads
// its key from the session's catalog, attaches `requestedPermissions`, and returns the
// result of SysCatalog::instance().checkPrivileges() for the session's current user.
// NOTE(review): the opening line of this definition (return type, function name and the
// `session_info` parameter) is not present in this listing; the embedded numbering
// skips from 4562 to 4564. Confirm against the original file before editing.
4564  int32_t dashboard_id,
4565  AccessPrivileges requestedPermissions) {
4566  DBObject object(dashboard_id, DashboardDBObjectType);
4567  auto& catalog = session_info.getCatalog();
4568  auto& user = session_info.get_currentUser();
// The key must be loaded before the object is handed to checkPrivileges.
4569  object.loadKey(catalog);
4570  object.setPrivileges(requestedPermissions);
4571  std::vector<DBObject> privs = {object};
4572  return SysCatalog::instance().checkPrivileges(user, privs);
4573 }
4574 
4575 // custom expressions
4576 namespace {
4579 
4580 std::unique_ptr<Catalog_Namespace::CustomExpression> create_custom_expr_from_thrift_obj(
4581  const TCustomExpression& t_custom_expr,
4582  const Catalog& catalog) {
4583  if (t_custom_expr.data_source_name.empty()) {
4584  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4585  }
4586  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4587  << "Unexpected data source type: "
4588  << static_cast<int>(t_custom_expr.data_source_type);
4589  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4590  if (!td) {
4591  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4592  t_custom_expr.data_source_name + "\" that does not exist.")
4593  }
4594  DataSourceType data_source_type = DataSourceType::TABLE;
4595  return std::make_unique<CustomExpression>(
4596  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4597 }
4598 
4599 TCustomExpression create_thrift_obj_from_custom_expr(const CustomExpression& custom_expr,
4600  const Catalog& catalog) {
4601  TCustomExpression t_custom_expr;
4602  t_custom_expr.id = custom_expr.id;
4603  t_custom_expr.name = custom_expr.name;
4604  t_custom_expr.expression_json = custom_expr.expression_json;
4605  t_custom_expr.data_source_id = custom_expr.data_source_id;
4606  t_custom_expr.is_deleted = custom_expr.is_deleted;
4607  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4608  << "Unexpected data source type: "
4609  << static_cast<int>(custom_expr.data_source_type);
4610  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4611  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4612  if (td) {
4613  t_custom_expr.data_source_name = td->tableName;
4614  } else {
4615  LOG(WARNING)
4616  << "Custom expression references a deleted data source. Custom expression id: "
4617  << custom_expr.id << ", name: " << custom_expr.name;
4618  }
4619  return t_custom_expr;
4620 }
4621 } // namespace
4622 
// Thrift endpoint that creates a custom expression in the session's catalog.
// Only super users may call it; any other user gets a DB exception. The thrift object
// is converted with create_custom_expr_from_thrift_obj() and the value returned by
// catalog.createCustomExpression() is handed back to the caller.
// NOTE(review): embedded line 4636 is missing from this listing; confirm against the
// original file before editing.
4623 int32_t DBHandler::create_custom_expression(const TSessionId& session_id_or_json,
4624  const TCustomExpression& t_custom_expr) {
4625  heavyai::RequestInfo const request_info(session_id_or_json);
4626  SET_REQUEST_ID(request_info.requestId());
4627  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4628  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
// presumably rejects the request when the server is read-only - TODO confirm
4629  check_read_only("create_custom_expression");
4630 
4631  auto session_ptr = stdlog.getConstSessionInfo();
4632  if (!session_ptr->get_currentUser().isSuper) {
4633  THROW_DB_EXCEPTION("Custom expressions can only be created by super users.")
4634  }
4635  auto& catalog = session_ptr->getCatalog();
4637  return catalog.createCustomExpression(
4638  create_custom_expr_from_thrift_obj(t_custom_expr, catalog));
4639 }
4640 
// Thrift endpoint that appends to `_return` the thrift form of every custom expression
// returned by catalog.getCustomExpressionsForUser() for the session's current user.
// `_return` is appended to, not cleared, by this function.
// NOTE(review): embedded line 4650 is missing from this listing; confirm against the
// original file before editing.
4641 void DBHandler::get_custom_expressions(std::vector<TCustomExpression>& _return,
4642  const TSessionId& session_id_or_json) {
4643  heavyai::RequestInfo const request_info(session_id_or_json);
4644  SET_REQUEST_ID(request_info.requestId());
4645  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4646  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4647 
4648  auto session_ptr = stdlog.getConstSessionInfo();
4649  auto& catalog = session_ptr->getCatalog();
4651  auto custom_expressions =
4652  catalog.getCustomExpressionsForUser(session_ptr->get_currentUser());
4653  for (const auto& custom_expression : custom_expressions) {
4654  _return.emplace_back(create_thrift_obj_from_custom_expr(*custom_expression, catalog));
4655  }
4656 }
4657 
// Thrift endpoint that replaces the expression JSON of the custom expression `id` by
// calling catalog.updateCustomExpression(). Only super users may call it; any other
// user gets a DB exception.
// NOTE(review): embedded line 4672 is missing from this listing; confirm against the
// original file before editing.
4658 void DBHandler::update_custom_expression(const TSessionId& session_id_or_json,
4659  const int32_t id,
4660  const std::string& expression_json) {
4661  heavyai::RequestInfo const request_info(session_id_or_json);
4662  SET_REQUEST_ID(request_info.requestId());
4663  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4664  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
// presumably rejects the request when the server is read-only - TODO confirm
4665  check_read_only("update_custom_expression");
4666 
4667  auto session_ptr = stdlog.getConstSessionInfo();
4668  if (!session_ptr->get_currentUser().isSuper) {
4669  THROW_DB_EXCEPTION("Custom expressions can only be updated by super users.")
4670  }
4671  auto& catalog = session_ptr->getCatalog();
4673  catalog.updateCustomExpression(id, expression_json);
4674 }
4675 
// Thrift endpoint that deletes the custom expressions listed in
// `custom_expression_ids` by calling catalog.deleteCustomExpressions(), forwarding
// `do_soft_delete` unchanged. Only super users may call it; any other user gets a DB
// exception.
// NOTE(review): the opening line of this definition (embedded 4676: return type and
// function name) and embedded line 4691 are missing from this listing; confirm against
// the original file before editing.
4677  const TSessionId& session_id_or_json,
4678  const std::vector<int32_t>& custom_expression_ids,
4679  const bool do_soft_delete) {
4680  heavyai::RequestInfo const request_info(session_id_or_json);
4681  SET_REQUEST_ID(request_info.requestId());
4682  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4683  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
// presumably rejects the request when the server is read-only - TODO confirm
4684  check_read_only("delete_custom_expressions");
4685 
4686  auto session_ptr = stdlog.getConstSessionInfo();
4687  if (!session_ptr->get_currentUser().isSuper) {
4688  THROW_DB_EXCEPTION("Custom expressions can only be deleted by super users.")
4689  }
4690  auto& catalog = session_ptr->getCatalog();
4692  catalog.deleteCustomExpressions(custom_expression_ids, do_soft_delete);
4693 }
4694 
4695 // dashboards
// Thrift endpoint that fetches one dashboard by id and writes it to `dashboard`.
// Raises a DB exception when the dashboard does not exist or when the privilege test
// for AccessPrivileges::VIEW_DASHBOARD fails. The result is built by
// get_dashboard_impl() using its default for the state-population flag.
// NOTE(review): embedded lines 4705 and 4711 are missing from this listing (the
// declaration of `user_meta` and the opening of the privilege `if` are not visible);
// confirm against the original file before editing.
4696 void DBHandler::get_dashboard(TDashboard& dashboard,
4697  const TSessionId& session_id_or_json,
4698  const int32_t dashboard_id) {
4699  heavyai::RequestInfo const request_info(session_id_or_json);
4700  SET_REQUEST_ID(request_info.requestId());
4701  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4702  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4703  auto session_ptr = stdlog.getConstSessionInfo();
4704  auto const& cat = session_ptr->getCatalog();
4706  auto dash = cat.getMetadataForDashboard(dashboard_id);
4707  if (!dash) {
4708  THROW_DB_EXCEPTION("Dashboard with dashboard id " + std::to_string(dashboard_id) +
4709  " doesn't exist");
4710  }
4712  *session_ptr, dash->dashboardId, AccessPrivileges::VIEW_DASHBOARD)) {
4713  THROW_DB_EXCEPTION("User has no view privileges for the dashboard with id " +
4714  std::to_string(dashboard_id));
4715  }
// Reset the name, then load the metadata of the dashboard's owner.
4716  user_meta.userName = "";
4717  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4718  dashboard = get_dashboard_impl(session_ptr, user_meta, dash);
4719 }
4720 
// Thrift endpoint that appends to `dashboards` one entry per dashboard returned by
// cat.getAllDashboardsMetadata() that passes the privilege test for
// AccessPrivileges::VIEW_DASHBOARD. Entries are built with populate_state == false, so
// the dashboard state is not included.
// NOTE(review): embedded lines 4729 and 4733 are missing from this listing (the
// declaration of `user_meta` and the opening of the privilege `if` are not visible);
// confirm against the original file before editing.
4721 void DBHandler::get_dashboards(std::vector<TDashboard>& dashboards,
4722  const TSessionId& session_id_or_json) {
4723  heavyai::RequestInfo const request_info(session_id_or_json);
4724  SET_REQUEST_ID(request_info.requestId());
4725  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4726  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4727  auto session_ptr = stdlog.getConstSessionInfo();
4728  auto const& cat = session_ptr->getCatalog();
4730  const auto dashes = cat.getAllDashboardsMetadata();
4731  user_meta.userName = "";
4732  for (const auto dash : dashes) {
4734  *session_ptr, dash->dashboardId, AccessPrivileges::VIEW_DASHBOARD)) {
4735  // dashboardState is intentionally not populated here
4736  // for payload reasons
4737  // use get_dashboard call to get state
4738  dashboards.push_back(get_dashboard_impl(session_ptr, user_meta, dash, false));
4739  }
4740  }
4741 }
4742 
// Builds the thrift TDashboard for the dashboard descriptor `dash`.
//  - `user_meta` is overwritten with the metadata of the dashboard's owner
//    (dash->userId) and later used to decide whether the dashboard is shared.
//  - `dashboard_state` is copied only when `populate_state` is true.
//  - `dashboard_permissions` is all-true for super users; for other users it is the OR
//    of the privileges found on this dashboard for every grantee returned by
//    SysCatalog::getRoles() for the current user.
//  - `is_dash_shared` is false when no object-role entry exists for the dashboard, or
//    when the only entry belongs to the owner; otherwise true.
// NOTE(review): embedded lines 4743 and 4745 are missing from this listing (the line
// carrying the return type and function name, and the declaration of the `user_meta`
// parameter); confirm against the original file before editing.
4744  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
4746  const DashboardDescriptor* dash,
4747  const bool populate_state) {
4748  auto const& cat = session_ptr->getCatalog();
4749  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4750  auto objects_list = SysCatalog::instance().getMetadataForObject(
4751  cat.getCurrentDB().dbId,
4752  static_cast<int>(DBObjectType::DashboardDBObjectType),
4753  dash->dashboardId);
4754  TDashboard dashboard;
4755  dashboard.dashboard_name = dash->dashboardName;
4756  if (populate_state) {
4757  dashboard.dashboard_state = dash->dashboardState;
4758  }
4759  dashboard.image_hash = dash->imageHash;
4760  dashboard.update_time = dash->updateTime;
4761  dashboard.dashboard_metadata = dash->dashboardMetadata;
4762  dashboard.dashboard_id = dash->dashboardId;
4763  dashboard.dashboard_owner = dash->user;
// The |= accumulation below assumes the permission flags start out false -
// presumably the thrift defaults; TODO confirm.
4764  TDashboardPermissions perms;
4765  // Super user has all permissions.
4766  if (session_ptr->get_currentUser().isSuper) {
4767  perms.create_ = true;
4768  perms.delete_ = true;
4769  perms.edit_ = true;
4770  perms.view_ = true;
4771  } else {
4772  // Collect all grants on current user
4773  // add them to the permissions.
4774  auto obj_to_find =
4775  DBObject(dashboard.dashboard_id, DBObjectType::DashboardDBObjectType);
4776  obj_to_find.loadKey(cat);
4777  std::vector<std::string> grantees =
4778  SysCatalog::instance().getRoles(true,
4779  session_ptr->get_currentUser().isSuper,
4780  session_ptr->get_currentUser().userName);
4781  for (const auto& grantee : grantees) {
4782  DBObject* object_found;
4783  auto* gr = SysCatalog::instance().getGrantee(grantee);
// object_found is assigned inside the condition; grantees that cannot be resolved or
// that hold nothing on this dashboard are skipped.
4784  if (gr && (object_found = gr->findDbObject(obj_to_find.getObjectKey(), true))) {
4785  const auto obj_privs = object_found->getPrivileges();
4786  perms.create_ |= obj_privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD);
4787  perms.delete_ |= obj_privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD);
4788  perms.edit_ |= obj_privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD);
4789  perms.view_ |= obj_privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD);
4790  }
4791  }
4792  }
4793  dashboard.dashboard_permissions = perms;
// Not shared when nobody, or only the owner, has an entry for this dashboard.
4794  if (objects_list.empty() ||
4795  (objects_list.size() == 1 && objects_list[0]->roleName == user_meta.userName)) {
4796  dashboard.is_dash_shared = false;
4797  } else {
4798  dashboard.is_dash_shared = true;
4799  }
4800  return dashboard;
4801 }
4802 
4803 namespace dbhandler {
4804 bool is_info_schema_db(const std::string& db_name) {
4805  return (db_name == shared::kInfoSchemaDbName &&
4806  SysCatalog::instance().hasExecutedMigration(shared::kInfoSchemaMigrationName));
4807 }
4808 
4809 void check_not_info_schema_db(const std::string& db_name, bool throw_db_exception) {
4810  if (is_info_schema_db(db_name)) {
4811  std::string error_message{"Write requests/queries are not allowed in the " +
4812  shared::kInfoSchemaDbName + " database."};
4813  if (throw_db_exception) {
4814  THROW_DB_EXCEPTION(error_message)
4815  } else {
4816  throw std::runtime_error(error_message);
4817  }
4818  }
4819 }
4820 } // namespace dbhandler
4821 
// Thrift endpoint that creates a dashboard owned by the session's current user and
// returns the id produced by cat.createDashboard().
// Raises a DB exception when the privilege check fails, when dashboard_exists()
// reports a dashboard of that name for the current user, or when the catalog calls
// throw a std::exception.
// NOTE(review): embedded lines 4835-4836, 4840 and 4848 are missing from this listing
// (the opening of the block closed at 4837, the second argument line of
// checkDBAccessPrivileges, and the declaration of `dd`); confirm against the original
// file before editing.
4822 int32_t DBHandler::create_dashboard(const TSessionId& session_id_or_json,
4823  const std::string& dashboard_name,
4824  const std::string& dashboard_state,
4825  const std::string& image_hash,
4826  const std::string& dashboard_metadata) {
4827  heavyai::RequestInfo const request_info(session_id_or_json);
4828  SET_REQUEST_ID(request_info.requestId());
4829  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4830  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4831  auto session_ptr = stdlog.getConstSessionInfo();
4832  CHECK(session_ptr);
// presumably rejects the request when the server is read-only - TODO confirm
4833  check_read_only("create_dashboard");
4834  auto& cat = session_ptr->getCatalog();
4837  }
4838 
4839  if (!session_ptr->checkDBAccessPrivileges(DBObjectType::DashboardDBObjectType,
4841  THROW_DB_EXCEPTION("Not enough privileges to create a dashboard.");
4842  }
4843 
4844  if (dashboard_exists(cat, session_ptr->get_currentUser().userId, dashboard_name)) {
4845  THROW_DB_EXCEPTION("Dashboard with name: " + dashboard_name + " already exists.");
4846  }
4847 
4849  dd.dashboardName = dashboard_name;
4850  dd.dashboardState = dashboard_state;
4851  dd.imageHash = image_hash;
4852  dd.dashboardMetadata = dashboard_metadata;
4853  dd.userId = session_ptr->get_currentUser().userId;
4854  dd.user = session_ptr->get_currentUser().userName;
4855 
4856  try {
4857  auto id = cat.createDashboard(dd);
4858  // TODO: transactionally unsafe
// The dashboard is created first and its DB object registered second; if the second
// call throws, the exception is converted below and nothing here undoes the first.
4859  SysCatalog::instance().createDBObject(
4860  session_ptr->get_currentUser(), dashboard_name, DashboardDBObjectType, cat, id);
4861  return id;
4862  } catch (const std::exception& e) {
4863  THROW_DB_EXCEPTION(e.what());
4864  }
4865 }
4866 
// Thrift endpoint that replaces dashboard `dashboard_id` with the supplied name,
// state, image hash and metadata, owned by `dashboard_owner`, via
// cat.replaceDashboard().
// Raises a DB exception when the privilege test for AccessPrivileges::EDIT_DASHBOARD
// fails, when a different dashboard with the same name is found for the current user,
// when `dashboard_owner` is not a known user, or when the catalog call throws a
// std::exception.
// NOTE(review): embedded lines 4882-4883, 4886, 4898 and 4903 are missing from this
// listing (the opening of the block closed at 4884, the opening of the privilege `if`,
// and the declarations of `dd` and `user`); confirm against the original file before
// editing.
4867 void DBHandler::replace_dashboard(const TSessionId& session_id_or_json,
4868  const int32_t dashboard_id,
4869  const std::string& dashboard_name,
4870  const std::string& dashboard_owner,
4871  const std::string& dashboard_state,
4872  const std::string& image_hash,
4873  const std::string& dashboard_metadata) {
4874  heavyai::RequestInfo const request_info(session_id_or_json);
4875  SET_REQUEST_ID(request_info.requestId());
4876  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4877  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4878  auto session_ptr = stdlog.getConstSessionInfo();
4879  CHECK(session_ptr);
// presumably rejects the request when the server is read-only - TODO confirm
4880  check_read_only("replace_dashboard");
4881  auto& cat = session_ptr->getCatalog();
4884  }
4885 
4887  *session_ptr, dashboard_id, AccessPrivileges::EDIT_DASHBOARD)) {
4888  THROW_DB_EXCEPTION("Not enough privileges to replace a dashboard.");
4889  }
4890 
// Name clash check: finding the dashboard being replaced under this name is fine, any
// other id is rejected. The lookup is keyed on the current user's id, not on
// `dashboard_owner`.
4891  if (auto dash = cat.getMetadataForDashboard(
4892  std::to_string(session_ptr->get_currentUser().userId), dashboard_name)) {
4893  if (dash->dashboardId != dashboard_id) {
4894  THROW_DB_EXCEPTION("Dashboard with name: " + dashboard_name + " already exists.");
4895  }
4896  }
4897 
4899  dd.dashboardName = dashboard_name;
4900  dd.dashboardState = dashboard_state;
4901  dd.imageHash = image_hash;
4902  dd.dashboardMetadata = dashboard_metadata;
4904  if (!SysCatalog::instance().getMetadataForUser(dashboard_owner, user)) {
4905  THROW_DB_EXCEPTION(std::string("Dashboard owner ") + dashboard_owner +
4906  " does not exist");
4907  }
4908  dd.userId = user.userId;
4909  dd.user = dashboard_owner;
4910  dd.dashboardId = dashboard_id;
4911 
4912  try {
4913  cat.replaceDashboard(dd);
4914  } catch (const std::exception& e) {
4915  THROW_DB_EXCEPTION(e.what());
4916  }
4917 }
4918 
4919 void DBHandler::delete_dashboard(const TSessionId& session_id_or_json,
4920  const int32_t dashboard_id) {
4921  delete_dashboards(session_id_or_json, {dashboard_id});
4922 }
4923 
// Thrift endpoint that deletes the dashboards listed in `dashboard_ids` by calling
// cat.deleteMetadataForDashboards() with the session's current user. Any
// std::exception thrown by that call is converted into a DB exception.
// NOTE(review): embedded lines 4933-4934 are missing from this listing (the opening of
// the block closed at 4935); confirm against the original file before editing.
4924 void DBHandler::delete_dashboards(const TSessionId& session_id_or_json,
4925  const std::vector<int32_t>& dashboard_ids) {
4926  heavyai::RequestInfo const request_info(session_id_or_json);
4927  SET_REQUEST_ID(request_info.requestId());
4928  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4929  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4930  auto session_ptr = stdlog.getConstSessionInfo();
// presumably rejects the request when the server is read-only - TODO confirm
4931  check_read_only("delete_dashboards");
4932  auto& cat = session_ptr->getCatalog();
4935  }
4936  // Checks will be performed in catalog
4937  try {
4938  cat.deleteMetadataForDashboards(dashboard_ids, session_ptr->get_currentUser());
4939  } catch (const std::exception& e) {
4940  THROW_DB_EXCEPTION(e.what());
4941  }
4942 }
4943 
// Returns the entries of `groups` after checking that dashboard `dashboard_id` exists,
// that the caller is its owner or a super user, and that every entry resolves to a
// grantee in the system catalog.
// Raises a DB exception for an unknown dashboard or an unknown user/role, and a
// std::runtime_error when the caller is neither owner nor super user.
// NOTE(review): the two failure paths above use different exception types - confirm
// this is intended.
// NOTE(review): embedded line 4961 is missing from this listing (the declaration of
// `user_meta` is not visible); confirm against the original file before editing.
4944 std::vector<std::string> DBHandler::get_valid_groups(const TSessionId& session_id_or_json,
4945  int32_t dashboard_id,
4946  std::vector<std::string> groups) {
4947  heavyai::RequestInfo const request_info(session_id_or_json);
4948  SET_REQUEST_ID(request_info.requestId());
4949  const auto session_info = get_session_copy(request_info.sessionId());
4950  auto& cat = session_info.getCatalog();
4951  auto dash = cat.getMetadataForDashboard(dashboard_id);
4952  if (!dash) {
4953  THROW_DB_EXCEPTION("Dashboard id " + std::to_string(dashboard_id) +
4954  " does not exist");
4955  } else if (session_info.get_currentUser().userId != dash->userId &&
4956  !session_info.get_currentUser().isSuper) {
4957  throw std::runtime_error(
4958  "User should be either owner of dashboard or super user to share/unshare it");
4959  }
4960  std::vector<std::string> valid_groups;
4962  for (auto& group : groups) {
// NOTE(review): nothing visible in this loop writes user_meta.isSuper after this
// reset, so the `!user_meta.isSuper` test below always passes and every resolvable
// group is kept.
4963  user_meta.isSuper = false; // initialize default flag
4964  if (!SysCatalog::instance().getGrantee(group)) {
4965  THROW_DB_EXCEPTION("User/Role " + group + " does not exist");
4966  } else if (!user_meta.isSuper) {
4967  valid_groups.push_back(group);
4968  }
4969  }
4970  return valid_groups;
4971 }
4972 
4973 void DBHandler::validateGroups(const std::vector<std::string>& groups) {
4974  for (auto const& group : groups) {
4975  if (!SysCatalog::instance().getGrantee(group)) {
4976  THROW_DB_EXCEPTION("User/Role '" + group + "' does not exist");
4977  }
4978  }
4979 }
4980 
// Validates every id in `dashboard_ids` before a share/unshare operation: the
// dashboard must exist and the session's current user must be its owner or a super
// user. All failures are collected first, grouped by error text, and then reported
// together in a single DB exception.
// NOTE(review): the opening line of this definition (embedded 4981: return type and
// function name) is missing from this listing; confirm against the original file
// before editing.
4982  const Catalog_Namespace::SessionInfo& session_info,
4983  const std::vector<int32_t>& dashboard_ids) {
4984  auto& cat = session_info.getCatalog();
// error text -> ids that failed with that error
4985  std::map<std::string, std::list<int32_t>> errors;
4986  for (auto const& dashboard_id : dashboard_ids) {
4987  auto dashboard = cat.getMetadataForDashboard(dashboard_id);
4988  if (!dashboard) {
4989  errors["Dashboard id does not exist"].push_back(dashboard_id);
4990  } else if (session_info.get_currentUser().userId != dashboard->userId &&
4991  !session_info.get_currentUser().isSuper) {
4992  errors["User should be either owner of dashboard or super user to share/unshare it"]
4993  .push_back(dashboard_id);
4994  }
4995  }
4996  if (!errors.empty()) {
4997  std::stringstream error_stream;
4998  error_stream << "Share/Unshare dashboard(s) failed with error(s)\n";
4999  for (const auto& [error, id_list] : errors) {
5000  error_stream << "Dashboard ids " << join(id_list, ", ") << ": " << error << "\n";
5001  }
5002  THROW_DB_EXCEPTION(error_stream.str());
5003  }
5004 }
5005 
// Shared implementation behind the share/unshare endpoints: grants (do_share == true)
// or revokes (do_share == false) dashboard privileges on `dashboard_ids` for `groups`
// in one batch call to the system catalog.
// Raises a DB exception when none of the four permission flags is set; validateGroups()
// and validateDashboardIdsForSharing() run before any privilege is changed.
// NOTE(review): embedded lines 5029, 5032, 5035 and 5038 are missing from this listing,
// so the bodies of the four `if (permissions.*)` blocks appear empty here; presumably
// each adds the matching privilege to `privs` - confirm against the original file
// before editing.
5006 void DBHandler::shareOrUnshareDashboards(const TSessionId& session_id,
5007  const std::vector<int32_t>& dashboard_ids,
5008  const std::vector<std::string>& groups,
5009  const TDashboardPermissions& permissions,
5010  const bool do_share) {
5011  auto stdlog = STDLOG(get_session_ptr(session_id));
5012  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
// presumably rejects the request when the server is read-only - TODO confirm
5013  check_read_only(do_share ? "share_dashboards" : "unshare_dashboards");
5014  if (!permissions.create_ && !permissions.delete_ && !permissions.edit_ &&
5015  !permissions.view_) {
5016  THROW_DB_EXCEPTION("At least one privilege should be assigned for " +
5017  std::string(do_share ? "grants" : "revokes"));
5018  }
5019  auto session_ptr = stdlog.getConstSessionInfo();
5020  auto const& catalog = session_ptr->getCatalog();
5021  auto& sys_catalog = SysCatalog::instance();
5022  validateGroups(groups);
5023  validateDashboardIdsForSharing(*session_ptr, dashboard_ids);
// One DBObject per dashboard, each carrying the same privilege set.
5024  std::vector<DBObject> batch_objects;
5025  for (auto const& dashboard_id : dashboard_ids) {
5026  DBObject object(dashboard_id, DBObjectType::DashboardDBObjectType);
5027  AccessPrivileges privs;
5028  if (permissions.delete_) {
5030  }
5031  if (permissions.create_) {
5033  }
5034  if (permissions.edit_) {
5036  }
5037  if (permissions.view_) {
5039  }
5040  object.setPrivileges(privs);
5041  batch_objects.push_back(object);
5042  }
5043  if (do_share) {
5044  sys_catalog.grantDBObjectPrivilegesBatch(groups, batch_objects, catalog);
5045  } else {
5046  sys_catalog.revokeDBObjectPrivilegesBatch(groups, batch_objects, catalog);
5047  }
5048 }
5049 
// Thrift endpoint for sharing dashboards with users/roles: extracts the session id
// from `session_id_or_json`, sets the request id, and forwards the arguments with a
// trailing `true`.
// NOTE(review): embedded line 5056 (the callee's name) is missing from this listing;
// the argument list matches shareOrUnshareDashboards(..., do_share) - confirm against
// the original file before editing.
5050 void DBHandler::share_dashboards(const TSessionId& session_id_or_json,
5051  const std::vector<int32_t>& dashboard_ids,
5052  const std::vector<std::string>& groups,
5053  const TDashboardPermissions& permissions) {
5054  heavyai::RequestInfo const request_info(session_id_or_json);
5055  SET_REQUEST_ID(request_info.requestId());
5057  request_info.sessionId(), dashboard_ids, groups, permissions, true);
5058 }
5059 
5060 // NOOP: Grants not available for objects as of now
5061 void DBHandler::share_dashboard(const TSessionId& session_id_or_json,
5062  const int32_t dashboard_id,
5063  const std::vector<std::string>& groups,
5064  const std::vector<std::string>& objects,
5065  const TDashboardPermissions& permissions,
5066  const bool grant_role = false) {
5067  share_dashboards(session_id_or_json, {dashboard_id}, groups, permissions);
5068 }
5069 
// Thrift endpoint for revoking dashboard sharing from users/roles: extracts the
// session id from `session_id_or_json`, sets the request id, and forwards the
// arguments with a trailing `false`.
// NOTE(review): embedded line 5076 (the callee's name) is missing from this listing;
// the argument list matches shareOrUnshareDashboards(..., do_share) - confirm against
// the original file before editing.
5070 void DBHandler::unshare_dashboards(const TSessionId& session_id_or_json,
5071  const std::vector<int32_t>& dashboard_ids,
5072  const std::vector<std::string>& groups,
5073  const TDashboardPermissions& permissions) {
5074  heavyai::RequestInfo const request_info(session_id_or_json);
5075  SET_REQUEST_ID(request_info.requestId());
5077  request_info.sessionId(), dashboard_ids, groups, permissions, false);
5078 }
5079 
5080 void DBHandler::unshare_dashboard(const TSessionId& session_id_or_json,
5081  const int32_t dashboard_id,
5082  const std::vector<std::string>& groups,
5083  const std::vector<std::string>& objects,
5084  const TDashboardPermissions& permissions) {
5085  unshare_dashboards(session_id_or_json, {dashboard_id}, groups, permissions);
5086 }
5087 
// Thrift endpoint that appends to `dashboard_grantees` one entry per role/user holding
// privileges on dashboard `dashboard_id`, excluding the dashboard's owner. Each entry
// carries the grantee name, the `is_user` flag and the four dashboard permissions.
// The dashboard must exist (DB exception otherwise) and the caller must be its owner
// or a super user.
// NOTE(review): embedded lines 5088, 5098 and 5105 are missing from this listing (the
// line with the return type and function name, the declaration of `user_meta`, and the
// statement that raises the owner/super-user error); confirm against the original file
// before editing.
5089  std::vector<TDashboardGrantees>& dashboard_grantees,
5090  const TSessionId& session_id_or_json,
5091  const int32_t dashboard_id) {
5092  heavyai::RequestInfo const request_info(session_id_or_json);
5093  SET_REQUEST_ID(request_info.requestId());
5094  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
5095  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5096  auto session_ptr = stdlog.getConstSessionInfo();
5097  auto const& cat = session_ptr->getCatalog();
5099  auto dash = cat.getMetadataForDashboard(dashboard_id);
5100  if (!dash) {
5101  THROW_DB_EXCEPTION("Dashboard id " + std::to_string(dashboard_id) +
5102  " does not exist");
5103  } else if (session_ptr->get_currentUser().userId != dash->userId &&
5104  !session_ptr->get_currentUser().isSuper) {
5106  "User should be either owner of dashboard or super user to access grantees");
5107  }
5108  std::vector<ObjectRoleDescriptor*> objectsList;
5109  objectsList = SysCatalog::instance().getMetadataForObject(
5110  cat.getCurrentDB().dbId,
5111  static_cast<int>(DBObjectType::DashboardDBObjectType),
5112  dashboard_id); // By default the object type can only be dashboards
// Reset, then load the owner's metadata so the owner can be filtered out below.
5113  user_meta.userId = -1;
5114  user_meta.userName = "";
5115  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
5116  for (auto object : objectsList) {
5117  if (user_meta.userName == object->roleName) {
5118  // Mask owner
5119  continue;
5120  }
5121  TDashboardGrantees grantee;
5122  TDashboardPermissions perm;
5123  grantee.name = object->roleName;
// is_user is taken directly from roleType - presumably a boolean-like user/role flag;
// TODO confirm.
5124  grantee.is_user = object->roleType;
5125  perm.create_ = object->privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD);
5126  perm.delete_ = object->privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD);
5127  perm.edit_ = object->privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD);
5128  perm.view_ = object->privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD);
5129  grantee.permissions = perm;
5130  dashboard_grantees.push_back(grantee);
5131  }
5132 }
5133 
5134 void DBHandler::create_link(std::string& _return,
5135  const TSessionId& session_id_or_json,
5136  const std::string& view_state,
5137  const std::string& view_metadata) {
5138  heavyai::RequestInfo const request_info(session_id_or_json);
5139  SET_REQUEST_ID(request_info.requestId());
5140  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
5141  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5142  auto session_ptr = stdlog.getConstSessionInfo();
5143  // check_read_only("create_link");
5144  auto& cat = session_ptr->getCatalog();
5145 
5146  LinkDescriptor ld;
5147  ld.userId = session_ptr->get_currentUser().userId;
5148  ld.viewState = view_state;
5149  ld.viewMetadata = view_metadata;
5150 
5151  try {
5152  _return = cat.createLink(ld, 6);
5153  } catch (const std::exception& e) {
5154  THROW_DB_EXCEPTION(e.what());
5155  }
5156 }
5157 
// Builds a TColumnType whose column name, thrift type and is_array flag are taken from
// the arguments; every other field keeps its default.
// NOTE(review): the opening line of this definition (embedded 5158: return type,
// function name and the `type` parameter) is missing from this listing; confirm
// against the original file before editing.
5159  const std::string& name,
5160  const bool is_array) {
5161  TColumnType ct;
5162  ct.col_name = name;
5163  ct.col_type.type = type;
5164  ct.col_type.is_array = is_array;
5165  return ct;
5166 }
5167 
// Shapefile completeness check. When `file_path` has one of the extensions .shp, .shx
// or .dbf (compared case-insensitively), each of those three companion files is looked
// for next to it, trying the upper-case extension first and then the lower-case one.
// Throws std::runtime_error naming the first companion file that is not found. Paths
// with any other extension are not checked.
// NOTE(review): embedded lines 5177 and 5180 are missing from this listing (the `if`
// opening and the names of the two existence checks are not visible); confirm against
// the original file before editing.
5168 void DBHandler::check_geospatial_files(const boost::filesystem::path file_path,
5169  const import_export::CopyParams& copy_params) {
5170  const std::list<std::string> shp_ext{".shp", ".shx", ".dbf"};
5171  if (std::find(shp_ext.begin(),
5172  shp_ext.end(),
5173  boost::algorithm::to_lower_copy(file_path.extension().string())) !=
5174  shp_ext.end()) {
5175  for (auto ext : shp_ext) {
// Work on a copy: replace_extension() modifies the path in place.
5176  auto aux_file = file_path;
5178  aux_file.replace_extension(boost::algorithm::to_upper_copy(ext)).string(),
5179  copy_params) &&
5181  aux_file.replace_extension(ext).string(), copy_params)) {
5182  throw std::runtime_error("required file for shapefile does not exist: " +
5183  aux_file.filename().string());
5184  }
5185  }
5186  }
5187 }
5188 
5189 void DBHandler::create_table(const TSessionId& session_id_or_json,
5190  const std::string& table_name,
5191  const TRowDescriptor& rd,
5192  const TCreateParams& create_params) {
5193  heavyai::RequestInfo request_info(session_id_or_json);
5194  SET_REQUEST_ID(request_info.requestId());
5195  auto stdlog = STDLOG("table_name", table_name);
5196  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5197  check_read_only("create_table");
5198 
5199  if (ImportHelpers::is_reserved_name(table_name)) {
5200  THROW_DB_EXCEPTION("Invalid table name (reserved keyword): " + table_name);
5201  } else if (table_name != ImportHelpers::sanitize_name(table_name)) {
5202  THROW_DB_EXCEPTION("Invalid characters in table name: " + table_name);
5203  }
5204 
5205  auto rds = rd;
5206 
5207  std::string stmt{"CREATE TABLE " + table_name};
5208  std::vector<std::string> col_stmts;
5209 
5210  for (auto col : rds) {
5211  if (ImportHelpers::is_reserved_name(col.col_name)) {
5212  THROW_DB_EXCEPTION("Invalid column name (reserved keyword): " + col.col_name);
5213  } else if (col.col_name != ImportHelpers::sanitize_name(col.col_name)) {
5214  THROW_DB_EXCEPTION("Invalid characters in column name: " + col.col_name);
5215  }
5216  if (col.col_type.type == TDatumType::INTERVAL_DAY_TIME ||
5217  col.col_type.type == TDatumType::INTERVAL_YEAR_MONTH) {
5218  THROW_DB_EXCEPTION("Unsupported type: " + thrift_to_name(col.col_type) +
5219  " for column: " + col.col_name);
5220  }
5221 
5222  if (col.col_type.type == TDatumType::DECIMAL) {
5223  // if no precision or scale passed in set to default 14,7
5224  if (col.col_type.precision == 0 && col.col_type.scale == 0) {
5225  col.col_type.precision = 14;
5226  col.col_type.scale = 7;
5227  }
5228  }
5229 
5230  std::string col_stmt;
5231  col_stmt.append(col.col_name + " " + thrift_to_name(col.col_type));
5232  if (col.__isset.default_value) {
5233  col_stmt.append(" DEFAULT " + col.default_value);
5234  }
5235 
5236  // As of 2016-06-27 the Immerse v1 frontend does not explicitly set the
5237  // `nullable` argument, leading this to default to false. Uncomment for v2.
5238  // if (!col.col_type.nullable) col_stmt.append(" NOT NULL");
5239 
5240  if (thrift_to_encoding(col.col_type.encoding) != kENCODING_NONE) {
5241  col_stmt.append(" ENCODING " + thrift_to_encoding_name(col.col_type));
5242  if (thrift_to_encoding(col.col_type.encoding) == kENCODING_DICT ||
5243  thrift_to_encoding(col.col_type.encoding) == kENCODING_FIXED ||
5244  thrift_to_encoding(col.col_type.encoding) == kENCODING_GEOINT ||
5245  thrift_to_encoding(col.col_type.encoding) == kENCODING_DATE_IN_DAYS) {
5246  col_stmt.append("(" + std::to_string(col.col_type.comp_param) + ")");
5247  }
5248  } else if (col.col_type.type == TDatumType::STR) {
5249  // non DICT encoded strings
5250  col_stmt.append(" ENCODING NONE");
5251  } else if (col.col_type.type == TDatumType::POINT ||
5252  col.col_type.type == TDatumType::MULTIPOINT ||
5253  col.col_type.type == TDatumType::LINESTRING ||
5254  col.col_type.type == TDatumType::MULTILINESTRING ||
5255  col.col_type.type == TDatumType::POLYGON ||
5256  col.col_type.type == TDatumType::MULTIPOLYGON) {
5257  // non encoded compressable geo
5258  if (col.col_type.scale == 4326) {
5259  col_stmt.append(" ENCODING NONE");
5260  }
5261  }
5262  col_stmts.push_back(col_stmt);
5263  }
5264 
5265  stmt.append(" (" + boost::algorithm::join(col_stmts, ", ") + ")");
5266 
5267  if (create_params.is_replicated) {
5268  stmt.append(" WITH (PARTITIONS = 'REPLICATED')");
5269  }
5270 
5271  stmt.append(";");
5272 
5273  TQueryResult ret;
5274  request_info.setRequestId(logger::request_id());
5275  sql_execute(ret, request_info.json(), stmt, true, "", -1, -1);
5276 }
5277 
// Thrift endpoint: imports the source named by file_name_in (or, for ODBC sources, the
// query in copy_params.sql_select) into the existing table table_name.
// Any std::exception raised on the way is converted to a DB exception carrying
// e.what(); a TDBException is rethrown unchanged.
// NOTE(review): this listing appears to have dropped several source lines (embedded
// numbering skips 5294, 5296, 5300, 5306, 5311, 5318 and 5336), including the
// declarations of `executor` and `copy_params` used below — consult the full source.
5278 void DBHandler::import_table(const TSessionId& session_id_or_json,
5279  const std::string& table_name,
5280  const std::string& file_name_in,
5281  const TCopyParams& cp) {
5282  try {
5283  heavyai::RequestInfo const request_info(session_id_or_json);
5284  SET_REQUEST_ID(request_info.requestId());
5285  auto stdlog =
5286  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
5287  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5288  auto session_ptr = stdlog.getConstSessionInfo();
5289  check_read_only("import_table");
5290  LOG(INFO) << "import_table " << table_name << " from " << file_name_in;
5291 
5292  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5293  auto& cat = session_ptr->getCatalog();
5295  auto start_time = ::toString(std::chrono::system_clock::now());
      // register this import as a query session with status RUNNING_IMPORTER
      // (the opening `if` of this block is not visible in this listing)
5297  executor->enrollQuerySession(request_info.sessionId(),
5298  "IMPORT_TABLE",
5299  start_time,
5301  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5302  }
5303 
      // scope guard: clears the enrolled session status when this scope exits
5304  ScopeGuard clearInterruptStatus = [executor, &request_info, &start_time] {
5305  // reset the runtime query interrupt status
5307  executor->clearQuerySessionStatus(request_info.sessionId(), start_time);
5308  }
5309  };
5310  const auto td_with_lock =
5312  cat, table_name);
5313  const auto td = td_with_lock();
5314  CHECK(td);
5315  check_table_load_privileges(*session_ptr, table_name);
5316 
5317  std::string copy_from_source;
5319  if (copy_params.source_type == import_export::SourceType::kOdbc) {
5320  copy_from_source = copy_params.sql_select;
5321  } else {
5322  std::string file_name{file_name_in};
5323  auto file_path = boost::filesystem::path(file_name);
      // local-path handling is skipped for names starting with "s3://"
      // (case-insensitive)
5324  if (!boost::istarts_with(file_name, "s3://")) {
      // non-absolute names resolve to import_path_ / sha256(session id) / <file name>
5325  if (!boost::filesystem::path(file_name).is_absolute()) {
5326  file_path = import_path_ /
5327  picosha2::hash256_hex_string(request_info.sessionId()) /
5328  boost::filesystem::path(file_name).filename();
5329  file_name = file_path.string();
5330  }
5331  if (!shared::file_or_glob_path_exists(file_path.string())) {
5332  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
5333  "\" does not exist.");
5334  }
5335  }
5337 
5338  // TODO(andrew): add delimiter detection to Importer
      // an unset delimiter ('\0') defaults to ',' or to tab for a ".tsv" extension
5339  if (copy_params.delimiter == '\0') {
5340  copy_params.delimiter = ',';
5341  if (boost::filesystem::path(file_path).extension() == ".tsv") {
5342  copy_params.delimiter = '\t';
5343  }
5344  }
5345  copy_from_source = file_path.string();
5346  }
5347  auto const load_tag = get_import_tag("import_table", table_name, copy_from_source);
5348  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
      // scope guard: logs the matching "finish_" status when this scope exits
5349  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
5350  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
5351  };
5352  const auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
5353  session_ptr->getCatalog(), table_name);
5354  std::unique_ptr<import_export::AbstractImporter> importer;
5355  importer = import_export::create_importer(cat, td, copy_from_source, copy_params);
      // `ms` is divided by 1000 to report seconds below
5356  auto ms = measure<>::execution([&]() { importer->import(session_ptr.get()); });
5357  LOG(INFO) << "Total Import Time: " << (double)ms / 1000.0 << " Seconds.";
5358  } catch (const TDBException& e) {
5359  throw;
5360  } catch (const std::exception& e) {
5361  THROW_DB_EXCEPTION(std::string(e.what()));
5362  }
5363 }
5364 
5365 namespace {
5366 
5367 // helper functions for error checking below
5368 // these would usefully be added as methods of TDatumType
5369 // but that's not possible as it's auto-generated by Thrift
5370 
// NOTE(review): the signature line of this helper appears to be missing from this
// listing (embedded numbering skips 5371); presumably this is the body of
// TTypeInfo_IsGeo, which later code calls with a TDatumType value — confirm.
// Evaluates to true when t is one of the six geo datum types: POLYGON, MULTIPOLYGON,
// LINESTRING, MULTILINESTRING, POINT or MULTIPOINT.
5372  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5373  t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5374  t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5375 }
5376 
// NOTE(review): the signature line of this helper appears to be missing from this
// listing (embedded numbering skips 5377); presumably this is the body of
// TTypeInfo_TypeToString, which later code calls with a TDatumType value — confirm.
// Streams t into a std::stringstream with operator<< and returns the resulting text.
5378  std::stringstream ss;
5379  ss << t;
5380  return ss.str();
5381 }
5382 
5383 std::string get_mismatch_attr_warning_text(const std::string& table_name,
5384  const std::string& file_path,
5385  const std::string& column_name,
5386  const std::string& attr,
5387  const std::string& got,
5388  const std::string& expected) {
5389  return "Issue encountered in geo/raster file '" + file_path +
5390  "' while appending to table '" + table_name + "'. Column '" + column_name +
5391  "' " + attr + " mismatch (got '" + got + "', expected '" + expected + "')";
5392 }
5393 
5394 } // namespace
5395 
// Raises a DB exception (via THROW_DB_EXCEPTION) describing a column attribute
// mismatch while appending a geo/raster file to a table.
// The expansion refers to `file_path`, `table_name` and `cd`, so those names must be
// in scope wherever the macro is used; `attr`, `got` and `expected` are concatenated
// into the message with operator+.
5396 #define THROW_COLUMN_ATTR_MISMATCH_EXCEPTION(attr, got, expected) \
5397  THROW_DB_EXCEPTION("Could not append geo/raster file '" + \
5398  file_path.filename().string() + "' to table '" + table_name + \
5399  "'. Column '" + cd->columnName + "' " + attr + " mismatch (got '" + \
5400  got + "', expected '" + expected + "')");
5401 
// Direct Thrift endpoint for geo/raster import: extracts the session id from
// session_id_or_json, sets the request id, and forwards to
// importGeoTableGlobFilterSort().
// NOTE(review): one argument line of the forwarded call appears to be missing from
// this listing (embedded numbering skips 5416); presumably it converts `cp` to the
// CopyParams the callee expects — confirm against the full source.
5402 void DBHandler::import_geo_table(const TSessionId& session_id_or_json,
5403  const std::string& table_name,
5404  const std::string& file_name,
5405  const TCopyParams& cp,
5406  const TRowDescriptor& row_desc,
5407  const TCreateParams& create_params) {
5408  // this is the direct Thrift endpoint
5409  // it does NOT support the separate FSI regex/filter/sort options
5410  // but it DOES support basic globbing specified in the filename itself
5411  heavyai::RequestInfo const request_info(session_id_or_json);
5412  SET_REQUEST_ID(request_info.requestId());
5413  importGeoTableGlobFilterSort(request_info.sessionId(),
5414  table_name,
5415  file_name,
5417  row_desc,
5418  create_params);
5419 }
5420 
// Expands file_name into a list of files using the regex filter and sort options held
// in copy_params, then imports each resulting file in turn.
// If the expansion throws shared::FileNotFoundException, the original file_name is
// used as the only entry.
// NOTE(review): two source lines appear to be missing from this listing (embedded
// numbering skips 5435 and 5443); the second is the start of the per-file call whose
// arguments are visible in the loop — consult the full source.
5421 void DBHandler::importGeoTableGlobFilterSort(const TSessionId& session_id,
5422  const std::string& table_name,
5423  const std::string& file_name,
5424  const import_export::CopyParams& copy_params,
5425  const TRowDescriptor& row_desc,
5426  const TCreateParams& create_params) {
5427  // this is called by the above direct Thrift endpoint
5428  // and also for a deferred COPY FROM for geo/raster
5429  // it DOES support the full FSI regex/filter/sort options
5430  std::vector<std::string> file_names;
5431  try {
5432  const shared::FilePathOptions options{copy_params.regex_path_filter,
5433  copy_params.file_sort_order_by,
5434  copy_params.file_sort_regex};
5436  file_names = shared::local_glob_filter_sort_files(file_name, options, false);
5437  } catch (const shared::FileNotFoundException& e) {
5438  // no files match, just try the original filename, might be remote
5439  file_names.push_back(file_name);
5440  }
5441  // import whatever we found
      // note: the loop variable shadows the file_name parameter
5442  for (auto const& file_name : file_names) {
5444  session_id, table_name, file_name, copy_params, row_desc, create_params);
5445  }
5446 }
5447 
// Imports one geo or raster file into one table per loadable layer.
//
// Visible steps, in order:
//  1. log the call, fetch the session and reject read-only mode
//  2. resolve a relative file name under import_path_ / sha256(session_id)
//  3. add VSI prefixes according to copy_params.source_type (kGeoFile, optionally
//     inside a supported archive, or kRasterFile); any other source type throws
//  4. check that the primary file and (via check_geospatial_files) its dependent files
//     exist
//  5. for non-raster input, list the layers, keep the loadable ones and honor an
//     explicit copy_params.geo_layer_name
//  6. for each layer: use row_desc or detect column types, create the table if it does
//     not exist, take locks, validate column count/types/names, check load privileges
//     and run the import
//  7. per-layer failures are collected and reported as one combined DB exception
//
// Parameters:
//   session_id    - session used for logging, privilege checks and path resolution
//   table_name    - target table; "_<sanitized layer name>" is appended when more
//                   than one layer is loaded
//   file_name_in  - file name as supplied by the caller
//   copy_params   - import options (source type, explicit layer name, ...)
//   row_desc      - if non-empty, used as the row descriptor instead of detection
//   create_params - forwarded to create_table() when a table has to be created
//
// NOTE(review): this listing appears to have dropped a number of source lines (the
// embedded numbering has gaps, e.g. 5460, 5462, 5485, 5562, 5595-5596, 5622, 5676,
// 5733, 5737, 5779, 5800), including the declaration of `executor` and the `case`
// labels of the layer switch — consult the full source before editing.
5448 void DBHandler::importGeoTableSingle(const TSessionId& session_id,
5449  const std::string& table_name,
5450  const std::string& file_name_in,
5451  const import_export::CopyParams& copy_params,
5452  const TRowDescriptor& row_desc,
5453  const TCreateParams& create_params) {
5454  auto stdlog = STDLOG(get_session_ptr(session_id), "table_name", table_name);
5455  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5456  auto session_ptr = stdlog.getConstSessionInfo();
5457  check_read_only("import_table");
5458 
5459  auto& cat = session_ptr->getCatalog();
5461  auto start_time = ::toString(std::chrono::system_clock::now());
      // register this import as a query session with status RUNNING_IMPORTER
      // (the opening `if` of this block is not visible in this listing)
5463  executor->enrollQuerySession(session_id,
5464  "IMPORT_GEO_TABLE",
5465  start_time,
5467  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5468  }
5469 
      // scope guard: clears the enrolled session status when this function exits
5470  ScopeGuard clearInterruptStatus = [executor, &session_id, &start_time] {
5471  // reset the runtime query interrupt status
5473  executor->clearQuerySessionStatus(session_id, start_time);
5474  }
5475  };
5476 
5477  std::string file_name{file_name_in};
5478 
5479  if (path_is_relative(file_name)) {
5480  // assume relative paths are relative to data_path / import / <session>
5481  auto file_path = import_path_ / picosha2::hash256_hex_string(session_id) /
5482  boost::filesystem::path(file_name).filename();
5483  file_name = file_path.string();
5484  }
5486 
5487  bool is_raster = false;
5488  if (copy_params.source_type == import_export::SourceType::kGeoFile) {
5489  if (is_a_supported_archive_file(file_name)) {
5490  // find the archive file
5491  add_vsi_network_prefix(file_name);
5492  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
5493  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
5494  }
5495  // find geo file in archive
5496  add_vsi_archive_prefix(file_name);
5497  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
5498  // prepare to load that geo file
5499  if (geo_file.size()) {
5500  file_name = file_name + std::string("/") + geo_file;
5501  }
5502  } else {
5503  // prepare to load geo file directly
5504  add_vsi_network_prefix(file_name);
5505  add_vsi_geo_prefix(file_name);
5506  }
5507  } else if (copy_params.source_type == import_export::SourceType::kRasterFile) {
5508  // prepare to load geo raster file directly
5509  add_vsi_network_prefix(file_name);
5510  add_vsi_geo_prefix(file_name);
5511  is_raster = true;
5512  } else {
5513  THROW_DB_EXCEPTION("import_geo_table called with file_type other than GEO or RASTER");
5514  }
5515 
5516  // log what we're about to try to do
5517  VLOG(1) << "import_geo_table: Original filename: " << file_name_in;
5518  VLOG(1) << "import_geo_table: Actual filename: " << file_name;
5519  VLOG(1) << "import_geo_table: Raster: " << is_raster;
5520  auto const load_tag = get_import_tag("import_geo_table", table_name, file_name);
5521  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
      // scope guard: logs the matching "finish_" status when this function exits
5522  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
5523  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
5524  };
5525  // use GDAL to check the primary file exists (even if on S3 and/or in archive)
5526  auto file_path = boost::filesystem::path(file_name);
5527  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
5528  THROW_DB_EXCEPTION("File does not exist: " + file_path.filename().string());
5529  }
5530 
5531  // use GDAL to check any dependent files exist (ditto)
5532  try {
5533  check_geospatial_files(file_path, copy_params);
5534  } catch (const std::exception& e) {
5535  THROW_DB_EXCEPTION("import_geo_table error: " + std::string(e.what()));
5536  }
5537 
5538  // get layer info and deconstruct
5539  // in general, we will get a combination of layers of these four types:
5540  // EMPTY: no rows, report and skip
5541  // GEO: create a geo table from this
5542  // NON_GEO: create a regular table from this
5543  // UNSUPPORTED_GEO: report and skip
5544  std::vector<import_export::Importer::GeoFileLayerInfo> layer_info;
5545  if (!is_raster) {
5546  try {
5547  layer_info =
5548  import_export::Importer::gdalGetLayersInGeoFile(file_name, copy_params);
5549  } catch (const std::exception& e) {
5550  THROW_DB_EXCEPTION("import_geo_table error: " + std::string(e.what()));
5551  }
5552  }
5553 
5554  // categorize the results
5555  using LayerNameToContentsMap =
5556  std::map<std::string, import_export::Importer::GeoFileLayerContents>;
5557  LayerNameToContentsMap load_layers;
5558  LOG_IF(INFO, layer_info.size() > 0)
5559  << "import_geo_table: Found the following layers in the geo file:";
5560  for (const auto& layer : layer_info) {
      // NOTE(review): the `case` labels of this switch are not visible in this
      // listing; the log messages indicate which branch handles which layer kind
5561  switch (layer.contents) {
5563  LOG(INFO) << "import_geo_table: '" << layer.name
5564  << "' (will import as geo table)";
5565  load_layers[layer.name] = layer.contents;
5566  break;
5568  LOG(INFO) << "import_geo_table: '" << layer.name
5569  << "' (will import as regular table)";
5570  load_layers[layer.name] = layer.contents;
5571  break;
5573  LOG(WARNING) << "import_geo_table: '" << layer.name
5574  << "' (will not import, unsupported geo type)";
5575  break;
5577  LOG(INFO) << "import_geo_table: '" << layer.name << "' (ignoring, empty)";
5578  break;
5579  default:
5580  break;
5581  }
5582  }
5583 
5584  // if nothing is loadable, stop now
5585  if (!is_raster && load_layers.size() == 0) {
5586  THROW_DB_EXCEPTION("import_geo_table: No loadable layers found, aborting!");
5587  }
5588 
5589  // if we've been given an explicit layer name, check that it exists and is loadable
5590  // scan the original list, as it may exist but not have been gathered as loadable
5591  if (!is_raster && copy_params.geo_layer_name.size()) {
5592  bool found = false;
5593  for (const auto& layer : layer_info) {
5594  if (copy_params.geo_layer_name == layer.name) {
      // NOTE(review): the condition opening this branch (numbering skips 5595-5596)
      // is not visible in this listing
5597  // forget all the other layers and just load this one
5598  load_layers.clear();
5599  load_layers[layer.name] = layer.contents;
5600  found = true;
5601  break;
5602  } else if (layer.contents ==
5604  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5605  copy_params.geo_layer_name + "' has unsupported geo type!");
5606  } else if (layer.contents ==
5608  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5609  copy_params.geo_layer_name + "' is empty!");
5610  }
5611  }
5612  }
5613  if (!found) {
5614  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5615  copy_params.geo_layer_name + "' not found!");
5616  }
5617  }
5618 
5619  // Immerse import of multiple layers is not yet supported
5620  // @TODO fix this!
5621  if (!is_raster && row_desc.size() > 0 && load_layers.size() > 1) {
5623  "import_geo_table: Multi-layer geo import not yet supported from Immerse!");
5624  }
5625 
5626  // one definition of layer table name construction
5627  // we append the layer name if we're loading more than one table
5628  auto construct_layer_table_name = [&load_layers](const std::string& table_name,
5629  const std::string& layer_name) {
5630  if (load_layers.size() > 1) {
5631  auto sanitized_layer_name = ImportHelpers::sanitize_name(layer_name);
5632  if (sanitized_layer_name != layer_name) {
5633  LOG(INFO) << "import_geo_table: Using sanitized layer name '"
5634  << sanitized_layer_name << "' for table name";
5635  }
5636  return table_name + "_" + sanitized_layer_name;
5637  }
5638  return table_name;
5639  };
5640 
5641  // if we're importing multiple tables, then NONE of them must exist already
5642  if (!is_raster && load_layers.size() > 1) {
5643  for (const auto& layer : load_layers) {
5644  // construct table name
5645  auto this_table_name = construct_layer_table_name(table_name, layer.first);
5646 
5647  // table must not exist
5648  if (cat.getMetadataForTable(this_table_name)) {
5649  THROW_DB_EXCEPTION("import_geo_table: Table '" + this_table_name +
5650  "' already exists, aborting!");
5651  }
5652  }
5653  }
5654 
5655  // prepare to gather errors that would otherwise be exceptions, as we can only throw
5656  // one
5657  std::vector<std::string> caught_exception_messages;
5658 
5659  // prepare to time multi-layer import
5660  double total_import_ms = 0.0;
5661 
5662  // for geo raster, we make a single dummy layer
5663  // the name is irrelevant, but set it to the filename so the log makes sense
5664  if (is_raster) {
5665  CHECK_EQ(load_layers.size(), 0u);
5666  load_layers.emplace(file_name, import_export::Importer::GeoFileLayerContents::GEO);
5667  }
5668 
5669  // now we're safe to start importing
5670  // we loop over the layers we're going to attempt to load
      // every failure below is recorded and `continue`s to the next layer, so one
      // bad layer does not stop the others
5671  for (const auto& layer : load_layers) {
5672  // unpack
5673  const auto& layer_name = layer.first;
5674  const auto& layer_contents = layer.second;
      // NOTE(review): the initializer of is_geo_layer (numbering skips 5676) is not
      // visible in this listing
5675  bool is_geo_layer =
5677 
5678  // construct table name again
5679  auto this_table_name = construct_layer_table_name(table_name, layer_name);
5680 
5681  // report
5682  LOG(INFO) << "import_geo_table: Creating table: " << this_table_name;
5683 
5684  // we need a row descriptor
5685  TRowDescriptor rd;
5686  if (row_desc.size() > 0) {
5687  // we have a valid RowDescriptor
5688  // this is the case where Immerse has already detected and created
5689  // all we need to do is import and trust that the data will match
5690  // use the provided row descriptor
5691  // table must already exist (we check this below)
5692  rd = row_desc;
5693  } else {
5694  // we don't have a RowDescriptor
5695  // we have to detect the file ourselves
5696  TDetectResult cds;
5697  TCopyParams cp_copy = copyparams_to_thrift(copy_params);
5698  cp_copy.geo_layer_name = layer_name;
5699  try {
5700  detect_column_types(cds, session_id, file_name_in, cp_copy);
5701  } catch (const std::exception& e) {
5702  // capture the error and abort this layer
5703  caught_exception_messages.emplace_back("Column Type Detection failed for '" +
5704  layer_name + "':" + e.what());
5705  continue;
5706  }
5707  rd = cds.row_set.row_desc;
5708 
5709  // then, if the table does NOT already exist, create it
5710  const TableDescriptor* td = cat.getMetadataForTable(this_table_name);
5711  if (!td) {
5712  try {
5713  create_table(session_id, this_table_name, rd, create_params);
5714  } catch (const std::exception& e) {
5715  // capture the error and abort this layer
5716  caught_exception_messages.emplace_back("Failed to create table for Layer '" +
5717  layer_name + "':" + e.what());
5718  continue;
5719  }
5720  }
5721  }
5722 
5723  // match locking sequence for CopyTableStmt::execute
5724  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5725 
5726  const TableDescriptor* td{nullptr};
5727  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
5728  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
5729 
5730  try {
5731  td_with_lock =
5732  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
5734  lockmgr::ReadLock>::acquireTableDescriptor(cat, this_table_name));
5735  td = (*td_with_lock)();
5736  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
5738  } catch (const std::runtime_error& e) {
5739  // capture the error and abort this layer
5740  std::string exception_message = "Could not import geo/raster file '" +
5741  file_path.filename().string() + "' to table '" +
5742  this_table_name +
5743  "'; table does not exist or failed to create.";
5744  caught_exception_messages.emplace_back(exception_message);
5745  continue;
5746  }
5747  CHECK(td);
5748 
5749  // then, we have to verify that the structure matches
5750  // get column descriptors (non-system, non-deleted, logical columns only)
5751  const auto col_descriptors =
5752  cat.getAllColumnMetadataForTable(td->tableId, false, false, false);
5753 
5754  // first, compare the column count
5755  if (col_descriptors.size() != rd.size()) {
5756  // capture the error and abort this layer
5757  std::string exception_message = "Could not append geo/raster file '" +
5758  file_path.filename().string() + "' to table '" +
5759  this_table_name + "'. Column count mismatch (got " +
5760  std::to_string(rd.size()) + ", expecting " +
5761  std::to_string(col_descriptors.size()) + ")";
5762  caught_exception_messages.emplace_back(exception_message);
5763  continue;
5764  }
5765 
5766  try {
5767  // validate column type match
5768  // also handle geo column name changes
      // rd and col_descriptors are walked in lockstep; the count check above ensures
      // rd_index stays in range
5769  int rd_index = 0;
5770  for (auto const* cd : col_descriptors) {
5771  auto const cd_col_type = populateThriftColumnType(&cat, cd);
5772 
5773  // for types, all we care about is that the got and expected types are either both
5774  // geo or both non-geo, and if they're geo that the exact geo type matches
5775  auto const gtype = rd[rd_index].col_type.type; // importer type
5776  auto const etype = cd_col_type.col_type.type; // existing table type
5777  if (TTypeInfo_IsGeo(gtype) && TTypeInfo_IsGeo(etype)) {
5778  if (gtype != etype) {
      // NOTE(review): the start of this statement (numbering skips 5779) is not
      // visible in this listing; only its arguments are
5780  "type", TTypeInfo_TypeToString(gtype), TTypeInfo_TypeToString(etype));
5781  }
5782  } else if (TTypeInfo_IsGeo(gtype) != TTypeInfo_IsGeo(etype)) {
5784  "type", TTypeInfo_TypeToString(gtype), TTypeInfo_TypeToString(etype));
5785  }
5786 
5787  // for names, we keep the existing table geo column name (for example, to handle
5788  // the case where an existing table has a geo column with a legacy name), but all
5789  // other column names must match, otherwise the import will fail
5790  auto const gname = rd[rd_index].col_name; // importer name
5791  auto const ename = cd->columnName; // existing table name
5792  if (gname != ename) {
5793  if (TTypeInfo_IsGeo(gtype)) {
5794  LOG(INFO) << "import_geo_table: Renaming incoming geo column to match "
5795  "existing table column name '"
5796  << ename << "'";
5797  rd[rd_index].col_name = ename;
5798  } else {
5799  if (is_raster) {
      // NOTE(review): the start of this statement (numbering skips 5800) is not
      // visible; note that it is passed table_name, not this_table_name
5801  table_name,
5802  file_path.filename().string(),
5803  cd->columnName,
5804  "name",
5805  gname,
5806  ename);
5807  } else {
5808  THROW_COLUMN_ATTR_MISMATCH_EXCEPTION("name", gname, ename);
5809  }
5810  }
5811  }
5812  rd_index++;
5813  }
5814  } catch (const std::exception& e) {
5815  // capture the error and abort this layer
5816  caught_exception_messages.emplace_back(e.what());
5817  continue;
5818  }
5819 
      // map each column name to the source name handed to the importer
      // NOTE(review): when src_name is empty the fallback sanitizes that same empty
      // src_name; confirm whether r.col_name was intended as the fallback
5820  std::map<std::string, std::string> colname_to_src;
5821  for (auto r : rd) {
5822  colname_to_src[r.col_name] =
5823  r.src_name.length() > 0 ? r.src_name : ImportHelpers::sanitize_name(r.src_name);
5824  }
5825 
5826  try {
5827  check_table_load_privileges(*session_ptr, this_table_name);
5828  } catch (const std::exception& e) {
5829  // capture the error and abort this layer
5830  caught_exception_messages.emplace_back(e.what());
5831  continue;
5832  }
5833 
5834  if (!is_raster && is_geo_layer) {
5835  // Final check to ensure that we have exactly one geo column
5836  // before doing the actual import, in case the user naively
5837  // overrode the types in Immerse Preview (which as of 6/17/21
5838  // it still allows you to do). We should make Immerse more
5839  // robust and disallow re-typing of columns to/from geo types
5840  // completely. Currently, if multiple columns are re-typed
5841  // such that there is still exactly one geo column (but it's
5842  // the wrong one) then this test will pass, but the import
5843  // will then reject some (or more likely all) of the rows.
5844  int num_geo_columns{0};
5845  for (auto const& col : rd) {
5846  if (TTypeInfo_IsGeo(col.col_type.type)) {
5847  num_geo_columns++;
5848  }
5849  }
5850  if (num_geo_columns != 1) {
5851  std::string exception_message =
5852  "Table '" + this_table_name +
5853  "' must have exactly one geo column. Import aborted!";
5854  caught_exception_messages.emplace_back(exception_message);
5855  continue;
5856  }
5857  }
5858 
5859  std::string layer_or_raster = is_raster ? "Raster" : "Layer";
5860 
5861  try {
5862  // import this layer only?
5863  import_export::CopyParams copy_params_copy = copy_params;
5864  copy_params_copy.geo_layer_name = layer_name;
5865 
5866  // create an importer
5867  std::unique_ptr<import_export::Importer> importer;
5868  importer.reset(
5869  new import_export::Importer(cat, td, file_path.string(), copy_params_copy));
5870 
5871  // import
      // `ms` is accumulated into total_import_ms and divided by 1000 for logging
5872  auto ms = measure<>::execution(
5873  [&]() { importer->importGDAL(colname_to_src, session_ptr.get(), is_raster); });
5874  LOG(INFO) << "Import of " << layer_or_raster << " '" << layer_name << "' took "
5875  << (double)ms / 1000.0 << "s";
5876  total_import_ms += ms;
5877  } catch (const std::exception& e) {
5878  std::string exception_message = "Import of " + layer_or_raster + " '" +
5879  this_table_name + "' failed: " + e.what();
5880  caught_exception_messages.emplace_back(exception_message);
5881  continue;
5882  }
5883  }
5884 
5885  // did we catch any exceptions?
5886  if (caught_exception_messages.size()) {
5887  // combine all the strings into one and throw a single Thrift exception
5888  std::string combined_exception_message = "Failed to import geo/raster file: ";
5889  bool comma{false};
5890  for (const auto& message : caught_exception_messages) {
5891  combined_exception_message += comma ? (", " + message) : message;
5892  comma = true;
5893  }
5894  THROW_DB_EXCEPTION(combined_exception_message);
5895  } else {
5896  // report success and total time
5897  LOG(INFO) << "Import Successful!";
5898  LOG(INFO) << "Total Import Time: " << total_import_ms / 1000.0 << "s";
5899  }
5900 }
5901 
5902 #undef THROW_COLUMN_ATTR_MISMATCH_EXCEPTION
5903 
5904 void DBHandler::import_table_status(TImportStatus& _return,
5905  const TSessionId& session_id_or_json,
5906  const std::string& import_id) {
5907  heavyai::RequestInfo const request_info(session_id_or_json);
5908  SET_REQUEST_ID(request_info.requestId());
5909  auto stdlog =
5910  STDLOG(get_session_ptr(request_info.sessionId()), "import_table_status", import_id);
5911  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5912  auto is = import_export::Importer::get_import_status(import_id);
5913  _return.elapsed = is.elapsed.count();
5914  _return.rows_completed = is.rows_completed;
5915  _return.rows_estimated = is.rows_estimated;
5916  _return.rows_rejected = is.rows_rejected;
5917 }
5918 
// NOTE(review): the first line of this function's signature appears to be missing
// from this listing (embedded numbering skips 5919); judging by the log tag and the
// assignments to `_return` below this is presumably
// DBHandler::get_first_geo_file_in_archive with a string out-parameter — confirm.
//
// If archive_path_in names a supported archive, sets _return to
// archive_path_in + "/" + <first geo file found in it>; if no geo file is found, or
// the path is not a supported archive, _return is archive_path_in unchanged.
// Throws a DB exception when the archive does not exist.
5920  const TSessionId& session_id_or_json,
5921  const std::string& archive_path_in,
5922  const TCopyParams& copy_params) {
5923  heavyai::RequestInfo const request_info(session_id_or_json);
5924  SET_REQUEST_ID(request_info.requestId());
5925  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
5926  "get_first_geo_file_in_archive",
5927  archive_path_in);
5928  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5929 
      // work on a copy; the untouched input is what gets returned to the caller
5930  std::string archive_path(archive_path_in);
5931 
5932  if (path_is_relative(archive_path)) {
5933  // assume relative paths are relative to data_path / import / <session>
5934  auto file_path = import_path_ /
5935  picosha2::hash256_hex_string(request_info.sessionId()) /
5936  boost::filesystem::path(archive_path).filename();
5937  archive_path = file_path.string();
5938  }
5939  validate_import_file_path_if_local(archive_path);
5940 
5941  if (is_a_supported_archive_file(archive_path)) {
5942  // find the archive file
5943  add_vsi_network_prefix(archive_path);
5944  if (!import_export::Importer::gdalFileExists(archive_path,
5945  thrift_to_copyparams(copy_params))) {
5946  THROW_DB_EXCEPTION("Archive does not exist: " + archive_path_in);
5947  }
5948  // find geo file in archive
5949  add_vsi_archive_prefix(archive_path);
5950  std::string geo_file =
5951  find_first_geo_file_in_archive(archive_path, thrift_to_copyparams(copy_params));
5952  // what did we get?
5953  if (geo_file.size()) {
5954  // prepend it with the original path
5955  _return = archive_path_in + std::string("/") + geo_file;
5956  } else {
5957  // just return the original path
5958  _return = archive_path_in;
5959  }
5960  } else {
5961  // just return the original path
5962  _return = archive_path_in;
5963  }
5964 }
5965 
// Thrift endpoint: lists the files inside the archive named by archive_path_in.
// Each returned entry is prefixed with archive_path_in + '/'. When the path is not a
// supported archive, _return is left as passed in. Throws a DB exception when the
// archive does not exist.
// NOTE(review): the line that starts the call filling `_return` appears to be missing
// from this listing (embedded numbering skips 5996); only its arguments are visible.
5966 void DBHandler::get_all_files_in_archive(std::vector<std::string>& _return,
5967  const TSessionId& session_id_or_json,
5968  const std::string& archive_path_in,
5969  const TCopyParams& copy_params) {
5970  heavyai::RequestInfo const request_info(session_id_or_json);
5971  SET_REQUEST_ID(request_info.requestId());
5972  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
5973  "get_all_files_in_archive",
5974  archive_path_in);
5975  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5976 
      // work on a copy; the untouched input is used to prefix the results
5977  std::string archive_path(archive_path_in);
5978  if (path_is_relative(archive_path)) {
5979  // assume relative paths are relative to data_path / import / <session>
5980  auto file_path = import_path_ /
5981  picosha2::hash256_hex_string(request_info.sessionId()) /
5982  boost::filesystem::path(archive_path).filename();
5983  archive_path = file_path.string();
5984  }
5985  validate_import_file_path_if_local(archive_path);
5986 
5987  if (is_a_supported_archive_file(archive_path)) {
5988  // find the archive file
5989  add_vsi_network_prefix(archive_path);
5990  if (!import_export::Importer::gdalFileExists(archive_path,
5991  thrift_to_copyparams(copy_params))) {
5992  THROW_DB_EXCEPTION("Archive does not exist: " + archive_path_in);
5993  }
5994  // find all files in archive
5995  add_vsi_archive_prefix(archive_path);
5997  archive_path, thrift_to_copyparams(copy_params));
5998  // prepend them all with original path
5999  for (auto& s : _return) {
6000  s = archive_path_in + '/' + s;
6001  }
6002  }
6003 }
6004 
// Thrift endpoint: appends one TGeoFileLayerInfo (name and contents kind) to _return
// for every layer found in the geo file, or in the first geo file of the archive,
// named by file_name_in.
// Throws a DB exception when the archive or the resolved file does not exist.
// NOTE(review): several source lines appear to be missing from this listing (embedded
// numbering skips 6017, 6027, 6063, 6066, 6069 and 6072), including the declaration
// of `copy_params` and the `case` labels of the switch — consult the full source.
6005 void DBHandler::get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
6006  const TSessionId& session_id_or_json,
6007  const std::string& file_name_in,
6008  const TCopyParams& cp) {
6009  heavyai::RequestInfo const request_info(session_id_or_json);
6010  SET_REQUEST_ID(request_info.requestId());
6011  auto stdlog = STDLOG(
6012  get_session_ptr(request_info.sessionId()), "get_layers_in_geo_file", file_name_in);
6013  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
6014 
      // work on a copy; the untouched input is used in error messages
6015  std::string file_name(file_name_in);
6016 
6018 
6019  // handle relative paths
6020  if (path_is_relative(file_name)) {
6021  // assume relative paths are relative to data_path / import / <session>
6022  auto file_path = import_path_ /
6023  picosha2::hash256_hex_string(request_info.sessionId()) /
6024  boost::filesystem::path(file_name).filename();
6025  file_name = file_path.string();
6026  }
6028 
6029  // archive or file?
6030  if (is_a_supported_archive_file(file_name)) {
6031  // find the archive file
6032  add_vsi_network_prefix(file_name);
6033  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
6034  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
6035  }
6036  // find geo file in archive
6037  add_vsi_archive_prefix(file_name);
6038  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
6039  // prepare to load that geo file
6040  if (geo_file.size()) {
6041  file_name = file_name + std::string("/") + geo_file;
6042  }
6043  } else {
6044  // prepare to load geo file directly
6045  add_vsi_network_prefix(file_name);
6046  add_vsi_geo_prefix(file_name);
6047  }
6048 
6049  // check the file actually exists
6050  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
6051  THROW_DB_EXCEPTION("Geo file/archive does not exist: " + file_name_in);
6052  }
6053 
6054  // find all layers
6055  auto internal_layer_info =
6056  import_export::Importer::gdalGetLayersInGeoFile(file_name, copy_params);
6057 
6058  // convert to Thrift type
6059  for (const auto& internal_layer : internal_layer_info) {
6060  TGeoFileLayerInfo layer;
6061  layer.name = internal_layer.name;
      // NOTE(review): the `case` labels are not visible in this listing; each branch
      // assigns one of the four TGeoFileLayerContents values, and any other value
      // hits CHECK(false)
6062  switch (internal_layer.contents) {
6064  layer.contents = TGeoFileLayerContents::EMPTY;
6065  break;
6067  layer.contents = TGeoFileLayerContents::GEO;
6068  break;
6070  layer.contents = TGeoFileLayerContents::NON_GEO;
6071  break;
6073  layer.contents = TGeoFileLayerContents::UNSUPPORTED_GEO;
6074  break;
6075  default:
6076  CHECK(false);
6077  }
6078  _return.emplace_back(layer); // no suitable constructor to just pass parameters
6079  }
6080 }
6081 
6082 void DBHandler::start_heap_profile(const TSessionId& session_id_or_json) {
6083  heavyai::RequestInfo const request_info(session_id_or_json);
6084  SET_REQUEST_ID(request_info.requestId());
6085  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6086 #ifdef HAVE_PROFILER
6087  if (IsHeapProfilerRunning()) {
6088  THROW_DB_EXCEPTION("Profiler already started");
6089  }
6090  HeapProfilerStart("omnisci");
6091 #else
6092  THROW_DB_EXCEPTION("Profiler not enabled");
6093 #endif // HAVE_PROFILER
6094 }
6095 
6096 void DBHandler::stop_heap_profile(const TSessionId& session_id_or_json) {
6097  heavyai::RequestInfo const request_info(session_id_or_json);
6098  SET_REQUEST_ID(request_info.requestId());
6099  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6100 #ifdef HAVE_PROFILER
6101  if (!IsHeapProfilerRunning()) {
6102  THROW_DB_EXCEPTION("Profiler not running");
6103  }
6104  HeapProfilerStop();
6105 #else
6106  THROW_DB_EXCEPTION("Profiler not enabled");
6107 #endif // HAVE_PROFILER
6108 }
6109 
// Looks up `session_id` in calcite_sessions_ and returns the mapped value, or
// nullptr when there is no entry for that id.
// NOTE(review): the signature line and the statement preceding the lookup are not
// visible in this listing; presumably the latter takes a lock guarding
// calcite_sessions_ -- confirm.
6111  TSessionId const& session_id) const {
6113  auto const itr = calcite_sessions_.find(session_id);
6114  return itr == calcite_sessions_.end() ? nullptr : itr->second;
6115 }
6116 
6117 void DBHandler::get_heap_profile(std::string& profile,
6118  const TSessionId& session_id_or_json) {
6119  heavyai::RequestInfo const request_info(session_id_or_json);
6120  SET_REQUEST_ID(request_info.requestId());
6121  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6122 #ifdef HAVE_PROFILER
6123  if (!IsHeapProfilerRunning()) {
6124  THROW_DB_EXCEPTION("Profiler not running");
6125  }
6126  auto profile_buff = GetHeapProfile();
6127  profile = profile_buff;
6128  free(profile_buff);
6129 #else
6130  THROW_DB_EXCEPTION("Profiler not enabled");
6131 #endif // HAVE_PROFILER
6132 }
6133 
// Returns a copy of the session registered under `session_id`.
// NOTE(review): the signature line and one statement inside the first branch are not
// visible in this listing; presumably the latter locks calcite_sessions_ -- confirm.
//
// Ids whose length equals CALCITE_SESSION_ID_LENGTH are looked up in
// calcite_sessions_ and raise std::runtime_error when absent; every other id is
// delegated to sessions_store_->getSessionCopy().
6135  if (session_id.length() == Catalog_Namespace::CALCITE_SESSION_ID_LENGTH) {
6137  if (auto it = calcite_sessions_.find(session_id); it != calcite_sessions_.end()) {
// dereference so the caller receives the pointed-to session, not the stored pointer
6138  return *it->second;
6139  }
6140  throw std::runtime_error("No session with id " + session_id);
6141  }
6142  return sessions_store_->getSessionCopy(session_id);
6143 }
6144 
6145 std::shared_ptr<Catalog_Namespace::SessionInfo> DBHandler::get_session_ptr(
6146  const TSessionId& session_id) {
6147  // Note(Wamsi): This method will give you a shared_ptr to master SessionInfo itself.
6148  // Should be used only when you need to make updates to original SessionInfo object.
6149  // Currently used by `update_session_last_used_duration`
6150 
6151  // 1) `session_id` will be empty during intial connect. 2)`sessionmapd iterator` will
6152  // be invalid during disconnect. SessionInfo will be erased from map by the time it
6153  // reaches here. In both the above cases, we would return `nullptr` and can skip
6154  // SessionInfo updates.
6155  if (session_id.empty()) {
6156  return nullptr;
6157  }
6158  auto ptr = session_id.length() == Catalog_Namespace::CALCITE_SESSION_ID_LENGTH
6159  ? findCalciteSession(session_id)
6160  : sessions_store_->get(session_id);
6161  if (!ptr) {
6162  THROW_DB_EXCEPTION("Session not valid or expired.");
6163  }
6164  return ptr;
6165 }
6166 
// Verifies that the session's current user passes SysCatalog::checkPrivileges() for
// the table `table_name`, and raises a DB exception naming the user and table
// otherwise.
// NOTE(review): the signature line and the statement between loadKey() and the
// privObjects declaration are not visible in this listing. Given the error text,
// the missing statement presumably sets the insert privilege on dbObject -- confirm.
6168  const Catalog_Namespace::SessionInfo& session_info,
6169  const std::string& table_name) {
6170  auto user_metadata = session_info.get_currentUser();
6171  auto& cat = session_info.getCatalog();
6172  DBObject dbObject(table_name, TableDBObjectType);
6173  dbObject.loadKey(cat);
6175  std::vector<DBObject> privObjects;
6176  privObjects.push_back(dbObject);
6177  if (!SysCatalog::instance().checkPrivileges(user_metadata, privObjects)) {
6178  THROW_DB_EXCEPTION("Violation of access privileges: user " +
6179  user_metadata.userLoggable() +
6180  " has no insert privileges for table " + table_name + ".");
6181  }
6182 }
6183 
// Handles a request to switch the session's execution mode to GPU or CPU and logs
// the change at INFO level with the user's loggable name.
// NOTE(review): the signature line (which declares `session_ptr`) and one statement in
// each case are not visible in this listing; presumably those statements store the
// new device type on the session -- confirm.
//
// Throws TDBException when GPU mode is requested while cpu_mode_only_ is set.
6185  const TExecuteMode::type mode) {
6186  const std::string user_name = session_ptr->get_currentUser().userLoggable();
6187  switch (mode) {
6188  case TExecuteMode::GPU:
6189  if (cpu_mode_only_) {
6190  TDBException e;
6191  e.error_msg = "Cannot switch to GPU mode in a server started in CPU-only mode.";
6192  throw e;
6193  }
6195  LOG(INFO) << "User " << user_name << " sets GPU mode.";
6196  break;
6197  case TExecuteMode::CPU:
6199  LOG(INFO) << "User " << user_name << " sets CPU mode.";
6200  break;
6201  }
6202 }
6203 
// Executes the relational algebra plan `query_ra` through a RelAlgExecutor and
// stores the outcome in `_return`.
//
// _return                    receives the execution result; its execution time is
//                            updated to the previous value plus the measured run
//                            time, minus the result set's queue time when rows exist
// query_state_proxy          supplies the timer and the shared query state
// query_ra                   plan handed to RelAlgExecutor
// executor_device_type       first member of the CompilationOptions
// just_validate              also enables loop joins and marks the run as
//                            validate/explain
// find_push_down_candidates  forwarded into the ExecutionOptions
// explain_info               selects the explain behaviour
// executor_index             executor to use; Executor::UNITARY_EXECUTOR_ID when unset
//
// Returns the pushed-down filter info reported by the result when it is non-empty,
// otherwise an empty vector.
// NOTE(review): several lines of this function are not visible in this listing (parts
// of the getExecutor() call, of both option initializers, and the bodies of the final
// explain branches). `column_format`, `first_n` and `at_most_n` are not referenced by
// any visible line.
6204 std::vector<PushedDownFilterInfo> DBHandler::execute_rel_alg(
6205  ExecutionResult& _return,
6206  QueryStateProxy query_state_proxy,
6207  const std::string& query_ra,
6208  const bool column_format,
6209  const ExecutorDeviceType executor_device_type,
6210  const int32_t first_n,
6211  const int32_t at_most_n,
6212  const bool just_validate,
6213  const bool find_push_down_candidates,
6214  const ExplainInfo& explain_info,
6215  const std::optional<size_t> executor_index) const {
6216  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6217  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
6218  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
6219  auto executor = Executor::getExecutor(
6220  executor_index ? *executor_index : Executor::UNITARY_EXECUTOR_ID,
6221  jit_debug_ ? "/tmp" : "",
6222  jit_debug_ ? "mapdquery" : "",
6224  RelAlgExecutor ra_executor(
6225  executor.get(), query_ra, query_state_proxy->shared_from_this());
6226  CompilationOptions co = {executor_device_type,
6227  /*hoist_literals=*/true,
6230  /*allow_lazy_fetch=*/true,
6231  /*filter_on_deleted_column=*/true,
6232  explain_info.isOptimizedExplain()
// true when the query is only being explained or validated; used below to keep
// runtime query interrupt switched off for such runs
6236  auto validate_or_explain_query =
6237  explain_info.isJustExplain() || explain_info.isCalciteExplain() || just_validate;
6238  ExecutionOptions eo = {
6240  false,
6242  explain_info.isJustExplain(),
6243  allow_loop_joins_ || just_validate,
6245  jit_debug_,
6246  just_validate,
6249  find_push_down_candidates,
6250  explain_info.isCalciteExplain(),
6252  g_enable_runtime_query_interrupt && !validate_or_explain_query &&
6253  !query_state_proxy->getConstSessionInfo()->get_session_id().empty(),
// add the measured run time to whatever execution time `_return` already carried
6257  auto execution_time_ms =
6258  _return.getExecutionTime() + measure<>::execution([&]() {
6259  _return = ra_executor.executeRelAlgQuery(
6260  co, eo, explain_info.isPlanExplain(), explain_info.isVerbose(), nullptr);
6261  });
6262  // reduce execution time by the time spent during queue waiting
6263  const auto rs = _return.getRows();
6264  if (rs) {
6265  execution_time_ms -= rs->getQueueTime();
6266  }
6267  _return.setExecutionTime(execution_time_ms);
6268  const auto& filter_push_down_info = _return.getPushedDownFilterInfo();
6269  if (!filter_push_down_info.empty()) {
6270  return filter_push_down_info;
6271  }
6272  if (explain_info.isJustExplain()) {
6274  } else if (!explain_info.isCalciteExplain()) {
6276  }
6277  return {};
6278 }
6279 
6280 std::vector<TargetMetaInfo> DBHandler::getTargetMetaInfo(
6281  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const {
6282  std::vector<TargetMetaInfo> result;
6283  for (const auto& target : targets) {
6284  CHECK(target);
6285  CHECK(target->get_expr());
6286  result.emplace_back(target->get_resname(), target->get_expr()->get_type_info());
6287  }
6288  return result;
6289 }
6290 
6291 std::vector<std::string> DBHandler::getTargetNames(
6292  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const {
6293  std::vector<std::string> names;
6294  for (const auto& target : targets) {
6295  CHECK(target);
6296  CHECK(target->get_expr());
6297  names.push_back(target->get_resname());
6298  }
6299  return names;
6300 }
6301 
6302 std::vector<std::string> DBHandler::getTargetNames(
6303  const std::vector<TargetMetaInfo>& targets) const {
6304  std::vector<std::string> names;
6305  for (const auto& target : targets) {
6306  names.push_back(target.get_resname());
6307  }
6308  return names;
6309 }
6310 
6311 void DBHandler::convertRows(TQueryResult& _return,
6312  QueryStateProxy query_state_proxy,
6313  const std::vector<TargetMetaInfo>& targets,
6314  const ResultSet& results,
6315  const bool column_format,
6316  const int32_t first_n,
6317  const int32_t at_most_n) {
6318  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6319  _return.row_set.row_desc = ThriftSerializers::target_meta_infos_to_thrift(targets);
6320  int32_t fetched{0};
6321  if (column_format) {
6322  _return.row_set.is_columnar = true;
6323  std::vector<TColumn> tcolumns(results.colCount());
6324  while (first_n == -1 || fetched < first_n) {
6325  const auto crt_row = results.getNextRow(true, true);
6326  if (crt_row.empty()) {
6327  break;
6328  }
6329  ++fetched;
6330  if (at_most_n >= 0 && fetched > at_most_n) {
6331  THROW_DB_EXCEPTION("The result contains more rows than the specified cap of " +
6332  std::to_string(at_most_n));
6333  }
6334  for (size_t i = 0; i < results.colCount(); ++i) {
6335  const auto agg_result = crt_row[i];
6336  value_to_thrift_column(agg_result, targets[i].get_type_info(), tcolumns[i]);
6337  }
6338  }
6339  for (size_t i = 0; i < results.colCount(); ++i) {
6340  _return.row_set.columns.push_back(tcolumns[i]);
6341  }
6342  } else {
6343  _return.row_set.is_columnar = false;
6344  while (first_n == -1 || fetched < first_n) {
6345  const auto crt_row = results.getNextRow(true, true);
6346  if (crt_row.empty()) {
6347  break;
6348  }
6349  ++fetched;
6350  if (at_most_n >= 0 && fetched > at_most_n) {
6351  THROW_DB_EXCEPTION("The result contains more rows than the specified cap of " +
6352  std::to_string(at_most_n));
6353  }
6354  TRow trow;
6355  trow.cols.reserve(results.colCount());
6356  for (size_t i = 0; i < results.colCount(); ++i) {
6357  const auto agg_result = crt_row[i];
6358  trow.cols.push_back(value_to_thrift(agg_result, targets[i].get_type_info()));
6359  }
6360  _return.row_set.rows.push_back(trow);
6361  }
6362  }
6363 }
6364 
6365 // create simple result set to return a single column result
6366 void DBHandler::createSimpleResult(TQueryResult& _return,
6367  const ResultSet& results,
6368  const bool column_format,
6369  const std::string label) {
6370  CHECK_EQ(size_t(1), results.rowCount());
6371  TColumnType proj_info;
6372  proj_info.col_name = label;
6373  proj_info.col_type.type = TDatumType::STR;
6374  proj_info.col_type.nullable = false;
6375  proj_info.col_type.is_array = false;
6376  _return.row_set.row_desc.push_back(proj_info);
6377  const auto crt_row = results.getNextRow(true, true);
6378  const auto tv = crt_row[0];
6379  CHECK(results.getNextRow(true, true).empty());
6380  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
6381  CHECK(scalar_tv);
6382  const auto s_n = boost::get<NullableString>(scalar_tv);
6383  CHECK(s_n);
6384  const auto s = boost::get<std::string>(s_n);
6385  CHECK(s);
6386  if (column_format) {
6387  TColumn tcol;
6388  tcol.data.str_col.push_back(*s);
6389  tcol.nulls.push_back(false);
6390  _return.row_set.is_columnar = true;
6391  _return.row_set.columns.push_back(tcol);
6392  } else {
6393  TDatum explanation;
6394  explanation.val.str_val = *s;
6395  explanation.is_null = false;
6396  TRow trow;
6397  trow.cols.push_back(explanation);
6398  _return.row_set.is_columnar = false;
6399  _return.row_set.rows.push_back(trow);
6400  }
6401 }
6402 
6403 void DBHandler::convertExplain(TQueryResult& _return,
6404  const ResultSet& results,
6405  const bool column_format) {
6406  createSimpleResult(_return, results, column_format, "Explanation");
6407 }
6408 
6409 void DBHandler::convertResult(TQueryResult& _return,
6410  const ResultSet& results,
6411  const bool column_format) {
6412  createSimpleResult(_return, results, column_format, "Result");
6413 }
6414 
6415 // this all should be moved out of here to catalog
// Returns the result of SysCatalog::checkPrivileges() for the session's current user
// against the table described by `td`, with `access_priv` set on the table object.
// NOTE(review): the signature line (which declares `session_info`) is not visible in
// this listing.
//
// td           table to check; must be non-null (CHECK)
// access_priv  privileges to test for
6417  const TableDescriptor* td,
6418  const AccessPrivileges access_priv) {
6419  CHECK(td);
6420  auto& cat = session_info.getCatalog();
6421  std::vector<DBObject> privObjects;
6422  DBObject dbObject(td->tableName, TableDBObjectType);
6423  dbObject.loadKey(cat);
6424  dbObject.setPrivileges(access_priv);
6425  privObjects.push_back(dbObject);
6426  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
6427  privObjects);
6428 }
6429 
6430 // TODO(max): usage of it was accidentally lost. Need to restore this check
// Erases sessions from sessions_store_ that a DDL statement makes stale:
//   DROP DATABASE / RENAME DATABASE -> sessions of the dropped / previous database name
//   DROP USER / RENAME USER         -> sessions of the dropped / old user name
// Any other statement type is ignored.
// NOTE(review): the signature line (which declares `ddl`) is not visible in this
// listing.
6432  if (const auto drop_db_stmt = dynamic_cast<Parser::DropDBStmt*>(ddl)) {
6433  sessions_store_->eraseByDB(*drop_db_stmt->getDatabaseName());
6434  } else if (const auto rename_db_stmt = dynamic_cast<Parser::RenameDBStmt*>(ddl)) {
6435  sessions_store_->eraseByDB(*rename_db_stmt->getPreviousDatabaseName());
6436  } else if (const auto drop_user_stmt = dynamic_cast<Parser::DropUserStmt*>(ddl)) {
6437  sessions_store_->eraseByUser(*drop_user_stmt->getUserName());
6438  } else if (const auto rename_user_stmt = dynamic_cast<Parser::RenameUserStmt*>(ddl)) {
6439  sessions_store_->eraseByUser(*rename_user_stmt->getOldUserName());
6440  }
6441 }
6442 
// Executes one SQL statement and stores its outcome in `_return`.
//
// The statement text is taken from `query_state_proxy`, classified with
// ParserWrapper, and dispatched to exactly one of these paths:
//   - INSERT INTO ... SELECT (pw.is_itas)
//   - CREATE TABLE AS SELECT (pw.is_ctas)
//   - INSERT ... VALUES      (DMLType::Insert)
//   - VALIDATE               (pw.is_validate, superuser only)
//   - COPY FROM              (pw.is_copy && !pw.is_copy_to)
//   - other DDL              (pw.is_ddl, forwarded to executeDdl)
//   - EXPLAIN of DDL/DML     (pw.is_other_explain, always rejected)
//   - everything else, which is run through execute_rel_alg on the dispatch queue
//
// query_state_proxy     supplies the query string and the session
// column_format, executor_device_type, first_n, at_most_n
//                       forwarded to execute_rel_alg on the last path
// use_calcite           when true, the last path first turns the query into a plan
//                       via parse_to_ra
//
// NOTE(review): the signature line and several interior lines are not visible in this
// listing. In particular `locks` and `executor` are used below without a visible
// declaration; `locks` is presumably the final parameter and `executor` a local
// obtained shortly before the interrupt handling -- confirm.
6444  QueryStateProxy query_state_proxy,
6445  const bool column_format,
6446  const ExecutorDeviceType executor_device_type,
6447  const int32_t first_n,
6448  const int32_t at_most_n,
6449  const bool use_calcite,
6451  if (leaf_handler_) {
6452  leaf_handler_->flush_queue();
6453  }
6454  auto const query_str = strip(query_state_proxy->getQueryStr());
6455  auto session_ptr = query_state_proxy->getConstSessionInfo();
6456  // Call to DistributedValidate() below may change cat.
6457  auto& cat = session_ptr->getCatalog();
// both locks start out empty; at most one is acquired later, depending on the path
6458  legacylockmgr::ExecutorWriteLock execute_write_lock;
6459  legacylockmgr::ExecutorReadLock execute_read_lock;
6460 
6461  ParserWrapper pw{query_str};
// log CPU memory status now and again on scope exit, using a query string
// shortened according to g_max_log_length
6462  auto [query_substr, post_fix] = ::substring(query_str, g_max_log_length);
6463  std::ostringstream oss;
6464  oss << query_substr << post_fix;
6465  auto const reduced_query_str = oss.str();
6466  log_system_cpu_memory_status("Start query execution: " + reduced_query_str, cat);
6467  ScopeGuard cpu_system_memory_logging = [&cat, &reduced_query_str]() {
6468  log_system_cpu_memory_status("Finish query execution: " + reduced_query_str, cat);
6469  };
6470 
6471  // test to see if db/catalog is writable before execution of a writable SQL/DDL command
6472  // TODO: move to execute() (?)
6473  // instead of pre-filtering here based upon incomplete info ?
// NOTE(review): the body of this check is not visible in this listing
6474  if (!pw.is_refresh && pw.getQueryType() != ParserWrapper::QueryType::Read &&
6475  pw.getQueryType() != ParserWrapper::QueryType::SchemaRead &&
6476  pw.getQueryType() != ParserWrapper::QueryType::Unknown) {
6478  }
6479 
6480  if (pw.is_itas) {
6481  // itas can attempt to execute here
6482  check_read_only("insert_into_table");
6483 
// the plan result is a JSON document whose "payload" object describes the statement
6484  std::string query_ra;
6485  _return.addExecutionTime(measure<>::execution([&]() {
6486  TPlanResult result;
6487  std::tie(result, locks) =
6488  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6489  query_ra = result.plan_result;
6490  }));
6491  rapidjson::Document ddl_query;
6492  ddl_query.Parse(query_ra);
6493  CHECK(ddl_query.HasMember("payload"));
6494  CHECK(ddl_query["payload"].IsObject());
6495  auto stmt = Parser::InsertIntoTableAsSelectStmt(ddl_query["payload"].GetObject());
6496  _return.addExecutionTime(
6497  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6498  return;
6499 
6500  } else if (pw.is_ctas) {
6501  // ctas can attempt to execute here
6502  check_read_only("create_table_as");
6503 
6504  std::string query_ra;
6505  _return.addExecutionTime(measure<>::execution([&]() {
6506  TPlanResult result;
6507  std::tie(result, locks) =
6508  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6509  query_ra = result.plan_result;
6510  }));
// unlike the other statement paths, an empty plan is tolerated here and simply
// results in no execution
6511  if (query_ra.size()) {
6512  rapidjson::Document ddl_query;
6513  ddl_query.Parse(query_ra);
6514  CHECK(ddl_query.HasMember("payload"));
6515  CHECK(ddl_query["payload"].IsObject());
6516  auto stmt = Parser::CreateTableAsSelectStmt(ddl_query["payload"].GetObject());
6517  _return.addExecutionTime(
6518  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6519  }
6520  return;
6521 
6522  } else if (pw.getDMLType() == ParserWrapper::DMLType::Insert) {
6523  check_read_only("insert_into_table");
6524  std::string query_ra;
6525  _return.addExecutionTime(measure<>::execution([&]() {
6526  TPlanResult result;
6527  std::tie(result, locks) =
6528  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6529  query_ra = result.plan_result;
6530  }));
6531  rapidjson::Document ddl_query;
6532  ddl_query.Parse(query_ra);
6533  CHECK(ddl_query.HasMember("payload"));
6534  CHECK(ddl_query["payload"].IsObject());
6535  auto stmt = Parser::InsertValuesStmt(cat, ddl_query["payload"].GetObject());
6536  _return.addExecutionTime(
6537  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6538  return;
6539 
6540  } else if (pw.is_validate) {
6541  // check user is superuser
6542  if (!session_ptr->get_currentUser().isSuper) {
6543  throw std::runtime_error("Superuser is required to run VALIDATE");
6544  }
6545 
6546  std::string query_ra;
6547  _return.addExecutionTime(measure<>::execution([&]() {
6548  TPlanResult result;
6549  std::tie(result, locks) =
6550  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6551  query_ra = result.plan_result;
6552  }));
6553  rapidjson::Document ddl_query;
6554  ddl_query.Parse(query_ra);
6555  CHECK(ddl_query.HasMember("payload"));
6556  CHECK(ddl_query["payload"].IsObject());
6557  auto validate_stmt = Parser::ValidateStmt(ddl_query["payload"].GetObject());
// NOTE(review): the validator run below is timed by an inner addExecutionTime()
// that sits inside this outer timed lambda, so that time appears to be added twice
6558  _return.addExecutionTime(measure<>::execution([&]() {
6559  // Prevent any other query from running while doing validate
6560  execute_write_lock = legacylockmgr::getExecuteWriteLock();
6561 
6562  std::string output{"Result for validate"};
6563  if (g_cluster) {
6564  THROW_DB_EXCEPTION("Validate command should be executed on the aggregator.");
6565  } else {
6566  _return.addExecutionTime(measure<>::execution([&]() {
6567  const system_validator::SingleNodeValidator validator(validate_stmt.getType(),
6568  cat);
6569  output = validator.validate();
6570  }));
6571  }
6573  }));
6574  return;
6575 
6576  } else if (pw.is_copy && !pw.is_copy_to) {
6577  std::unique_ptr<Parser::Stmt> stmt =
6578  Parser::create_stmt_for_query(query_str, *session_ptr);
6579  const auto import_stmt = dynamic_cast<Parser::CopyTableStmt*>(stmt.get());
6580  if (import_stmt) {
6581  if (g_cluster && !leaf_aggregator_.leafCount()) {
6582  // Don't allow copy from imports directly on a leaf node
6583  throw std::runtime_error(
6584  "Cannot import on an individual leaf. Please import from the Aggregator.");
6585  } else if (leaf_aggregator_.leafCount() > 0) {
6587  [&]() { execute_distributed_copy_statement(import_stmt, *session_ptr); }));
6588  } else {
6590  [&]() { import_stmt->execute(*session_ptr, read_only_); }));
6591  }
6592 
6593  // Read response message
6594  _return.updateResultSet(*import_stmt->return_message.get(),
6596  import_stmt->get_success());
6597 
6598  // get deferred_copy_from info
// remember the payload under this session id in deferred_copy_from_sessions
6599  if (import_stmt->was_deferred_copy_from()) {
6600  DeferredCopyFromState deferred_copy_from_state;
6601  import_stmt->get_deferred_copy_from_payload(deferred_copy_from_state.table,
6602  deferred_copy_from_state.file_name,
6603  deferred_copy_from_state.copy_params,
6604  deferred_copy_from_state.partitions);
6605  deferred_copy_from_sessions.add(session_ptr->get_session_id(),
6606  deferred_copy_from_state);
6607  }
6608 
6609  // } else {
6610  // possibly a failure case:
6611  // CopyTableStmt failed to be created, or failed typecast
6612  // but historically just returned
6613  // }
6614  }
6615  return;
6616 
6617  } else if (pw.is_ddl) {
6618  std::string query_ra;
6619  _return.addExecutionTime(measure<>::execution([&]() {
6620  TPlanResult result;
6621  std::tie(result, locks) =
6622  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6623  query_ra = result.plan_result;
6624  }));
6625  executeDdl(_return, query_ra, session_ptr);
6626  return;
6627 
6628  } else if (pw.is_other_explain) {
6629  // does nothing
// the return that follows the throw is unreachable
6630  throw std::runtime_error("EXPLAIN not yet supported for DDL or DML commands.");
6631  return;
6632 
6633  } else {
6634  // includes:
6635  // explain that is not 'other'
6636  // copy_to
6637  // DmlUpdate DmlDelete
6638  // anything else that failed to match
6639 
6640  if (pw.getDMLType() != ParserWrapper::DMLType::NotDML) {
6641  check_read_only("modify");
6642  }
6643 
6644  execute_read_lock = legacylockmgr::getExecuteReadLock();
6645 
// without calcite the raw query string is used as the plan
6646  std::string query_ra = query_str;
6647  if (use_calcite) {
6648  _return.addExecutionTime(measure<>::execution([&]() {
6649  TPlanResult result;
6650  std::tie(result, locks) =
6651  parse_to_ra(query_state_proxy, query_str, {}, true, system_parameters_);
6652  query_ra = result.plan_result;
6653  }));
6654  }
6655  std::string query_ra_calcite_explain;
6656  ExplainInfo explain(query_str);
6657  if (explain.isCalciteExplain()) {
// NOTE(review): the condition opening the inner block below is not visible in this
// listing
6659  // return the ra as the result
6660  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6661  return;
6662  }
6663  CHECK(!locks.empty());
6664  query_ra_calcite_explain =
6665  parse_to_ra(
6666  query_state_proxy, explain.ActualQuery(), {}, false, system_parameters_)
6667  .first.plan_result;
6668  }
6669  std::vector<PushedDownFilterInfo> filter_push_down_requests;
6670  auto submitted_time_str = query_state_proxy->getQuerySubmittedTime();
6671  auto query_session = session_ptr ? session_ptr->get_session_id() : "";
// The task captures locals by reference. This function blocks on the task's future
// at the end of this branch, so the task has completed before those locals go out
// of scope on that path.
6672  auto execute_rel_alg_task = std::make_shared<QueryDispatchQueue::Task>(
6673  [this,
6674  &filter_push_down_requests,
6675  &_return,
6676  query_state_proxy,
6677  &explain,
6678  &query_ra_calcite_explain,
6679  &query_ra,
6680  &query_str,
6681  &locks,
6682  column_format,
6683  executor_device_type,
6684  first_n,
6685  at_most_n,
6686  parent_thread_local_ids =
6687  logger::thread_local_ids()](const size_t executor_index) {
6688  // if we find proper filters we need to "re-execute" the query
6689  // with a modified query plan (i.e., which has pushdowned filter)
6690  // otherwise this trial just executes the query and keeps corresponding query
6691  // resultset in _return object
6692  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
6693  filter_push_down_requests = execute_rel_alg(
6694  _return,
6695  query_state_proxy,
6696  explain.isCalciteExplain() ? query_ra_calcite_explain : query_ra,
6697  column_format,
6698  executor_device_type,
6699  first_n,
6700  at_most_n,
6701  /*just_validate=*/false,
6703  explain,
6704  executor_index);
6705  if (explain.isCalciteExplain()) {
6706  if (filter_push_down_requests.empty()) {
6707  // we only reach here if filter push down was enabled, but no filter
6708  // push down candidate was found
6709  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6710  } else {
6711  CHECK(!locks.empty());
6712  std::vector<TFilterPushDownInfo> filter_push_down_info;
6713  for (const auto& req : filter_push_down_requests) {
6714  TFilterPushDownInfo filter_push_down_info_for_request;
6715  filter_push_down_info_for_request.input_prev = req.input_prev;
6716  filter_push_down_info_for_request.input_start = req.input_start;
6717  filter_push_down_info_for_request.input_next = req.input_next;
6718  filter_push_down_info.push_back(filter_push_down_info_for_request);
6719  }
6720  query_ra = parse_to_ra(query_state_proxy,
6721  query_str,
6722  filter_push_down_info,
6723  false,
6725  .first.plan_result;
6726  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6727  }
6728  } else {
6729  if (!filter_push_down_requests.empty()) {
6730  CHECK(!locks.empty());
6732  query_state_proxy,
6733  query_ra,
6734  column_format,
6735  executor_device_type,
6736  first_n,
6737  at_most_n,
6738  false,
6739  false,
6740  filter_push_down_requests);
6741  }
6742  }
6743  });
// With runtime interrupt enabled, register the query as PENDING_QUEUE and poll every
// 10 ms until the dispatch queue has an idle worker, so that a pending query can be
// interrupted while it waits.
6746  if (g_enable_runtime_query_interrupt && !query_session.empty() &&
6747  !explain.isSelectExplain()) {
6748  executor->enrollQuerySession(query_session,
6749  query_str,
6750  submitted_time_str,
6752  QuerySessionStatus::QueryStatus::PENDING_QUEUE);
6753  while (!dispatch_queue_->hasIdleWorker()) {
6754  try {
6755  executor->checkPendingQueryStatus(query_session);
6756  } catch (QueryExecutionError& e) {
6757  executor->clearQuerySessionStatus(query_session, submitted_time_str);
6759  throw std::runtime_error(
6760  "Query execution has been interrupted (pending query).");
6761  }
6762  throw e;
6763  }
6764  std::this_thread::sleep_for(std::chrono::milliseconds(10));
6765  }
6766  }
// the second argument to submit() is true for UPDATE and DELETE statements
6767  dispatch_queue_->submit(execute_rel_alg_task,
6768  pw.getDMLType() == ParserWrapper::DMLType::Update ||
6769  pw.getDMLType() == ParserWrapper::DMLType::Delete);
6770  auto result_future = execute_rel_alg_task->get_future();
6771  result_future.get();
6772  return;
6773  }
6774 }
6775 
// Re-plans the current query with the selected pushed-down filters and executes the
// resulting relational algebra plan.
//
// _return                    receives the result; planning time is added to its
//                            execution time
// query_state_proxy          supplies the original query string
// query_ra                   overwritten with the newly derived plan
// column_format, executor_device_type, first_n, at_most_n
//                            forwarded to execute_rel_alg
// filter_push_down_requests  filters to hand to the planner
//
// NOTE(review): the signature line and one argument line of the parse_to_ra() call are
// not visible in this listing. `just_explain` and `is_calcite_explain` are not
// referenced by any visible line.
6777  ExecutionResult& _return,
6778  QueryStateProxy query_state_proxy,
6779  std::string& query_ra,
6780  const bool column_format,
6781  const ExecutorDeviceType executor_device_type,
6782  const int32_t first_n,
6783  const int32_t at_most_n,
6784  const bool just_explain,
6785  const bool is_calcite_explain,
6786  const std::vector<PushedDownFilterInfo>& filter_push_down_requests) {
6787  // collecting the selected filters' info to be sent to Calcite:
6788  std::vector<TFilterPushDownInfo> filter_push_down_info;
6789  for (const auto& req : filter_push_down_requests) {
6790  TFilterPushDownInfo filter_push_down_info_for_request;
6791  filter_push_down_info_for_request.input_prev = req.input_prev;
6792  filter_push_down_info_for_request.input_start = req.input_start;
6793  filter_push_down_info_for_request.input_next = req.input_next;
6794  filter_push_down_info.push_back(filter_push_down_info_for_request);
6795  }
6796  // deriving the new relational algebra plan with respect to the pushed down filters
6797  _return.addExecutionTime(measure<>::execution([&]() {
6798  query_ra = parse_to_ra(query_state_proxy,
6799  query_state_proxy->getQueryStr(),
6800  filter_push_down_info,
6801  false,
6803  .first.plan_result;
6804  }));
6805 
6806  // execute the new relational algebra plan:
// run as a plain (non-explain) query; the returned push-down info is discarded
6807  auto explain_info = ExplainInfo(ExplainInfo::ExplainType::None);
6808  execute_rel_alg(_return,
6809  query_state_proxy,
6810  query_ra,
6811  column_format,
6812  executor_device_type,
6813  first_n,
6814  at_most_n,
6815  /*just_validate=*/false,
6816  /*find_push_down_candidates=*/false,
6817  explain_info);
6818 }
6819 
// Intentionally empty in this build: the body does nothing with `copy_stmt` or
// `session_info`.
// NOTE(review): the signature line is not visible in this listing.
6821  Parser::CopyTableStmt* copy_stmt,
6822  const Catalog_Namespace::SessionInfo& session_info) {}
6823 
// File-local helpers for in-memory system tables.
// NOTE(review): the signature lines of both helpers and the initializers of the two
// lock variables are not visible in this listing.
6824 namespace {
// For an in-memory system table: when g_enable_system_tables is set, removes the
// table's fragmenter, re-reads its metadata and returns true; otherwise throws
// std::runtime_error. Returns false for every other kind of table.
6826  const TableDescriptor& td) {
6827  if (td.is_in_memory_system_table) {
6828  if (g_enable_system_tables) {
6829  // Reset system table fragmenter in order to force chunk metadata refetch.
6830  auto table_schema_lock =
6832  auto table_data_lock =
6834  catalog.removeFragmenterForTable(td.tableId);
6835  catalog.getMetadataForTable(td.tableId, true);
6836  return true;
6837  } else {
6838  throw std::runtime_error(
6839  "Query cannot be executed because use of system tables is currently "
6840  "disabled.");
6841  }
6842  }
6843  return false;
6844 }
6845 
// Applies the reset above to every selected table whose second element equals
// shared::kInfoSchemaDbName; the first element is used as the table name. Does
// nothing when no information-schema catalog is available.
6847  const std::vector<std::vector<std::string>>& selected_tables) {
6848  const auto info_schema_catalog =
6850  if (info_schema_catalog) {
6851  for (const auto& table : selected_tables) {
6852  if (table[1] == shared::kInfoSchemaDbName) {
6853  auto td = info_schema_catalog->getMetadataForTable(table[0], false);
6854  CHECK(td);
6855  check_and_reset_in_memory_system_table(*info_schema_catalog, *td);
6856  }
6857  }
6858  }
6859 }
6860 } // namespace
6861 
// Sends a query to Calcite for parsing/planning and returns Calcite's result.
//
// query_state_proxy      used to create the timer whose proxy is passed on to Calcite
// cat                    catalog for which a temporary in-memory Calcite session is
//                        created
// query_str              SQL text; for a select-explain, the wrapped query is sent
//                        instead
// filter_push_down_info  forwarded into the Calcite optimization options
// system_parameters      supplies enable_calcite_view_optimize
// check_privileges       forwarded into the Calcite parsing options
//
// NOTE(review): the signature line and two argument lines of the
// getCalciteOptimizationOption() call are not visible in this listing.
6863  QueryStateProxy query_state_proxy,
6864  const std::shared_ptr<Catalog_Namespace::Catalog>& cat,
6865  const std::string& query_str,
6866  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
6867  const SystemParameters& system_parameters,
6868  const bool check_privileges) {
6869  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6870 
// the temporary Calcite session is removed again when this function exits
6871  heavyai::RequestInfo const request_info(createInMemoryCalciteSession(cat),
6872  logger::request_id());
6873  ScopeGuard cleanup = [&]() { removeInMemoryCalciteSession(request_info.sessionId()); };
6874  ExplainInfo explain(query_str);
6875  std::string const actual_query{explain.isSelectExplain() ? explain.ActualQuery()
6876  : query_str};
6877  auto query_parsing_option =
6878  calcite_->getCalciteQueryParsingOption(legacy_syntax_,
6879  explain.isCalciteExplain(),
6880  check_privileges,
6881  explain.isCalciteExplainDetail());
6882  auto optimization_option = calcite_->getCalciteOptimizationOption(
6883  system_parameters.enable_calcite_view_optimize,
6885  filter_push_down_info,
6887 
// with legacy syntax enabled the query is first passed through pg_shim()
6888  return calcite_->process(timer.createQueryStateProxy(),
6889  legacy_syntax_ ? pg_shim(actual_query) : actual_query,
6890  query_parsing_option,
6891  optimization_option,
6892  request_info.json());
6893 }
6894 
// Plans `query_str` and, when `acquire_locks` is true, collects locks on every table the
// resulting plan selects from, inserts into, updates or deletes from. Returns the plan
// result paired with the collected locks.
// NOTE(review): several source lines are absent from this listing (the declaration of
// `locks`, the call that fills `result`, and the lock-manager expressions passed to
// locks.emplace_back); confirm against the repository before editing this function.
6895 std::pair<TPlanResult, lockmgr::LockedTableDescriptors> DBHandler::parse_to_ra(
6896  QueryStateProxy query_state_proxy,
6897  const std::string& query_str,
6898  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
6899  const bool acquire_locks,
6900  const SystemParameters& system_parameters,
6901  bool check_privileges) {
6902  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6903  ParserWrapper pw{query_str};
6904  TPlanResult result;
      // Only DDL, or statements that are neither "validate" nor "other explain", are
      // planned; for anything else `result` is returned default-constructed.
6906  if (pw.is_ddl || (!pw.is_validate && !pw.is_other_explain)) {
6907  auto cat = query_state_proxy->getConstSessionInfo()->get_catalog_ptr();
6908  // Need to read lock the catalog while determining what table names are used by this
6909  // query, confirming the tables exist, checking the user's permissions, and finally
6910  // locking the individual tables. The catalog lock can be released once the query
6911  // begins running. The table locks will protect the running query.
6912  std::shared_lock<heavyai::DistributedSharedMutex> cat_lock;
      // The catalog mutex is only locked when g_multi_instance is set; otherwise cat_lock
      // stays default-constructed and owns nothing.
6913  if (g_multi_instance) {
6914  cat_lock = std::shared_lock<heavyai::DistributedSharedMutex>(*cat->dcatalogMutex_);
6915  }
      // NOTE(review): the callee name and the assignment target of the call below are not
      // visible in this listing; presumably it produces `result` -- confirm.
6917  cat,
6918  query_str,
6919  filter_push_down_info,
6920  system_parameters,
6921  check_privileges);
6923  result.resolved_accessed_objects.tables_selected_from);
6924 
6925  if (acquire_locks) {
6926  std::set<std::vector<std::string>> write_only_tables;
6927  std::vector<std::vector<std::string>> tables;
6928 
      // Start with the updated and deleted-from tables so they can be recorded below
      // before the selected/inserted tables are appended to the same vector.
6929  tables.insert(tables.end(),
6930  result.resolved_accessed_objects.tables_updated_in.begin(),
6931  result.resolved_accessed_objects.tables_updated_in.end());
6932  tables.insert(tables.end(),
6933  result.resolved_accessed_objects.tables_deleted_from.begin(),
6934  result.resolved_accessed_objects.tables_deleted_from.end());
6935 
6936  // Collect the tables that need a write lock
6937  for (const auto& table : tables) {
6938  write_only_tables.insert(table);
6939  }
6940 
6941  tables.insert(tables.end(),
6942  result.resolved_accessed_objects.tables_selected_from.begin(),
6943  result.resolved_accessed_objects.tables_selected_from.end());
6944  tables.insert(tables.end(),
6945  result.resolved_accessed_objects.tables_inserted_into.begin(),
6946  result.resolved_accessed_objects.tables_inserted_into.end());
6947 
6948  // avoid deadlocks by enforcing a deterministic locking sequence
6949  // first, obtain table schema locks
6950  // then, obtain table data locks
6951  // force sort by database id and table id order in case of name change to
6952  // guarantee fixed order of mutex access
      // Each entry is used as [0] = table name (passed to getMetadataForTable) and
      // [1] = database name (passed to SysCatalog::getCatalog).
      // NOTE(review): neither the catalogs nor the table descriptors are null-checked
      // inside the comparator before being dereferenced.
6953  std::sort(tables.begin(),
6954  tables.end(),
6955  [](const std::vector<std::string>& a, const std::vector<std::string>& b) {
6956  if (a[1] != b[1]) {
6957  const auto cat_a = SysCatalog::instance().getCatalog(a[1]);
6958  const auto cat_b = SysCatalog::instance().getCatalog(b[1]);
6959  return cat_a->getDatabaseId() < cat_b->getDatabaseId();
6960  }
6961  const auto cat = SysCatalog::instance().getCatalog(a[1]);
6962  return cat->getMetadataForTable(a[0], false)->tableId <
6963  cat->getMetadataForTable(b[0], false)->tableId;
6964  });
6965 
6966  // In the case of self-join and possibly other cases, we will
6967  // have duplicate tables. Ensure we only take one for locking below.
      // unique() only collapses adjacent equal entries, so this relies on the sort above
      // having placed duplicates next to each other.
6968  tables.erase(unique(tables.begin(), tables.end()), tables.end());
6969  for (const auto& table : tables) {
6970  const auto cat = SysCatalog::instance().getCatalog(table[1]);
6971  CHECK(cat);
      // First lock per table: a read lock obtained through acquireTableDescriptor; the
      // following branches reuse the descriptor it holds via (*locks.back())().
6972  locks.emplace_back(
6975  lockmgr::ReadLock>::acquireTableDescriptor(*cat, table[0])));
6976  if (write_only_tables.count(table)) {
6977  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
6978  // table data lock will be acquired in the fragmenter during checkpoint.
6979  locks.emplace_back(
6982  cat->getDatabaseId(), (*locks.back())())));
6983  } else {
6984  auto lock_td = (*locks.back())();
      // In-memory system tables take a different lock than regular tables.
      // NOTE(review): the concrete lock types of both branches are not visible here.
6985  if (lock_td->is_in_memory_system_table) {
6986  locks.emplace_back(
6989  cat->getDatabaseId(), lock_td)));
6990  } else {
6991  locks.emplace_back(
6994  cat->getDatabaseId(), lock_td)));
6995  }
6996  }
6997  }
6998  }
6999  }
7000  return std::make_pair(result, std::move(locks));
7001 }
7002 
7003 int64_t DBHandler::query_get_outer_fragment_count(const TSessionId& session_id_or_json,
7004  const std::string& select_query) {
7005  heavyai::RequestInfo const request_info(session_id_or_json);
7006  SET_REQUEST_ID(request_info.requestId());
7007  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7008  if (!leaf_handler_) {
7009  THROW_DB_EXCEPTION("Distributed support is disabled.");
7010  }
7011  try {
7012  return leaf_handler_->query_get_outer_fragment_count(request_info.sessionId(),
7013  select_query);
7014  } catch (std::exception& e) {
7015  THROW_DB_EXCEPTION(e.what());
7016  }
7017 }
7018 
7019 void DBHandler::check_table_consistency(TTableMeta& _return,
7020  const TSessionId& session_id_or_json,
7021  const int32_t table_id) {
7022  heavyai::RequestInfo const request_info(session_id_or_json);
7023  SET_REQUEST_ID(request_info.requestId());
7024  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7025  if (!leaf_handler_) {
7026  THROW_DB_EXCEPTION("Distributed support is disabled.");
7027  }
7028  try {
7029  leaf_handler_->check_table_consistency(_return, request_info.sessionId(), table_id);
7030  } catch (std::exception& e) {
7031  THROW_DB_EXCEPTION(e.what());
7032  }
7033 }
7034 
7035 void DBHandler::start_query(TPendingQuery& _return,
7036  const TSessionId& leaf_session_id_or_json,
7037  const TSessionId& parent_session_id_or_json,
7038  const std::string& serialized_rel_alg_dag,
7039  const std::string& start_time_str,
7040  const bool just_explain,
7041  const std::vector<int64_t>& outer_fragment_indices) {
7042  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
7043  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
7044  SET_REQUEST_ID(leaf_request_info.requestId());
7045  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
7046  auto session_ptr = stdlog.getConstSessionInfo();
7047  if (!leaf_handler_) {
7048  THROW_DB_EXCEPTION("Distributed support is disabled.");
7049  }
7050  LOG(INFO) << "start_query :" << *session_ptr << " :" << just_explain;
7051  auto time_ms = measure<>::execution([&]() {
7052  try {
7053  leaf_handler_->start_query(_return,
7054  leaf_request_info.sessionId(),
7055  parent_request_info.sessionId(),
7056  serialized_rel_alg_dag,
7057  start_time_str,
7058  just_explain,
7059  outer_fragment_indices);
7060  } catch (std::exception& e) {
7061  THROW_DB_EXCEPTION(e.what());
7062  }
7063  });
7064  LOG(INFO) << "start_query-COMPLETED " << time_ms << "ms "
7065  << "id is " << _return.id;
7066 }
7067 
7068 void DBHandler::execute_query_step(TStepResult& _return,
7069  const TPendingQuery& pending_query,
7070  const TSubqueryId subquery_id,
7071  const std::string& start_time_str) {
7072  SET_REQUEST_ID(0); // No SessionID is available
7073  if (!leaf_handler_) {
7074  THROW_DB_EXCEPTION("Distributed support is disabled.");
7075  }
7076  LOG(INFO) << "execute_query_step : id:" << pending_query.id;
7077  auto time_ms = measure<>::execution([&]() {
7078  try {
7079  leaf_handler_->execute_query_step(
7080  _return, pending_query, subquery_id, start_time_str);
7081  } catch (std::exception& e) {
7082  THROW_DB_EXCEPTION(e.what());
7083  }
7084  });
7085  LOG(INFO) << "execute_query_step-COMPLETED " << time_ms << "ms";
7086 }
7087 
7088 void DBHandler::broadcast_serialized_rows(const TSerializedRows& serialized_rows,
7089  const TRowDescriptor& row_desc,
7090  const TQueryId query_id,
7091  const TSubqueryId subquery_id,
7092  const bool is_final_subquery_result) {
7093  if (!leaf_handler_) {
7094  THROW_DB_EXCEPTION("Distributed support is disabled.");
7095  }
7096  LOG(INFO) << "BROADCAST-SERIALIZED-ROWS id:" << query_id;
7097  auto time_ms = measure<>::execution([&]() {
7098  try {
7099  leaf_handler_->broadcast_serialized_rows(
7100  serialized_rows, row_desc, query_id, subquery_id, is_final_subquery_result);
7101  } catch (std::exception& e) {
7102  THROW_DB_EXCEPTION(e.what());
7103  }
7104  });
7105  LOG(INFO) << "BROADCAST-SERIALIZED-ROWS COMPLETED " << time_ms << "ms";
7106 }
7107 
// Wraps the per-column buffers of a Thrift TInsertChunks payload in pass-through
// buffers, builds one chunk per table column, and inserts them through the table's
// fragmenter without checkpointing (insertChunksNoCheckpoint). Any std::exception is
// re-raised through THROW_DB_EXCEPTION with its message.
// NOTE(review): the right-hand sides of both lock declarations near the end are not
// visible in this listing; confirm against the repository.
7108 void DBHandler::insert_chunks(const TSessionId& session_id_or_json,
7109  const TInsertChunks& thrift_insert_chunks) {
7110  try {
7111  heavyai::RequestInfo const request_info(session_id_or_json);
7112  SET_REQUEST_ID(request_info.requestId());
7113  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7114  auto session_ptr = stdlog.getConstSessionInfo();
7115  auto const& cat = session_ptr->getCatalog();
7116  Fragmenter_Namespace::InsertChunks insert_chunks{thrift_insert_chunks.table_id,
7117  thrift_insert_chunks.db_id};
      // Copy the valid row indices element by element into the fragmenter-side vector.
7118  insert_chunks.valid_row_indices.resize(thrift_insert_chunks.valid_indices.size());
7119  std::copy(thrift_insert_chunks.valid_indices.begin(),
7120  thrift_insert_chunks.valid_indices.end(),
7121  insert_chunks.valid_row_indices.begin());
7122 
7123  auto columns =
7124  cat.getAllColumnMetadataForTable(insert_chunks.table_id, false, false, true);
      // The payload must carry exactly one data entry per column returned above; the
      // loop below walks both sequences in lockstep.
7125  CHECK_EQ(columns.size(), thrift_insert_chunks.data.size());
7126 
      // Owns the buffer wrappers for the duration of the insert; the chunks only hold
      // pointers into this list.
7127  std::list<foreign_storage::PassThroughBuffer> pass_through_buffers;
7128  auto thrift_data_it = thrift_insert_chunks.data.begin();
7129  for (const auto col_desc : columns) {
7130  AbstractBuffer* data_buffer = nullptr;
7131  AbstractBuffer* index_buffer = nullptr;
7132  data_buffer = &pass_through_buffers.emplace_back(
7133  reinterpret_cast<const int8_t*>(thrift_data_it->data_buffer.data()),
7134  thrift_data_it->data_buffer.size());
7135  data_buffer->initEncoder(col_desc->columnType);
7136  data_buffer->getEncoder()->setNumElems(thrift_insert_chunks.num_rows);
      // Only variable-length columns get an index buffer; it must be non-empty unless
      // the payload has zero rows.
7137  if (col_desc->columnType.is_varlen_indeed()) {
7138  CHECK(thrift_insert_chunks.num_rows == 0 ||
7139  thrift_data_it->index_buffer.size() > 0);
7140  index_buffer = &pass_through_buffers.emplace_back(
7141  reinterpret_cast<const int8_t*>(thrift_data_it->index_buffer.data()),
7142  thrift_data_it->index_buffer.size());
7143  }
7144 
7145  insert_chunks.chunks[col_desc->columnId] =
7146  Chunk_NS::Chunk::getChunk(col_desc, data_buffer, index_buffer, false);
7147  thrift_data_it++;
7148  }
7149 
      // The lock key is built from the catalog's database id and the logical table id
      // that the catalog reports for the payload's table id.
7150  const ChunkKey lock_chunk_key{cat.getDatabaseId(),
7151  cat.getLogicalTableId(insert_chunks.table_id)};
7152  auto table_read_lock =
7154  const auto td = cat.getMetadataForTable(insert_chunks.table_id);
7155  CHECK(td);
7156 
7157  // this should have the same lock sequence as COPY FROM
7158  auto insert_data_lock =
7160  td->fragmenter->insertChunksNoCheckpoint(insert_chunks);
7161 
7162  } catch (const std::exception& e) {
7163  THROW_DB_EXCEPTION(std::string(e.what()));
7164  }
7165 }
7166 
// Converts a Thrift TInsertData payload into per-column data block pointers and inserts
// it through the table's fragmenter without checkpointing (insertDataNoCheckpoint).
// Any std::exception is re-raised through THROW_DB_EXCEPTION with its message.
// NOTE(review): the declaration of `insert_data`, the argument of one
// array_column->push_back call, and the right-hand sides of both lock declarations are
// not visible in this listing; confirm against the repository.
7167 void DBHandler::insert_data(const TSessionId& session_id_or_json,
7168  const TInsertData& thrift_insert_data) {
7169  try {
7170  heavyai::RequestInfo const request_info(session_id_or_json);
7171  SET_REQUEST_ID(request_info.requestId());
7172  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7173  auto session_ptr = stdlog.getConstSessionInfo();
      // One data entry per column id, and is_default is either empty or per column.
7174  CHECK_EQ(thrift_insert_data.column_ids.size(), thrift_insert_data.data.size());
7175  CHECK(thrift_insert_data.is_default.size() == 0 ||
7176  thrift_insert_data.is_default.size() == thrift_insert_data.column_ids.size());
7177  auto const& cat = session_ptr->getCatalog();
7179  insert_data.databaseId = thrift_insert_data.db_id;
7180  insert_data.tableId = thrift_insert_data.table_id;
7181  insert_data.columnIds = thrift_insert_data.column_ids;
7182  insert_data.is_default = thrift_insert_data.is_default;
7183  insert_data.numRows = thrift_insert_data.num_rows;
      // These vectors own the converted string/array columns; the DataBlockPtr entries
      // pushed into insert_data.data point into them.
7184  std::vector<std::unique_ptr<std::vector<std::string>>> none_encoded_string_columns;
7185  std::vector<std::unique_ptr<std::vector<ArrayDatum>>> array_columns;
7186  SQLTypeInfo geo_ti{kNULLT,
7187  false}; // will be filled with the correct info if possible
7188  for (size_t col_idx = 0; col_idx < insert_data.columnIds.size(); ++col_idx) {
7189  const int column_id = insert_data.columnIds[col_idx];
7190  DataBlockPtr p;
7191  const auto cd = cat.getMetadataForColumn(insert_data.tableId, column_id);
7192  CHECK(cd);
7193  const auto& ti = cd->columnType;
      // A column flagged as default is expected to carry a single value; every other
      // column carries numRows values.
7194  size_t rows_expected =
7195  !insert_data.is_default.empty() && insert_data.is_default[col_idx]
7196  ? 1ul
7197  : insert_data.numRows;
      // Numbers, times, booleans and dictionary-encoded strings point straight at the
      // Thrift payload's fixed-length bytes (the cast drops constness; nothing is copied).
7198  if (ti.is_number() || ti.is_time() || ti.is_boolean()) {
7199  p.numbersPtr = (int8_t*)thrift_insert_data.data[col_idx].fixed_len_data.data();
7200  } else if (ti.is_string()) {
7201  if (ti.get_compression() == kENCODING_DICT) {
7202  p.numbersPtr = (int8_t*)thrift_insert_data.data[col_idx].fixed_len_data.data();
7203  } else {
7204  CHECK_EQ(kENCODING_NONE, ti.get_compression());
7205  none_encoded_string_columns.emplace_back(new std::vector<std::string>());
7206  auto& none_encoded_strings = none_encoded_string_columns.back();
7207 
7208  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7209  for (const auto& varlen_str : thrift_insert_data.data[col_idx].var_len_data) {
7210  none_encoded_strings->push_back(varlen_str.payload);
7211  }
7212  p.stringsPtr = none_encoded_strings.get();
7213  }
7214  } else if (ti.is_geometry()) {
      // Geometry payloads are copied as strings, exactly like none-encoded text.
7215  none_encoded_string_columns.emplace_back(new std::vector<std::string>());
7216  auto& none_encoded_strings = none_encoded_string_columns.back();
7217  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7218  for (const auto& varlen_str : thrift_insert_data.data[col_idx].var_len_data) {
7219  none_encoded_strings->push_back(varlen_str.payload);
7220  }
7221  p.stringsPtr = none_encoded_strings.get();
7222 
7223  // A point geo type needs to mark a null sentinel in its physical coord column.
7224  // To recognize the null sentinel for a point, we therefore keep the actual geo
7225  // type and use it when constructing the geo null point.
7226  geo_ti = ti;
7227  } else {
7228  CHECK(ti.is_array());
7229  array_columns.emplace_back(new std::vector<ArrayDatum>());
7230  auto& array_column = array_columns.back();
7231  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7232  for (const auto& t_arr_datum : thrift_insert_data.data[col_idx].var_len_data) {
7233  if (t_arr_datum.is_null) {
      // Null handling depends on the column: a "_coords" column following a POINT
      // geometry, a fixed-size array (get_size() > 0), or any other array.
7234  if ((cd->columnName.find("_coords") != std::string::npos) &&
7235  geo_ti.get_type() == kPOINT) {
7236  // For geo point, we manually mark its null sentinel to coord buffer
7237  array_column->push_back(
7239  } else if (ti.get_size() > 0) {
7240  array_column->push_back(import_export::ImporterUtils::composeNullArray(ti));
7241  } else {
7242  array_column->emplace_back(0, nullptr, true);
7243  }
7244  } else {
7245  ArrayDatum arr_datum;
7246  arr_datum.length = t_arr_datum.payload.size();
7247  int8_t* ptr = (int8_t*)(t_arr_datum.payload.data());
7248  arr_datum.pointer = ptr;
7249  // In this special case, ArrayDatum does not handle freeing the underlying
7250  // memory
      // The shared_ptr below therefore gets a no-op deleter; the bytes stay owned by
      // the Thrift payload.
7251  arr_datum.data_ptr = std::shared_ptr<int8_t>(ptr, [](auto p) {});
7252  arr_datum.is_null = false;
7253  array_column->push_back(arr_datum);
7254  }
7255  }
7256  p.arraysPtr = array_column.get();
7257  }
7258  insert_data.data.push_back(p);
7259  }
      // The lock key is built from the catalog's database id and the logical table id
      // that the catalog reports for the payload's table id.
7260  const ChunkKey lock_chunk_key{cat.getDatabaseId(),
7261  cat.getLogicalTableId(insert_data.tableId)};
7262  auto table_read_lock =
7264  const auto td = cat.getMetadataForTable(insert_data.tableId);
7265  CHECK(td);
7266 
7267  // this should have the same lock seq as COPY FROM
7268  auto insert_data_lock =
      // The value returned by fill_missing_columns is held in a local until the insert
      // below has run. NOTE(review): presumably it owns storage that insert_data points
      // into -- confirm.
7270  auto data_memory_holder = import_export::fill_missing_columns(&cat, insert_data);
7271  td->fragmenter->insertDataNoCheckpoint(insert_data);
7272  } catch (const std::exception& e) {
7273  THROW_DB_EXCEPTION(std::string(e.what()));
7274  }
7275 }
7276 
7277 void DBHandler::start_render_query(TPendingRenderQuery& _return,
7278  const TSessionId& session_id_or_json,
7279  const int64_t widget_id,
7280  const int16_t node_idx,
7281  const std::string& vega_json) {
7282  heavyai::RequestInfo const request_info(session_id_or_json);
7283  SET_REQUEST_ID(request_info.requestId());
7284  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7285  auto session_ptr = stdlog.getConstSessionInfo();
7286  if (!render_handler_) {
7287  THROW_DB_EXCEPTION("Backend rendering is disabled.");
7288  }
7289  LOG(INFO) << "start_render_query :" << *session_ptr << " :widget_id:" << widget_id
7290  << ":vega_json:" << vega_json;
7291 
7292  // cast away const-ness of incoming Thrift string ref
7293  // to allow it to be passed down as an r-value and
7294  // ultimately std::moved into the RenderSession
7295  auto& non_const_vega_json = const_cast<std::string&>(vega_json);
7296 
7297  auto time_ms = measure<>::execution([&]() {
7298  try {
7299  render_handler_->start_render_query(_return,
7300  request_info.sessionId(),
7301  widget_id,
7302  node_idx,
7303  std::move(non_const_vega_json));
7304  } catch (std::exception& e) {
7305  THROW_DB_EXCEPTION(e.what());
7306  }
7307  });
7308  LOG(INFO) << "start_render_query-COMPLETED " << time_ms << "ms "
7309  << "id is " << _return.id;
7310 }
7311 
7312 void DBHandler::execute_next_render_step(TRenderStepResult& _return,
7313  const TPendingRenderQuery& pending_render,
7314  const TRenderAggDataMap& merged_data) {
7315  // No SessionID is available
7316  SET_REQUEST_ID(0);
7317 
7318  if (!render_handler_) {
7319  THROW_DB_EXCEPTION("Backend rendering is disabled.");
7320  }
7321 
7322  LOG(INFO) << "execute_next_render_step: id:" << pending_render.id;
7323  auto time_ms = measure<>::execution([&]() {
7324  try {
7325  render_handler_->execute_next_render_step(_return, pending_render, merged_data);
7326  } catch (std::exception& e) {
7327  THROW_DB_EXCEPTION(e.what());
7328  }
7329  });
7330  LOG(INFO) << "execute_next_render_step-COMPLETED id: " << pending_render.id
7331  << ", time: " << time_ms << "ms ";
7332 }
7333 
7334 void DBHandler::checkpoint(const TSessionId& session_id_or_json, const int32_t table_id) {
7335  heavyai::RequestInfo const request_info(session_id_or_json);
7336  SET_REQUEST_ID(request_info.requestId());
7337  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7338  auto session_ptr = stdlog.getConstSessionInfo();
7339  auto& cat = session_ptr->getCatalog();
7340  cat.checkpoint(table_id);
7341 }
7342 
7343 // check and reset epoch if a request has been made
7344 void DBHandler::set_table_epoch(const TSessionId& session_id_or_json,
7345  const int db_id,
7346  const int table_id,
7347  const int new_epoch) {
7348  heavyai::RequestInfo const request_info(session_id_or_json);
7349  SET_REQUEST_ID(request_info.requestId());
7350  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7351  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7352  auto session_ptr = stdlog.getConstSessionInfo();
7353  if (!session_ptr->get_currentUser().isSuper) {
7354  throw std::runtime_error("Only superuser can set_table_epoch");
7355  }
7356  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7357  ChunkKey table_key{db_id, table_id};
7358  auto table_write_lock = lockmgr::TableSchemaLockMgr::getWriteLockForTable(table_key);
7359  auto table_data_write_lock = lockmgr::TableDataLockMgr::getWriteLockForTable(table_key);
7360  try {
7361  auto& cat = session_ptr->getCatalog();
7362  cat.setTableEpoch(db_id, table_id, new_epoch);
7363  } catch (const std::runtime_error& e) {
7364  THROW_DB_EXCEPTION(std::string(e.what()));
7365  }
7366 }
7367 
// Sets the epoch of the table named `table_name` in the session's current database to
// `new_epoch`. Superuser only: any other caller gets a std::runtime_error. A
// std::runtime_error from the catalog is re-raised through THROW_DB_EXCEPTION.
// NOTE(review): the right-hand sides of both lock declarations are not visible in this
// listing; confirm against the repository.
7369 void DBHandler::set_table_epoch_by_name(const TSessionId& session_id_or_json,
7370  const std::string& table_name,
7371  const int new_epoch) {
7372  heavyai::RequestInfo const request_info(session_id_or_json);
7373  SET_REQUEST_ID(request_info.requestId());
7374  auto stdlog =
7375  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
7376  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7377  auto session_ptr = stdlog.getConstSessionInfo();
7378  if (!session_ptr->get_currentUser().isSuper) {
7379  throw std::runtime_error("Only superuser can set_table_epoch");
7380  }
7381 
7382  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7383  auto& cat = session_ptr->getCatalog();
7384  auto table_write_lock =
7386  auto table_data_write_lock =
      // NOTE(review): `td` is dereferenced below without a null check in the visible
      // code; verify how an unknown table name is handled.
7388  auto td = cat.getMetadataForTable(
7389  table_name,
7390  false); // don't populate fragmenter on this call since we only want metadata
7391  int32_t db_id = cat.getCurrentDB().dbId;
7392  try {
7393  cat.setTableEpoch(db_id, td->tableId, new_epoch);
7394  } catch (const std::runtime_error& e) {
7395  THROW_DB_EXCEPTION(std::string(e.what()));
7396  }
7397 }
7398 
7399 int32_t DBHandler::get_table_epoch(const TSessionId& session_id_or_json,
7400  const int32_t db_id,
7401  const int32_t table_id) {
7402  heavyai::RequestInfo const request_info(session_id_or_json);
7403  SET_REQUEST_ID(request_info.requestId());
7404  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7405  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7406  auto session_ptr = stdlog.getConstSessionInfo();
7407 
7408  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7409  ChunkKey table_key{db_id, table_id};
7410  auto table_read_lock = lockmgr::TableSchemaLockMgr::getReadLockForTable(table_key);
7411  auto table_data_write_lock = lockmgr::TableDataLockMgr::getReadLockForTable(table_key);
7412  try {
7413  auto const& cat = session_ptr->getCatalog();
7414  return cat.getTableEpoch(db_id, table_id);
7415  } catch (const std::runtime_error& e) {
7416  THROW_DB_EXCEPTION(std::string(e.what()));
7417  }
7418 }
7419 
// Returns the epoch of the table named `table_name` in the session's current database.
// A std::runtime_error from the catalog is re-raised through THROW_DB_EXCEPTION.
// NOTE(review): the right-hand sides of both lock declarations are not visible in this
// listing; confirm against the repository.
7420 int32_t DBHandler::get_table_epoch_by_name(const TSessionId& session_id_or_json,
7421  const std::string& table_name) {
7422  heavyai::RequestInfo const request_info(session_id_or_json);
7423  SET_REQUEST_ID(request_info.requestId());
7424  auto stdlog =
7425  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
7426  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7427  auto session_ptr = stdlog.getConstSessionInfo();
7428 
7429  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7430  auto& cat = session_ptr->getCatalog();
7431  auto table_read_lock =
7433  auto table_data_read_lock =
      // NOTE(review): `td` is dereferenced below without a null check in the visible
      // code; verify how an unknown table name is handled.
7435  auto td = cat.getMetadataForTable(
7436  table_name,
7437  false); // don't populate fragmenter on this call since we only want metadata
7438  int32_t db_id = cat.getCurrentDB().dbId;
7439  try {
7440  return cat.getTableEpoch(db_id, td->tableId);
7441  } catch (const std::runtime_error& e) {
7442  THROW_DB_EXCEPTION(std::string(e.what()));
7443  }
7444 }
7445 
7446 void DBHandler::get_table_epochs(std::vector<TTableEpochInfo>& _return,
7447  const TSessionId& session_id_or_json,
7448  const int32_t db_id,
7449  const int32_t table_id) {
7450  heavyai::RequestInfo const request_info(session_id_or_json);
7451  SET_REQUEST_ID(request_info.requestId());
7452  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7453  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7454  auto session_ptr = stdlog.getConstSessionInfo();
7455 
7456  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7457  ChunkKey table_key{db_id, table_id};
7458  auto table_read_lock = lockmgr::TableSchemaLockMgr::getReadLockForTable(table_key);
7459  auto table_data_read_lock = lockmgr::TableDataLockMgr::getReadLockForTable(table_key);
7460 
7461  std::vector<Catalog_Namespace::TableEpochInfo> table_epochs;
7462  auto const& cat = session_ptr->getCatalog();
7463  table_epochs = cat.getTableEpochs(db_id, table_id);
7464  CHECK(!table_epochs.empty());
7465 
7466  for (const auto& table_epoch : table_epochs) {
7467  TTableEpochInfo table_epoch_info;
7468  table_epoch_info.table_id = table_epoch.table_id;
7469  table_epoch_info.table_epoch = table_epoch.table_epoch;
7470  table_epoch_info.leaf_index = table_epoch.leaf_index;
7471  _return.emplace_back(table_epoch_info);
7472  }
7473 }
7474 
// Applies the given per-table epochs through the session's catalog. All entries must
// resolve to the same logical table; an empty list is a no-op.
// NOTE(review): one source line (the start of the statement ending in
// "..., db_id);" below) is not visible in this listing; confirm against the repository.
7475 void DBHandler::set_table_epochs(const TSessionId& session_id_or_json,
7476  const int32_t db_id,
7477  const std::vector<TTableEpochInfo>& table_epochs) {
7478  heavyai::RequestInfo const request_info(session_id_or_json);
7479  SET_REQUEST_ID(request_info.requestId());
7480  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7481  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7482  auto session_ptr = stdlog.getConstSessionInfo();
7483 
7484  // Only super users are allowed to call this API on a single node instance
7485  // or aggregator (for distributed mode)
      // i.e. the check is skipped only when g_cluster is set and this node has no leaves.
7486  if (!g_cluster || leaf_aggregator_.leafCount() > 0) {
7487  if (!session_ptr->get_currentUser().isSuper) {
7488  THROW_DB_EXCEPTION("Only super users can set table epochs");
7489  }
7490  }
7491  if (table_epochs.empty()) {
7492  return;
7493  }
7494  auto& cat = session_ptr->getCatalog();
      // The first entry defines the logical table; every other entry must map to it.
7495  auto logical_table_id = cat.getLogicalTableId(table_epochs[0].table_id);
7496  std::vector<Catalog_Namespace::TableEpochInfo> table_epochs_vector;
7497  for (const auto& table_epoch : table_epochs) {
7498  if (logical_table_id != cat.getLogicalTableId(table_epoch.table_id)) {
7499  THROW_DB_EXCEPTION("Table epochs do not reference the same logical table");
7500  }
7501  table_epochs_vector.emplace_back(
7502  table_epoch.table_id, table_epoch.table_epoch, table_epoch.leaf_index);
7503  }
7504 
7505  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7507  true, cat.getMetadataForTable(logical_table_id, false), db_id);
      // Both table locks are keyed on the logical table id, not on the individual
      // table ids carried by the entries.
7508  ChunkKey table_key{db_id, logical_table_id};
7509  auto table_write_lock = lockmgr::TableSchemaLockMgr::getWriteLockForTable(table_key);
7510  auto table_data_write_lock = lockmgr::TableDataLockMgr::getWriteLockForTable(table_key);
7511  cat.setTableEpochs(db_id, table_epochs_vector);
7512 }
7513 
7514 void DBHandler::set_license_key(TLicenseInfo& _return,
7515  const TSessionId& session_id_or_json,
7516  const std::string& key,
7517  const std::string& nonce) {
7518  heavyai::RequestInfo const request_info(session_id_or_json);
7519  SET_REQUEST_ID(request_info.requestId());
7520  check_read_only("set_license_key");
7521  THROW_DB_EXCEPTION(std::string("Licensing not supported."));
7522 }
7523 
7524 void DBHandler::get_license_claims(TLicenseInfo& _return,
7525  const TSessionId& session_id_or_json,
7526  const std::string& nonce) {
7527  heavyai::RequestInfo const request_info(session_id_or_json);
7528  SET_REQUEST_ID(request_info.requestId());
7529  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7530  _return.claims.emplace_back("");
7531 }
7532 
7535 
// NOTE(review): the header and opening statement(s) of this definition, and one
// statement before the closing brace, are not visible in this listing. It is
// presumably DBHandler::shutdown -- confirm against the repository.
// Visible steps: clear the executor's external caches, drop the query engine, and shut
// down the render handler when one exists.
7536  Executor::clearExternalCaches(false, nullptr, -1);
7537 
7538  query_engine_.reset();
7539 
7540  if (render_handler_) {
7541  render_handler_->shutdown();
7542  }
7543 
7545 }
7546 
// NOTE(review): the header of this definition is not visible in this listing. It is
// presumably DBHandler::emergency_shutdown -- confirm against the repository.
// Visible step: close the Calcite server (passing false) when a Calcite handle exists.
7548  if (calcite_) {
7549  calcite_->close_calcite_server(false);
7550  }
7551 }
7552 
7553 extern std::map<std::string, std::string> get_device_parameters(bool cpu_only);
7554 
// Inserts every entry of the map _<TYPENAME>_VALUES_TO_NAMES into a std::map named
// `_return` that must be in scope at the expansion site. Each entry is stored as
// key "<TYPENAME>.<name>" with the enum value converted by std::to_string.
7555 #define EXPOSE_THRIFT_MAP(TYPENAME) \
7556  { \
7557  std::map<int, const char*>::const_iterator it = \
7558  _##TYPENAME##_VALUES_TO_NAMES.begin(); \
7559  while (it != _##TYPENAME##_VALUES_TO_NAMES.end()) { \
7560  _return.insert(std::pair<std::string, std::string>( \
7561  #TYPENAME "." + std::string(it->second), std::to_string(it->first))); \
7562  it++; \
7563  } \
7564  }
7565 
// Fills `_return` with the entries of `params`, followed by the value-to-name tables of
// several Thrift enums (TDeviceType, TDatumType, TEncodingType, TExtArgumentType,
// TOutputBufferSizeType) via EXPOSE_THRIFT_MAP.
// NOTE(review): the declaration of `params` is not visible in this listing; presumably
// it comes from the free function get_device_parameters declared above -- confirm.
7566 void DBHandler::get_device_parameters(std::map<std::string, std::string>& _return,
7567  const TSessionId& session_id_or_json) {
7568  heavyai::RequestInfo const request_info(session_id_or_json);
7569  SET_REQUEST_ID(request_info.requestId());
7570  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7571  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
      // map::insert keeps an existing value when the key is already present in _return.
7573  for (auto item : params) {
7574  _return.insert(item);
7575  }
7576  EXPOSE_THRIFT_MAP(TDeviceType);
7577  EXPOSE_THRIFT_MAP(TDatumType);
7578  EXPOSE_THRIFT_MAP(TEncodingType);
7579  EXPOSE_THRIFT_MAP(TExtArgumentType);
7580  EXPOSE_THRIFT_MAP(TOutputBufferSizeType);
7581 }
7582 
// Registers runtime UDFs and UDTFs supplied by a client, together with per-device IR
// strings keyed "cpu" / "gpu" in `device_ir_map`.
// NOTE(review): the first line of the signature and many statements of this definition
// are not visible in this listing (the enable/privilege conditions, the locking scope,
// the targets of the IR assignments and the UDTF registration call). The name is
// presumably DBHandler::register_runtime_extension_functions, going by the log message
// below -- confirm against the repository before editing.
7584  const TSessionId& session_id_or_json,
7585  const std::vector<TUserDefinedFunction>& udfs,
7586  const std::vector<TUserDefinedTableFunction>& udtfs,
7587  const std::map<std::string, std::string>& device_ir_map) {
7588  heavyai::RequestInfo const request_info(session_id_or_json);
7589  SET_REQUEST_ID(request_info.requestId());
7590  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7591  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7592 
7593  VLOG(1) << "register_runtime_extension_functions: # UDFs: " << udfs.size()
7594  << " # UDTFs: " << udtfs.size() << std::endl;
7595 
      // NOTE(review): the condition guarding this rejection is not visible here.
7598  THROW_DB_EXCEPTION("Runtime UDF and UDTF function registration is disabled.");
7599  }
7600 
      // NOTE(review): the outer condition and the throw statement of this superuser
      // check are only partially visible here.
7603  auto session_ptr = stdlog.getConstSessionInfo();
7604  if (!session_ptr->get_currentUser().isSuper) {
7606  "Server is configured to require superuser privilege to register UDFs and "
7607  "UDTFs.");
7608  }
7609  }
7612 
      // Look up the optional per-device IR strings; each device has an "IR supplied"
      // branch and an else branch whose contents are not visible here.
7614  auto it_cpu = device_ir_map.find(std::string{"cpu"});
7615  auto it_gpu = device_ir_map.find(std::string{"gpu"});
7616  if (it_cpu != device_ir_map.end() || it_gpu != device_ir_map.end()) {
7617  if (it_cpu != device_ir_map.end()) {
7619  it_cpu->second;
7620  } else {
7623  }
7624  if (it_gpu != device_ir_map.end()) {
7626  it_gpu->second;
7627  } else {
7630  }
7631  } /* else avoid locking compilation if registration does not change
7632  the rt_udf_cpu/gpu_module instances */
7633 
7634  VLOG(1) << "Registering runtime UDTFs:\n";
7635 
7637 
      // Each Thrift UDTF description is converted with ThriftSerializers::from_thrift
      // and passed on with is_runtime = true.
7638  for (auto it = udtfs.begin(); it != udtfs.end(); it++) {
7639  VLOG(1) << "UDTF name=" << it->name << std::endl;
7641  it->name,
7643  ThriftSerializers::from_thrift(it->sizerType),
7644  static_cast<size_t>(it->sizerArgPos)},
7645  ThriftSerializers::from_thrift(it->inputArgTypes),
7646  ThriftSerializers::from_thrift(it->outputArgTypes),
7647  ThriftSerializers::from_thrift(it->sqlArgTypes),
7648  it->annotations,
7649  /*is_runtime =*/true);
7650  }
7651  /* Register extension functions with Calcite server */
7652  CHECK(calcite_);
7653  auto udtfs_ = ThriftSerializers::to_thrift(
7655  calcite_->setRuntimeExtensionFunctions(udfs, udtfs_, /*is_runtime =*/true);
7656 
7657  /* Update the extension function whitelist */
7658  std::string whitelist = calcite_->getRuntimeExtensionFunctionWhitelist();
7659  VLOG(1) << "Registering runtime extension functions with CodeGen using whitelist:\n"
7660  << whitelist;
7663  });
7664 }
7665 
7666 void DBHandler::get_function_names(std::vector<std::string>& _return,
7667  const TSessionId& session) {
7668  for (auto udf_name :
7669  ExtensionFunctionsWhitelist::get_udfs_name(/* is_runtime */ false)) {
7670  if (std::find(_return.begin(), _return.end(), udf_name) == _return.end()) {
7671  _return.emplace_back(udf_name);
7672  }
7673  }
7674 }
7675 
7676 void DBHandler::get_runtime_function_names(std::vector<std::string>& _return,
7677  const TSessionId& session) {
7678  for (auto udf_name :
7679  ExtensionFunctionsWhitelist::get_udfs_name(/* is_runtime */ true)) {
7680  if (std::find(_return.begin(), _return.end(), udf_name) == _return.end()) {
7681  _return.emplace_back(udf_name);
7682  }
7683  }
7684 }
7685 
7686 void DBHandler::get_function_details(std::vector<TUserDefinedFunction>& _return,
7687  const TSessionId& session,
7688  const std::vector<std::string>& udf_names) {
7689  for (const std::string& udf_name : udf_names) {
7690  for (auto udf : ExtensionFunctionsWhitelist::get_ext_funcs(udf_name)) {
7691  _return.emplace_back(ThriftSerializers::to_thrift(udf));
7692  }
7693  }
7694 }
7695 
// Appends table function names to `_return`, skipping names already present.
// `session` is not used in the visible code.
// NOTE(review): the loop header that declares `tf` is not visible in this listing;
// confirm the iterated range against the repository.
7696 void DBHandler::get_table_function_names(std::vector<std::string>& _return,
7697  const TSessionId& session) {
      // Names are requested with drop_suffix and to_lower both enabled (see the argument
      // comments), so deduplication compares the names in that form.
7699  const std::string& name = tf.getName(/* drop_suffix */ true, /* to_lower */ true);
7700  if (std::find(_return.begin(), _return.end(), name) == _return.end()) {
7701  _return.emplace_back(name);
7702  }
7703  }
7704 }
7705 
// Appends runtime table function names to `_return`, skipping names already present.
// `session` is not used in the visible code.
// NOTE(review): the range expression of the loop is not visible in this listing;
// confirm the iterated range against the repository.
7706 void DBHandler::get_runtime_table_function_names(std::vector<std::string>& _return,
7707  const TSessionId& session) {
7708  for (auto tf :
      // Names are requested with drop_suffix and to_lower both enabled (see the argument
      // comments), so deduplication compares the names in that form.
7710  const std::string& name = tf.getName(/* drop_suffix */ true, /* to_lower */ true);
7711  if (std::find(_return.begin(), _return.end(), name) == _return.end()) {
7712  _return.emplace_back(name);
7713  }
7714  }
7715 }
7716 
// For every requested name, appends the Thrift form of each table function that
// TableFunctionsFactory::get_table_funcs returns for it to `_return`. `session` is not
// used.
// NOTE(review): the first line of the signature is not visible in this listing; the
// name is presumably DBHandler::get_table_function_details -- confirm.
7718  std::vector<TUserDefinedTableFunction>& _return,
7719  const TSessionId& session,
7720  const std::vector<std::string>& udtf_names) {
7721  for (const std::string& udtf_name : udtf_names) {
7722  for (auto tf : table_functions::TableFunctionsFactory::get_table_funcs(udtf_name)) {
7723  _return.emplace_back(ThriftSerializers::to_thrift(tf));
7724  }
7725  }
7726 }
7727 
// Converts a result into `_return` in column format, returning all of its rows.
// NOTE(review): the first line of the signature (which presumably declares `result`)
// and the declaration of `qsp` are not visible in this listing; the name is presumably
// DBHandler::convertResultSet -- confirm against the repository.
7729  const Catalog_Namespace::SessionInfo& session_info,
7730  const std::string& query_state_str,
7731  TQueryResult& _return) {
7732  // Stuff ResultSet into _return (which is a TQueryResult)
7733  // calls convertRows, but after some setup using session_info
7734 
      // The session is looked up again by id and must exist (CHECK).
7735  auto session_ptr = get_session_ptr(session_info.get_session_id());
7736  CHECK(session_ptr);
7737  auto qs = create_query_state(session_ptr, query_state_str);
7739 
7740  // heavysql only accepts column format as being "VALID",
7741  // assume that heavydb should only return column format
7742  int32_t nRows = result.getDataPtr()->rowCount();
7743 
      // first_n and at_most_n are both set to the full row count.
7744  convertData(_return,
7745  result,
7746  qsp,
7747  /*column_format=*/true,
7748  /*first_n=*/nRows,
7749  /*at_most_n=*/nRows);
7750 }
7751 
7752 static std::unique_ptr<RexLiteral> genLiteralStr(std::string val) {
7753  return std::unique_ptr<RexLiteral>(
7754  new RexLiteral(val, SQLTypes::kTEXT, SQLTypes::kTEXT, 0, 0, 0, 0));
7755 }
7756 
// Builds the result for SHOW USER SESSIONS: one row per session returned by
// sessions_store_->getAllSessions(), with four text columns
// (session_id, login_name, client_address, db_name).
// Throws std::runtime_error when the requesting session's user is not a super user.
// Returns an ExecutionResult wrapping the generated ResultSet and column metadata.
// NOTE(review): the first line of the signature (listing line 7757) is absent
// from this listing.
7758  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
7759  std::shared_ptr<ResultSet> rSet = nullptr;
7760  std::vector<TargetMetaInfo> label_infos;
7761 
7762  if (!session_ptr->get_currentUser().isSuper) {
7763  throw std::runtime_error(
7764  "SHOW USER SESSIONS failed, because it can only be executed by super user.");
7765  } else {
7766  // label_infos -> column labels
7767  std::vector<std::string> labels{
7768  "session_id", "login_name", "client_address", "db_name"};
7769  for (const auto& label : labels) {
7770  label_infos.emplace_back(label, SQLTypeInfo(kTEXT, true));
7771  }
7772 
7773  // logical_values -> table data
7774  std::vector<RelLogicalValues::RowValues> logical_values;
7775  auto sessions = sessions_store_->getAllSessions();
 // Note: the loop variable below shadows the `session_ptr` parameter; inside the
 // loop the name refers to the session being listed, not the requesting session.
7776  for (const auto& session_ptr : sessions) {
7777  logical_values.emplace_back(RelLogicalValues::RowValues{});
7778  logical_values.back().emplace_back(
7779  genLiteralStr(session_ptr->get_public_session_id()));
7780  logical_values.back().emplace_back(
7781  genLiteralStr(session_ptr->get_currentUser().userName));
7782  logical_values.back().emplace_back(
7783  genLiteralStr(session_ptr->get_connection_info()));
7784  logical_values.back().emplace_back(
7785  genLiteralStr(session_ptr->getCatalog().getCurrentDB().dbName));
7786  }
7787 
7788  // Create ResultSet
7789  rSet = std::shared_ptr<ResultSet>(
7790  ResultSetLogicalValuesBuilder::create(label_infos, logical_values));
7791  }
7792  return ExecutionResult(rSet, label_infos);
7793 }
7794 
// Builds a nine-column text result with one row per query reported by
// executor->getQuerySessionInfo for each visible session.
// A super user sees the sessions from sessions_store_->getAllSessions(); any other
// user only sees sessions_store_->getUserSessions(<own user name>).
// Returns an ExecutionResult wrapping the generated ResultSet and column metadata.
// NOTE(review): several lines are absent from this listing: the first line of the
// signature (7795), the start and end of the statement that defines `executor`
// (7816, 7819) and the declaration of `session_read_lock` (7826).
7796  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
7797  std::shared_ptr<ResultSet> rSet = nullptr;
7798  std::vector<TargetMetaInfo> label_infos;
7799  auto current_user_name = session_ptr->get_currentUser().userName;
7800  auto is_super_user = session_ptr->get_currentUser().isSuper.load();
7801 
7802  std::vector<std::string> labels{"query_session_id",
7803  "current_status",
7804  "executor_id",
7805  "submitted",
7806  "query_str",
7807  "login_name",
7808  "client_address",
7809  "db_name",
7810  "exec_device_type"};
7811  for (const auto& label : labels) {
7812  label_infos.emplace_back(label, SQLTypeInfo(kTEXT, true));
7813  }
7814 
7815  std::vector<RelLogicalValues::RowValues> logical_values;
7817  jit_debug_ ? "/tmp" : "",
7818  jit_debug_ ? "mapdquery" : "",
7820  CHECK(executor);
7821  auto sessions = (is_super_user ? sessions_store_->getAllSessions()
7822  : sessions_store_->getUserSessions(current_user_name));
7823  for (const auto& query_session_ptr : sessions) {
7824  std::vector<QuerySessionStatus> query_infos;
 // The inner scope limits how long session_read_lock lives: it is only needed
 // while the per-session query info is copied out of the executor.
7825  {
7827  executor->getSessionLock());
7828  query_infos = executor->getQuerySessionInfo(query_session_ptr->get_session_id(),
7829  session_read_lock);
7830  }
7831  // if there exists query info fired from this session we report it to user
 // Lookup table indexed by the value returned from getQueryStatus().
7832  const std::string getQueryStatusStr[] = {"UNDEFINED",
7833  "PENDING_QUEUE",
7834  "PENDING_EXECUTOR",
7835  "RUNNING_QUERY_KERNEL",
7836  "RUNNING_REDUCTION",
7837  "RUNNING_IMPORTER"};
 // Declared once per session: after a RUNNING_IMPORTER entry is seen the flag
 // stays set for the remaining queries of the same session.
7838  bool is_table_import_session = false;
7839  for (QuerySessionStatus& query_info : query_infos) {
7840  logical_values.emplace_back(RelLogicalValues::RowValues{});
7841  logical_values.back().emplace_back(
7842  genLiteralStr(query_session_ptr->get_public_session_id()));
7843  auto query_status = query_info.getQueryStatus();
7844  logical_values.back().emplace_back(genLiteralStr(getQueryStatusStr[query_status]));
7845  if (query_status == QuerySessionStatus::QueryStatus::RUNNING_IMPORTER) {
7846  is_table_import_session = true;
7847  }
7848  logical_values.back().emplace_back(
7849  genLiteralStr(::toString(query_info.getExecutorId())));
7850  logical_values.back().emplace_back(
7851  genLiteralStr(query_info.getQuerySubmittedTime()));
7852  logical_values.back().emplace_back(genLiteralStr(query_info.getQueryStr()));
7853  logical_values.back().emplace_back(
7854  genLiteralStr(query_session_ptr->get_currentUser().userName));
7855  logical_values.back().emplace_back(
7856  genLiteralStr(query_session_ptr->get_connection_info()));
7857  logical_values.back().emplace_back(
7858  genLiteralStr(query_session_ptr->getCatalog().getCurrentDB().dbName));
 // "GPU" is only reported for a GPU session that is not flagged as importing;
 // every other case is reported as "CPU".
7859  if (query_session_ptr->get_executor_device_type() == ExecutorDeviceType::GPU &&
7860  !is_table_import_session) {
7861  logical_values.back().emplace_back(genLiteralStr("GPU"));
7862  } else {
7863  logical_values.back().emplace_back(genLiteralStr("CPU"));
7864  }
7865  }
7866  }
7867 
7868  rSet = std::shared_ptr<ResultSet>(
7869  ResultSetLogicalValuesBuilder::create(label_infos, logical_values));
7870 
7871  return ExecutionResult(rSet, label_infos);
7872 }
7873 
// Thrift endpoint that fills _return with TQueryInfo records describing the
// queries of every session in sessions_store_->getAllSessions().
// session_id_or_json is parsed by heavyai::RequestInfo to obtain the request id
// (used for SET_REQUEST_ID) and the session id (used for the STDLOG entry).
// NOTE(review): the start and end of the statement that defines `executor`
// (listing lines 7879, 7882) and the declaration of `session_read_lock` (7889)
// are absent from this listing.
7874 void DBHandler::get_queries_info(std::vector<TQueryInfo>& _return,
7875  const TSessionId& session_id_or_json) {
7876  heavyai::RequestInfo const request_info(session_id_or_json);
7877  SET_REQUEST_ID(request_info.requestId());
7878  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7880  jit_debug_ ? "/tmp" : "",
7881  jit_debug_ ? "mapdquery" : "",
7883  CHECK(executor);
7884  auto sessions = sessions_store_->getAllSessions();
7885  for (const auto& query_session_ptr : sessions) {
7886  const auto query_session_user_name = query_session_ptr->get_currentUser().userName;
7887  std::vector<QuerySessionStatus> query_infos;
 // The inner scope limits how long session_read_lock lives.
7888  {
7890  executor->getSessionLock());
7891  query_infos = executor->getQuerySessionInfo(query_session_ptr->get_session_id(),
7892  session_read_lock);
7893  }
7894  // if there exists query info fired from this session we report it to user
 // Lookup table indexed by the value returned from getQueryStatus().
7895  const std::string getQueryStatusStr[] = {"UNDEFINED",
7896  "PENDING_QUEUE",
7897  "PENDING_EXECUTOR",
7898  "RUNNING_QUERY_KERNEL",
7899  "RUNNING_REDUCTION",
7900  "RUNNING_IMPORTER"};
 // NOTE(review): `info` is declared once per session and pushed once after the
 // loop below. As written, a session with several queries contributes only the
 // last query's data, and a session with no queries contributes a
 // default-constructed TQueryInfo. Confirm whether one entry per query
 // (push_back inside the loop) was intended.
7901  TQueryInfo info;
7902  for (QuerySessionStatus& query_info : query_infos) {
7903  info.query_session_id = query_session_ptr->get_session_id();
7904  info.query_public_session_id = query_session_ptr->get_public_session_id();
7905  info.current_status = getQueryStatusStr[query_info.getQueryStatus()];
7906  info.query_str = query_info.getQueryStr();
7907  info.executor_id = query_info.getExecutorId();
7908  info.submitted = query_info.getQuerySubmittedTime();
7909  info.login_name = query_session_user_name;
7910  info.client_address = query_session_ptr->get_connection_info();
7911  info.db_name = query_session_ptr->getCatalog().getCurrentDB().dbName;
7912  if (query_session_ptr->get_executor_device_type() == ExecutorDeviceType::GPU) {
7913  info.exec_device_type = "GPU";
7914  } else {
7915  info.exec_device_type = "CPU";
7916  }
7917  }
7918  _return.push_back(info);
7919  }
7920 }
7921 
// Interrupts the queries of the session whose public id is `target_session`, on
// behalf of the requesting session `session_info`.
// Throws std::runtime_error when interruption is disabled, when no session matches
// the public id, or when a non-super user targets a session owned by another user.
// If executor->getExecutorIdsRunningQuery reports executors for the target
// session, each of them is interrupted; otherwise the interrupt is only sent when
// the session is still enrolled according to checkIsQuerySessionEnrolled.
// NOTE(review): several lines are absent from this listing: the first line of the
// signature (7922), the condition guarding the "interrupt is disabled" throw
// (7955), the start and end of the statement that defines `executor` (7971, 7974)
// and the declaration of `session_read_lock` (7988).
7923  const std::string& target_session) {
7924  // capture the interrupt request from user and then pass to corresponding Executors
7925  // that queries fired by the given session are assigned
7926  // Basic-flow that each query session gets through:
7927  // Enroll --> Update (query session info / executor) --> Running -> Cleanup
7928  // 1. We have to separate 1) "target" query session to interrupt and 2) request session
7929  // Here, we have to focus on "target" session: all interruption management is based on
7930  // the "target" session
7931  // 2. Session info and its required data structures are global to Executor, so
7932  // we can send the interrupt request from UNITARY_EXECUTOR (note that the actual query
7933  // is processed by specific Executor but can also access the global data structure)
7934  // to the Executor that the session's query has been assigned
7935  // this means each Executor should handle the interrupt request, and then update its
7936  // the latest status to the global session map for the correctness
7937  // 3. Three target session's status: PENDING_QUEUE / PENDING_EXECUTOR / RUNNING
7938  // (for now we can interrupt a query at "PENDING_EXECUTOR" and "RUNNING")
7939  // 4. each session has 1) a list of queries that the session tries to initiate and
7940  // 2) a interrupt flag map that indicates whether the session is interrupted
7941  // If a session is interrupted, we turn the flag for the session on so as to Executor
7942  // can know about the user's interrupt request on the query (after all queries are
7943  // removed then the session's query list and its flag are also deleted). And those
7944  // info is managed by Executor's global data structure
7945  // 5. To interrupt queries at "PENDING_EXECUTOR", corresponding Executor regularly
7946  // checks the interrupt flag of the session, and throws an exception if got interrupted
7947  // For the case of running query, we also turn the flag in device memory on in async
7948  // manner so as to inform the query kernel about the latest interrupt flag status
7949  // (it also checks the flag regularly during the query kernel execution and
7950  // query threads return with the error code if necessary -->
7951  // for this we inject interrupt flag checking logic in the generated query kernel)
7952  // 6. Interruption are implemented by throwing runtime_error that contains a visible
7953  // error message like "Query has been interrupted"
7954 
7956  // at least type of query interruption is enabled to allow kill query
7957  // if non-kernel query interrupt is enabled but tries to kill that type's query?
7958  // then the request is skipped
7959  // todo(yoonmin): improve kill query cmd under both types of query
7960  throw std::runtime_error(
7961  "Unable to interrupt running query. Query interrupt is disabled.");
7962  }
7963 
 // NOTE(review): CHECK_EQ is applied to a caller-supplied string; presumably the
 // caller has already validated the 8-character public session id format —
 // confirm, since a failed CHECK is not a recoverable error.
7964  CHECK_EQ(target_session.length(), static_cast<unsigned long>(8));
7965  auto target_query_session = sessions_store_->getByPublicID(target_session);
7966  if (!target_query_session) {
7967  throw std::runtime_error(
7968  "Unable to interrupt running query. An invalid query session is given.");
7969  }
7970  auto target_session_id = target_query_session->get_session_id();
7972  jit_debug_ ? "/tmp" : "",
7973  jit_debug_ ? "mapdquery" : "",
7975  CHECK(executor);
7976 
 // Permission check: a non-super user may only interrupt sessions whose user name
 // equals their own.
7977  auto non_admin_interrupt_user = !session_info.get_currentUser().isSuper.load();
7978  auto interrupt_user_name = session_info.get_currentUser().userName;
7979  if (non_admin_interrupt_user) {
7980  auto target_user_name = target_query_session->get_currentUser().userName;
7981  if (target_user_name.compare(interrupt_user_name) != 0) {
7982  throw std::runtime_error("Unable to interrupt running query.");
7983  }
7984  }
7985 
7986  auto target_executor_ids = executor->getExecutorIdsRunningQuery(target_session_id);
7987  if (target_executor_ids.empty()) {
7989  executor->getSessionLock());
7990  if (executor->checkIsQuerySessionEnrolled(target_session_id, session_read_lock)) {
 // The read lock is released explicitly before executor->interrupt is called.
7991  session_read_lock.unlock();
7992  VLOG(1) << "Received interrupt: "
7993  << "User " << session_info.get_currentUser().userLoggable()
7994  << ", LeafCount " << leaf_aggregator_.leafCount() << ", Database "
7995  << session_info.getCatalog().getCurrentDB().dbName << std::endl;
7996  executor->interrupt(target_session_id, session_info.get_session_id());
7997  }
7998  } else {
7999  for (auto& executor_id : target_executor_ids) {
8000  VLOG(1) << "Received interrupt: "
8001  << "User " << session_info.get_currentUser().userLoggable() << ", Executor "
8002  << executor_id << ", LeafCount " << leaf_aggregator_.leafCount()
8003  << ", Database " << session_info.getCatalog().getCurrentDB().dbName
8004  << std::endl;
8005  auto target_executor = Executor::getExecutor(executor_id);
8006  target_executor->interrupt(target_session_id, session_info.get_session_id());
8007  }
8008  }
8009 }
8010 
// Clears the cache selected by cache_type for the given session.
// cache_type is compared case-insensitively (via to_upper) against "CPU", "GPU"
// and "RENDER", which dispatch to clear_cpu_memory, clear_gpu_memory and
// clearRenderMemory respectively. `result` is reset to a default ExecutionResult
// and execution_time_ms receives the value returned by measure<>::execution.
// Throws std::runtime_error for any other cache_type; in that case
// execution_time_ms is left unwritten.
// NOTE(review): the parameter line that declares `result` (listing line 8012) is
// absent from this listing.
8011 void DBHandler::alterSystemClear(const std::string& session_id,
8013  const std::string& cache_type,
8014  int64_t& execution_time_ms) {
8015  result = ExecutionResult();
8016  if (to_upper(cache_type) == "CPU") {
8017  execution_time_ms = measure<>::execution([&]() { clear_cpu_memory(session_id); });
8018  } else if (to_upper(cache_type) == "GPU") {
8019  execution_time_ms = measure<>::execution([&]() { clear_gpu_memory(session_id); });
8020  } else if (to_upper(cache_type) == "RENDER") {
8021  execution_time_ms = measure<>::execution([&]() { clearRenderMemory(session_id); });
8022  } else {
8023  throw std::runtime_error("Invalid cache type. Valid values are CPU,GPU or RENDER");
8024  }
8025 }
8026 
8027 void DBHandler::alterSession(const std::string& session_id,
8029  const std::pair<std::string, std::string>& session_parameter,
8030  int64_t& execution_time_ms) {
8031  result = ExecutionResult();
8032  if (session_parameter.first == "EXECUTOR_DEVICE") {
8033  std::string parameter_value = to_upper(session_parameter.second);
8034  TExecuteMode::type executorType;
8035  if (parameter_value == "GPU") {
8036  executorType = TExecuteMode::type::GPU;
8037  } else if (parameter_value == "CPU") {
8038  executorType = TExecuteMode::type::CPU;
8039  } else {
8040  throw std::runtime_error("Cannot set the " + session_parameter.first + " to " +
8041  session_parameter.second +
8042  ". Valid options are CPU and GPU");
8043  }
8044  execution_time_ms =
8045  measure<>::execution([&]() { set_execution_mode(session_id, executorType); });
8046  } else if (session_parameter.first == "CURRENT_DATABASE") {
8047  execution_time_ms = measure<>::execution(
8048  [&]() { switch_database(session_id, session_parameter.second); });
8049  }
8050 }
8051 
// Executes the DDL command described by query_ra and reports through the Thrift
// TQueryResult `_return`.
// KILL QUERY is routed to interruptQuery and produces no result set. Every other
// command produces an ExecutionResult in `result`: SHOW QUERIES, SHOW USER
// SESSIONS, ALTER SYSTEM CLEAR, ALTER SESSION SET and the executor-queue
// PAUSE/RESUME commands are handled here, anything else by
// executor.execute(read_only_). The measured time of the chosen branch is added
// to _return.execution_time_ms.
// Throws std::runtime_error for a queue action other than PAUSE or RESUME.
// NOTE(review): the first line of the signature (listing line 8052) and the
// declaration of `result` (8062) are absent from this listing.
8053  TQueryResult& _return,
8054  const std::string& query_ra,
8055  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
8056  DdlCommandExecutor executor = DdlCommandExecutor(query_ra, session_ptr);
8057  std::string commandStr = executor.commandStr();
8058 
8059  if (executor.isKillQuery()) {
8060  interruptQuery(*session_ptr, executor.getTargetQuerySessionToKill());
8061  } else {
 // Declared without an initializer; it is only read in the two branches that
 // pass it to alterSystemClear / alterSession as an out-parameter.
8063  int64_t execution_time_ms;
8064  if (executor.isShowQueries()) {
8065  // getQueries still requires Thrift cannot be nested into DdlCommandExecutor
8066  _return.execution_time_ms +=
8067  measure<>::execution([&]() { result = getQueries(session_ptr); });
8068  } else if (executor.isShowUserSessions()) {
8069  // getUserSessions still requires Thrift cannot be nested into DdlCommandExecutor
8070  _return.execution_time_ms +=
8071  measure<>::execution([&]() { result = getUserSessions(session_ptr); });
8072  } else if (executor.isAlterSystemClear()) {
8073  alterSystemClear(session_ptr->get_session_id(),
8074  result,
8075  executor.returnCacheType(),
8076  execution_time_ms);
8077  _return.execution_time_ms += execution_time_ms;
8078 
8079  } else if (executor.isAlterSessionSet()) {
8080  alterSession(session_ptr->get_session_id(),
8081  result,
8082  executor.getSessionParameter(),
8083  execution_time_ms);
8084  _return.execution_time_ms += execution_time_ms;
8085  } else if (executor.isAlterSystemControlExecutorQueue()) {
8086  result = ExecutionResult();
8087  if (executor.returnQueueAction() == "PAUSE") {
8088  _return.execution_time_ms += measure<>::execution(
8089  [&]() { pause_executor_queue(session_ptr->get_session_id()); });
8090  } else if (executor.returnQueueAction() == "RESUME") {
8091  _return.execution_time_ms += measure<>::execution(
8092  [&]() { resume_executor_queue(session_ptr->get_session_id()); });
8093  } else {
8094  throw std::runtime_error("Unknown queue command.");
8095  }
8096  } else {
8097  _return.execution_time_ms +=
8098  measure<>::execution([&]() { result = executor.execute(read_only_); });
8099  }
8100 
 // Only commands that produced rows are converted into the Thrift result.
8101  if (!result.empty()) {
8102  // reduce execution time by the time spent during queue waiting
8103  _return.execution_time_ms -= result.getRows()->getQueueTime();
8104  convertResultSet(result, *session_ptr, commandStr, _return);
8105  }
8106  }
8107 }
8108 
// Executes the DDL command described by query_ra and stores the outcome directly
// in the ExecutionResult `_return`.
// KILL QUERY is routed to interruptQuery. Every other command assigns `_return`
// (SHOW QUERIES, SHOW USER SESSIONS, ALTER SYSTEM CLEAR, ALTER SESSION SET, the
// executor-queue PAUSE/RESUME commands, or executor.execute(read_only_) for the
// rest) and then records the measured time through _return.setExecutionTime.
// Throws std::runtime_error for a queue action other than PAUSE or RESUME.
// NOTE(review): the first line of the signature (listing line 8109) and the body
// of the final `if` (8158) are absent from this listing.
8110  ExecutionResult& _return,
8111  const std::string& query_ra,
8112  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
8113  DdlCommandExecutor executor = DdlCommandExecutor(query_ra, session_ptr);
8114  std::string commandStr = executor.commandStr();
8115 
8116  if (executor.isKillQuery()) {
8117  interruptQuery(*session_ptr, executor.getTargetQuerySessionToKill());
8118  } else {
 // Declared without an initializer: each branch below is relied upon to assign
 // it before setExecutionTime reads it.
8119  int64_t execution_time_ms;
8120  if (executor.isShowQueries()) {
8121  // getQueries still requires Thrift cannot be nested into DdlCommandExecutor
8122  execution_time_ms =
8123  measure<>::execution([&]() { _return = getQueries(session_ptr); });
8124  } else if (executor.isShowUserSessions()) {
8125  // getUserSessions still requires Thrift cannot be nested into DdlCommandExecutor
8126  execution_time_ms =
8127  measure<>::execution([&]() { _return = getUserSessions(session_ptr); });
8128  } else if (executor.isAlterSystemClear()) {
8129  alterSystemClear(session_ptr->get_session_id(),
8130  _return,
8131  executor.returnCacheType(),
8132  execution_time_ms);
8133  } else if (executor.isAlterSessionSet()) {
8134  alterSession(session_ptr->get_session_id(),
8135  _return,
8136  executor.getSessionParameter(),
8137  execution_time_ms);
8138  } else if (executor.isAlterSystemControlExecutorQueue()) {
8139  _return = ExecutionResult();
8140  if (executor.returnQueueAction() == "PAUSE") {
8141  execution_time_ms = measure<>::execution(
8142  [&]() { pause_executor_queue(session_ptr->get_session_id()); });
8143  } else if (executor.returnQueueAction() == "RESUME") {
8144  execution_time_ms = measure<>::execution(
8145  [&]() { resume_executor_queue(session_ptr->get_session_id()); });
8146  } else {
 // NOTE(review): the message below is misspelled ("Unknwon"); the TQueryResult
 // overload of this function spells it "Unknown queue command.".
8147  throw std::runtime_error("Unknwon queue command.");
8148  }
8149  } else {
8150  execution_time_ms =
8151  measure<>::execution([&]() { _return = executor.execute(read_only_); });
8152  }
8153  _return.setExecutionTime(execution_time_ms);
8154  }
8155  if (_return.getResultType() == ExecutionResult::QueryResult) {
8156  // ResultType defaults to QueryResult => which can limit
8157  // the number of lines output via ConvertRow... use CalciteDdl instead
8159  }
8160 }
8161 
8162 void DBHandler::resizeDispatchQueue(size_t queue_size) {
8163  dispatch_queue_ = std::make_unique<QueryDispatchQueue>(queue_size);
8164 }
8165 
// Reports whether any of the selected tables is an in-memory system table.
// For every table key whose db_id equals info_schema_catalog's database id, the
// table descriptor is looked up and passed to
// check_and_reset_in_memory_system_table; the function returns true as soon as
// one such call has returned true, but still visits every remaining key.
// Returns false when info_schema_catalog is null or no key qualifies.
// NOTE(review): the first line of the signature (listing line 8166) and the
// initializer of `info_schema_catalog` (8170) are absent from this listing.
8167  const std::unordered_set<shared::TableKey>& selected_table_keys) const {
8168  bool is_in_memory_system_table_query{false};
8169  const auto info_schema_catalog =
8171  if (info_schema_catalog) {
8172  for (const auto& table_key : selected_table_keys) {
8173  if (table_key.db_id == info_schema_catalog->getDatabaseId()) {
8174  auto td = info_schema_catalog->getMetadataForTable(table_key.table_id, false);
8175  CHECK(td);
 // No early exit: the helper is invoked for every matching table even after
 // the flag has been set.
8176  if (check_and_reset_in_memory_system_table(*info_schema_catalog, *td)) {
8177  is_in_memory_system_table_query = true;
8178  }
8179  }
8180  }
8181  }
8182  return is_in_memory_system_table_query;
8183 }
std::lock_guard< T > lock_guard
std::pair< size_t, size_t > ArraySliceRange
Definition: Importer.h:74
void interrupt(const TSessionId &query_session, const TSessionId &interrupt_session) override
Definition: DBHandler.cpp:792
Classes used to wrap parser calls for calcite redirection.
int64_t process_deferred_copy_from(const TSessionId &session_id)
Definition: DBHandler.cpp:1430
void get_table_details_impl(TTableDetails &_return, query_state::StdLog &stdlog, const std::string &table_name, const bool get_system, const bool get_physical, const std::string &database_name={})
Definition: DBHandler.cpp:2529
std::string to_lower(const std::string &str)
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:636
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void get_tables_for_database(std::vector< std::string > &_return, const TSessionId &session, const std::string &database_name) override
Definition: DBHandler.cpp:2702
static void convertData(TQueryResult &_return, ExecutionResult &result, const QueryStateProxy &query_state_proxy, const bool column_format, const int32_t first_n, const int32_t at_most_n)
Definition: DBHandler.cpp:1250
static void addUdfs(const std::string &json_func_sigs)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:983
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string s3_secret_key
Definition: CopyParams.h:62
boost::filesystem::path import_path_
Definition: DBHandler.h:638
RType getResultType() const
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:660
void add_vsi_archive_prefix(std::string &path)
Definition: DBHandler.cpp:4210
std::vector< int > ChunkKey
Definition: types.h:36
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:272
double g_running_query_interrupt_freq
Definition: Execute.cpp:137
int32_t raster_scanlines_per_thread
Definition: CopyParams.h:90
static const int32_t SERVER_USAGE
Definition: DBObject.h:129
void importGeoTableSingle(const TSessionId &session, const std::string &table_name, const std::string &file_name, const import_export::CopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params)
Definition: DBHandler.cpp:5448
void insert_chunks(const TSessionId &session, const TInsertChunks &insert_chunks) override
Definition: DBHandler.cpp:7108
void set_table_epoch(const TSessionId &session, const int db_id, const int table_id, const int new_epoch) override
Definition: DBHandler.cpp:7344
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:171
const std::string kDataDirectoryName
size_t g_num_tuple_threshold_switch_to_baseline
Definition: Execute.cpp:106
void resume_executor_queue(const TSessionId &session)
Definition: DBHandler.cpp:2964
static const int32_t DROP_VIEW
Definition: DBObject.h:113
#define NULL_DOUBLE
static std::vector< TableFunction > get_table_funcs()
void resetSessionsStore()
Definition: DBHandler.cpp:365
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
static TableSchemaLockMgr & instance()
Definition: LockMgr.h:40
void importGeoTableGlobFilterSort(const TSessionId &session, const std::string &table_name, const std::string &file_name, const import_export::CopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params)
Definition: DBHandler.cpp:5421
ClientProtocol
static void convertExplain(TQueryResult &_return, const ResultSet &results, const bool column_format)
Definition: DBHandler.cpp:6403
const std::string getTargetQuerySessionToKill() const
void validate_configurations()
Definition: DBHandler.cpp:354
const bool renderer_use_parallel_executors_
Definition: DBHandler.h:965
void get_table_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7696
void insert_data(const TSessionId &session, const TInsertData &insert_data) override
Definition: DBHandler.cpp:7167
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
bool is_a_supported_archive_file(const std::string &path)
Definition: DBHandler.cpp:4276
const std::string & udf_filename_
Definition: DBHandler.h:981
static const int32_t ALTER_SERVER
Definition: DBObject.h:128
bool g_multi_instance
Definition: heavyai_locks.h:22
QueryStateProxy createQueryStateProxy()
Definition: QueryState.cpp:71
std::vector< PushedDownFilterInfo > execute_rel_alg(ExecutionResult &_return, QueryStateProxy, const std::string &query_ra, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool just_validate, const bool find_push_down_candidates, const ExplainInfo &explain_info, const std::optional< size_t > executor_index=std::nullopt) const
Definition: DBHandler.cpp:6204
std::string cat(Ts &&...args)
void set_license_key(TLicenseInfo &_return, const TSessionId &session, const std::string &key, const std::string &nonce) override
Definition: DBHandler.cpp:7514
void clearRenderMemory(const TSessionId &session)
Definition: DBHandler.cpp:2934
shared utility for globbing files, paths can be specified as either a single file, directory or wildcards
DBObjectKey getObjectKey() const
Definition: DBObject.h:221
static bool has_view_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2035
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:114
void get_all_effective_roles_for_user(std::vector< std::string > &_return, const TSessionId &session, const std::string &granteeName) override
Definition: DBHandler.cpp:2293
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:178
bool isCalciteExplainDetail() const
Definition: ParserWrapper.h:75
static std::vector< ExtensionFunction > get_ext_funcs(const std::string &name)
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1623
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
Definition: sqltypes.h:76
void get_tables(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2692
void unshare_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::vector< std::string > &groups, const std::vector< std::string > &objects, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5080
bool g_allow_memory_status_log
Definition: Execute.cpp:123
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:1021
TDatumType::type type_to_thrift(const SQLTypeInfo &type_info)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:116
const std::string commandStr() const
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:234
std::string tableName
void init_executor_resource_mgr()
Definition: DBHandler.cpp:252
TRowDescriptor target_meta_infos_to_thrift(const std::vector< TargetMetaInfo > &targets)
void unshare_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5070
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:235
static void convertResult(TQueryResult &_return, const ResultSet &results, const bool column_format)
Definition: DBHandler.cpp:6409
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:663
void sql_execute_impl(ExecutionResult &_return, QueryStateProxy, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool use_calcite, lockmgr::LockedTableDescriptors &locks)
Definition: DBHandler.cpp:6443
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime=false)
TTableRefreshInfo get_refresh_info(const TableDescriptor *td)
Definition: DBHandler.cpp:2463
logger::RequestId requestId() const
Definition: RequestInfo.h:39
void getAllRolesForUserImpl(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr, std::vector< std::string > &roles, const std::string &granteeName, bool effective)
Definition: DBHandler.cpp:2248
void sql_execute_gdf(TDataFrame &_return, const TSessionId &session, const std::string &query, const int32_t device_id, const int32_t first_n) override
Definition: DBHandler.cpp:1543
bool isVerbose() const
Definition: ParserWrapper.h:83
std::string const & getQueryStr() const
Definition: QueryState.h:159
DBObjectType
Definition: DBObject.h:40
static thread_local std::string client_address
Definition: DBHandler.h:154
void get_views(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2725
void get_runtime_table_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7706
static std::string getAstFileName(const std::string &udf_file_name)
static const int32_t CREATE_VIEW
Definition: DBObject.h:112
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:1007
#define NULL_FLOAT
bool path_is_relative(const std::string &path)
Definition: DBHandler.cpp:4244
int64_t query_get_outer_fragment_count(const TSessionId &session, const std::string &select_query) override
Definition: DBHandler.cpp:7003
void share_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::vector< std::string > &groups, const std::vector< std::string > &objects, const TDashboardPermissions &permissions, const bool grant_role) override
Definition: DBHandler.cpp:5061
TCopyParams copyparams_to_thrift(const import_export::CopyParams &cp)
Definition: DBHandler.cpp:4048
auto getExecuteReadLock()
EncodingType thrift_to_encoding(const TEncodingType::type tEncodingType)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
static void loadRuntimeLibs(const std::string &torch_lib_path=std::string())
void clone_session(const TSessionId session1, const TSessionId session2)
static void set_geo_physical_import_buffer_columnar(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column)
Definition: Importer.cpp:1731
#define NULL_BIGINT
void check_and_invalidate_sessions(Parser::DDLStmt *ddl)
Definition: DBHandler.cpp:6431
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:136
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:109
bool has_object_privilege(const TSessionId &sessionId, const std::string &granteeName, const std::string &objectName, const TDBObjectType::type object_type, const TDBObjectPermissions &permissions) override
Definition: DBHandler.cpp:2067
#define LOG(tag)
Definition: Logger.h:285
bool enable_calcite_view_optimize
std::vector< SQLTypeInfo > getBestColumnTypes() const
Definition: Importer.cpp:3498
char unescape_char(std::string str)
Definition: DBHandler.cpp:3836
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
std::unordered_map< std::string, std::unordered_set< std::string > > fill_column_names_by_table(std::vector< std::string > &table_names, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:1770
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
void get_custom_expressions(std::vector< TCustomExpression > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:4641
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:123
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
static const std::string MAPD_EDITION
Definition: release.h:40
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:155
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:395
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:634
void get_hardware_info(TClusterHardwareInfo &_return, const TSessionId &session) override
Definition: DBHandler.cpp:923
ArrowTransport
static bool has_server_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2053
void sql_execute(ExecutionResult &_return, const TSessionId &session, const std::string &query, const bool column_format, const int32_t first_n, const int32_t at_most_n, lockmgr::LockedTableDescriptors &locks)
Definition: DBHandler.cpp:1368
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:161
void get_completion_hints(std::vector< TCompletionHint > &hints, const TSessionId &session, const std::string &sql, const int cursor) override
Definition: DBHandler.cpp:1661
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
Definition: Importer.cpp:6141
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:170
std::vector< TCompletionHint > just_whitelisted_keyword_hints(const std::vector< TCompletionHint > &hints)
TSessionId getInvalidSessionId() const
Definition: DBHandler.cpp:3016
void validate_sort_options(const FilePathOptions &options)
std::string join(T const &container, std::string const &delim)
void get_tables_impl(std::vector< std::string > &table_names, const Catalog_Namespace::SessionInfo &, const GetTablesType get_tables_type, const std::string &database_name={})
Definition: DBHandler.cpp:2675
std::vector< std::string > getTargetNames(const std::vector< TargetMetaInfo > &targets) const
Definition: DBHandler.cpp:6302
static void add(const std::string &json_func_sigs)
int32_t get_table_epoch(const TSessionId &session, const int32_t db_id, const int32_t table_id) override
Definition: DBHandler.cpp:7399
#define UNREACHABLE()
Definition: Logger.h:338
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:6126
static void value_to_thrift_column(const TargetValue &tv, const SQLTypeInfo &ti, TColumn &column)
Definition: DBHandler.cpp:978
void sql_execute_local(TQueryResult &_return, const QueryStateProxy &query_state_proxy, const std::shared_ptr< Catalog_Namespace::SessionInfo > session_ptr, const std::string &query_str, const bool column_format, const std::string &nonce, const int32_t first_n, const int32_t at_most_n, const bool use_calcite)
Definition: DBHandler.cpp:1193
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
void delete_custom_expressions(const TSessionId &session, const std::vector< int32_t > &custom_expression_ids, const bool do_soft_delete) override
Definition: DBHandler.cpp:4676
void get_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
void get_table_epochs(std::vector< TTableEpochInfo > &_return, const TSessionId &session, const int32_t db_id, const int32_t table_id) override
Definition: DBHandler.cpp:7446
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4252
void execute_query_step(TStepResult &_return, const TPendingQuery &pending_query, const TSubqueryId subquery_id, const std::string &start_time_str) override
Definition: DBHandler.cpp:7068
std::vector< bool > is_default
Definition: Fragmenter.h:75
unsigned g_cpu_threads_override
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:964
std::string get_mismatch_attr_warning_text(const std::string &table_name, const std::string &file_path, const std::string &column_name, const std::string &attr, const std::string &got, const std::string &expected)
Definition: DBHandler.cpp:5383
int32_t objectId
Definition: DBObject.h:55
void set_execution_mode_nolock(Catalog_Namespace::SessionInfo *session_ptr, const TExecuteMode::type mode)
Definition: DBHandler.cpp:6184
const std::string base_data_path_
Definition: DBHandler.h:637
void set_execution_mode(const TSessionId &session, const TExecuteMode::type mode) override
Definition: DBHandler.cpp:3087
static const int32_t ALTER_TABLE
Definition: DBObject.h:93
ExtArgumentType from_thrift(const TExtArgumentType::type &t)
void initialize(const bool is_new_db)
Definition: DBHandler.cpp:382
const bool jit_debug_
Definition: DBHandler.h:642
const ResultSetPtr & getDataPtr() const
std::string connection_string
Definition: CopyParams.h:104
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
static std::vector< GeoFileLayerInfo > gdalGetLayersInGeoFile(const std::string &file_name, const CopyParams &copy_params)
Definition: Importer.cpp:5157
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
const size_t render_mem_bytes_
Definition: DBHandler.h:968
const CopyParams & get_copy_params() const
Definition: Importer.h:710
static void init_resource_mgr(const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
Definition: Execute.cpp:5353
void start_render_query(TPendingRenderQuery &_return, const TSessionId &session, const int64_t widget_id, const int16_t node_idx, const std::string &vega_json) override
Definition: DBHandler.cpp:7277
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:227
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:134
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
void switch_database(const TSessionId session, const std::string &dbname)
void setSessionInfo(std::shared_ptr< Catalog_Namespace::SessionInfo >)
Definition: QueryState.cpp:282
void get_db_object_privs(std::vector< TDBObject > &_return, const TSessionId &session, const std::string &objectName, const TDBObjectType::type type) override
Definition: DBHandler.cpp:2162
const std::string kInfoSchemaDbName
void krb5_connect(TKrb5Session &session, const std::string &token, const std::string &dbname) override
Definition: DBHandler.cpp:652
void get_token_based_completions(std::vector< TCompletionHint > &hints, query_state::StdLog &stdlog, std::vector< std::string > &visible_tables, const std::string &sql, const int cursor)
Definition: DBHandler.cpp:1725
Timer createTimer(char const *event_name)
Definition: QueryState.cpp:129
void check_table_load_privileges(const Catalog_Namespace::SessionInfo &session_info, const std::string &table_name)
Definition: DBHandler.cpp:6167
void disconnect(const TSessionId session)
bool isForeignTable() const
static void createSimpleResult(TQueryResult &_return, const ResultSet &results, const bool column_format, const std::string label)
Definition: DBHandler.cpp:6366
std::string raster_import_dimensions
Definition: CopyParams.h:93
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:176
void get_users(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2847
void dispatch_query_task(std::shared_ptr< QueryDispatchQueue::Task > query_task, const bool is_update_delete)
Definition: DBHandler.cpp:1820
bool get_qualified_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:54
#define CHECK_GT(x, y)
Definition: Logger.h:305
void detect_column_types(TDetectResult &_return, const TSessionId &session, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:4338
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:184
static const size_t auto_cpu_mem_bytes
Definition: Execute.h:1643
void initEncoder(const SQLTypeInfo &tmp_sql_type)
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:1018
void execute_distributed_copy_statement(Parser::CopyTableStmt *, const Catalog_Namespace::SessionInfo &session_info)
Definition: DBHandler.cpp:6820
static bool gdalFileExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5053
void import_table_status(TImportStatus &_return, const TSessionId &session, const std::string &import_id) override
Definition: DBHandler.cpp:5904
std::unique_ptr< AbstractImporter > create_importer(Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
Definition: Importer.cpp:6211
std::string sourceName
void fillMissingBuffers(const TSessionId &session, const Catalog_Namespace::Catalog &catalog, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const std::list< const ColumnDescriptor * > &cds, const std::vector< int > &desc_id_to_column_id, size_t num_rows, const std::string &table_name)
Definition: DBHandler.cpp:3275
static TableDataLockMgr & instance()
Definition: LockMgr.h:78
void get_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7666
void setNumElems(const size_t num_elems)
Definition: Encoder.h:285
DBObject * findDbObject(const DBObjectKey &objectKey, bool only_direct) const
Definition: Grantee.cpp:85
ExecutorDeviceType
void emergency_shutdown()
Definition: DBHandler.cpp:7547
std::string to_string(char const *&&v)
size_t get_column_size(const TColumn &column)
Definition: DBHandler.cpp:3454
const std::string kGeoColumnName
Definition: ColumnNames.h:23
std::string find_last_word_from_cursor(const std::string &sql, const int64_t cursor)
void set_leaf_info(const TSessionId &session, const TLeafInfo &info) override
Definition: DBHandler.cpp:973
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:650
std::vector< std::string > getTableNamesForUser(const UserMetadata &user, const GetTablesType get_tables_type) const
Definition: Catalog.cpp:4969
void sql_validate(TRowDescriptor &_return, const TSessionId &session, const std::string &query) override
Definition: DBHandler.cpp:1594
std::vector< int > column_ids_by_names(const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
Definition: DBHandler.cpp:3131
void internal_connect(TSessionId &session, const std::string &username, const std::string &dbname)
Definition: DBHandler.cpp:620
#define LOG_IF(severity, condition)
Definition: Logger.h:384
std::pair< std::string, std::string > getSessionParameter() const
int32_t get_table_epoch_by_name(const TSessionId &session, const std::string &table_name) override
Definition: DBHandler.cpp:7420
#define NULL_INT
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:531
bool is_in_memory_system_table
bool should_suggest_column_hints(const std::string &partial_query)
std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * > > prepare_loader_generic(const Catalog_Namespace::SessionInfo &session_info, const std::string &table_name, size_t num_cols, std::unique_ptr< import_export::Loader > *loader, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> *import_buffers, const std::vector< std::string > &column_names, std::string load_type)
Definition: DBHandler.cpp:3399
std::unique_lock< WrapperType< std::shared_mutex >> ExecutorWriteLock
import_export::CopyParams copy_params
Definition: DBHandler.h:990
void fillGeoColumns(const TSessionId &session, const Catalog_Namespace::Catalog &catalog, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const ColumnDescriptor *cd, size_t &col_idx, size_t num_rows, const std::string &table_name)
Definition: DBHandler.cpp:3237
static void resume_executor_queue()
Definition: Execute.cpp:5395
size_t getCurrentCacheSizeForDevice(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:590
constexpr double a
Definition: Utm.h:32
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:951
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70
void convertResultSet(ExecutionResult &result, const Catalog_Namespace::SessionInfo &session_info, const std::string &query_state_str, TQueryResult &_return)
Definition: DBHandler.cpp:7728
std::shared_lock< T > shared_lock
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:509
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
TRowDescriptor validateRelAlg(const std::string &query_ra, QueryStateProxy query_state_proxy)
Definition: DBHandler.cpp:1826
void connect_impl(TSessionId &session, const std::string &passwd, const std::string &dbname, const Catalog_Namespace::UserMetadata &user_meta, std::shared_ptr< Catalog_Namespace::Catalog > cat, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:694
void addExecutionTime(int64_t execution_time_ms)
std::string getDefaultValueLiteral() const
tuple rows
Definition: report.py:114
Driver for running validation on a single node.
void log_cache_size(const Catalog_Namespace::Catalog &cat)
Definition: DBHandler.cpp:3167
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:174
std::string add_metadata_columns
Definition: CopyParams.h:94
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:72
void disconnect_impl(Catalog_Namespace::SessionInfoPtr &session_ptr)
Definition: DBHandler.cpp:727
This file contains the class specification and related data structures for Catalog.
ImportHeaderRow has_header
Definition: CopyParams.h:46
void connect(TSessionId &session, const std::string &username, const std::string &passwd, const std::string &dbname) override
Definition: DBHandler.cpp:658
void load_table_binary_arrow(const TSessionId &session, const std::string &table_name, const std::string &arrow_stream, const bool use_column_names) override
Definition: DBHandler.cpp:3606
void checkpoint(const TSessionId &session, const int32_t table_id) override
Definition: DBHandler.cpp:7334
std::string ActualQuery()
Definition: ParserWrapper.h:81
bool isAlterSystemClear() const
bool isAggregator() const
Definition: DBHandler.cpp:648
bool g_enable_columnar_output
Definition: Execute.cpp:102
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
Definition: Execute.cpp:107
void delete_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids) override
Definition: DBHandler.cpp:4924
void add(AccessPrivileges newprivs)
Definition: DBObject.h:145
bool is_reserved_name(const std::string &name)
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
TRole::type getServerRole() const
Definition: DBHandler.cpp:850
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:85
void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo &session_info, const std::vector< int32_t > &dashboard_ids)
Definition: DBHandler.cpp:4981
std::shared_lock< WrapperType< std::shared_mutex >> ExecutorReadLock
const std::string kDefaultImportDirName
TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog *cat, const ColumnDescriptor *cd)
Definition: DBHandler.cpp:2363
Supported runtime functions management and retrieval.
static bool has_table_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permission)
Definition: DBHandler.cpp:1999
void get_layers_in_geo_file(std::vector< TGeoFileLayerInfo > &_return, const TSessionId &session, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:6005
const size_t reserved_gpu_mem_
Definition: DBHandler.h:970
TColumnType create_geo_column(const TDatumType::type type, const std::string &name, const bool is_array)
Definition: DBHandler.cpp:5158
std::string TTypeInfo_TypeToString(const TDatumType::type &t)
Definition: DBHandler.cpp:5377
static SysCatalog & instance()
Definition: SysCatalog.h:343
void get_first_geo_file_in_archive(std::string &_return, const TSessionId &session, const std::string &archive_path, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5919
void create_table(const TSessionId &session, const std::string &table_name, const TRowDescriptor &row_desc, const TCreateParams &create_params) override
Definition: DBHandler.cpp:5189
void check_geospatial_files(const boost::filesystem::path file_path, const import_export::CopyParams &copy_params)
Definition: DBHandler.cpp:5168
CONSTEXPR DEVICE bool is_null(const T &value)
RasterPointType raster_point_type
Definition: CopyParams.h:88
void update_custom_expression(const TSessionId &session, const int32_t id, const std::string &expression_json) override
Definition: DBHandler.cpp:4658
#define THROW_COLUMN_ATTR_MISMATCH_EXCEPTION(attr, got, expected)
Definition: DBHandler.cpp:5396
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:971
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
void get_function_details(std::vector< TUserDefinedFunction > &_return, const TSessionId &session, const std::vector< std::string > &udf_names) override
Definition: DBHandler.cpp:7686
void get_all_files_in_archive(std::vector< std::string > &_return, const TSessionId &session, const std::string &archive_path, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5966
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
std::string g_base_path
Definition: SysCatalog.cpp:62
const size_t CALCITE_SESSION_ID_LENGTH
Definition: SessionInfo.h:126
static const int32_t DROP_DATABASE
Definition: DBObject.h:79
std::string generate_random_string(const size_t len)
void setQueryState(std::shared_ptr< QueryState >)
Definition: QueryState.cpp:278
#define EXPOSE_THRIFT_MAP(TYPENAME)
Definition: DBHandler.cpp:7555
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void get_tables_meta_impl(std::vector< TTableMeta > &_return, QueryStateProxy query_state_proxy, const Catalog_Namespace::SessionInfo &session_info, const bool with_table_locks=true)
Definition: DBHandler.cpp:2734
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4809
GetTablesType
Definition: Catalog.h:63
static HashtableRecycler * getHashTableCache()
const int max_session_duration_
Definition: DBHandler.h:960
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:639
static void readMetadataSampleGDAL(const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
Definition: Importer.cpp:4625
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
std::shared_ptr< Catalog_Namespace::SessionInfo > get_session_ptr(const TSessionId &session_id)
Definition: DBHandler.cpp:6145
string version
Definition: setup.in.py:73
ProjectionTokensForCompletion extract_projection_tokens_for_completion(const std::string &sql)
Definition: DBHandler.cpp:1637
static std::shared_ptr< QueryEngine > createInstance(CudaMgr_Namespace::CudaMgr *cuda_mgr, bool cpu_only)
Definition: QueryEngine.h:97
std::shared_ptr< Catalog_Namespace::SessionInfo > getSessionInfo() const
Definition: QueryState.cpp:155
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:635
void shutdown()
Definition: DBHandler.cpp:7533
static std::unordered_set< std::string > get_udfs_name(const bool is_runtime)
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:980
void removeInMemoryCalciteSession(const std::string &session_id)
Definition: DBHandler.cpp:613
RequestId set_new_request_id()
Definition: Logger.cpp:889
#define INVALID_SESSION_ID
Definition: DBHandler.cpp:131
static bool has_database_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:1982
static const int32_t DELETE_FROM_TABLE
Definition: DBObject.h:91
const std::vector< TargetMetaInfo > & getTargetsMeta() const
void validate_import_file_path_if_local(const std::string &file_path)
Definition: DBHandler.cpp:4330
const std::string & clang_path_
Definition: DBHandler.h:982
const std::shared_ptr< ResultSet > & getRows() const
bool isShowUserSessions() const
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:190
std::unique_lock< T > unique_lock
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:654
void get_completion_hints_unsorted(std::vector< TCompletionHint > &hints, std::vector< std::string > &visible_tables, query_state::StdLog &stdlog, const std::string &sql, const int cursor)
Definition: DBHandler.cpp:1696
void alterSession(const std::string &sesson_id, ExecutionResult &result, const std::pair< std::string, std::string > &session_parameter, int64_t &execution_time_ms)
Definition: DBHandler.cpp:8027
static const int32_t TRUNCATE_TABLE
Definition: DBObject.h:92
std::string sql_order_by
Definition: CopyParams.h:97
Checked json field retrieval.
bool g_enable_watchdog
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const
void set_cur_session(const TSessionId &parent_session, const TSessionId &leaf_session, const std::string &start_time_str, const std::string &label, bool for_running_query_kernel) override
Definition: DBHandler.cpp:2978
std::string get_import_tag(const std::string &import_tag, const std::string &table_name, const std::string &file_path)
Definition: DBHandler.cpp:3315
~DBHandler() override
Definition: DBHandler.cpp:576
void updateResultSet(const std::string &query_ra, RType type, bool success=true)
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:653
SystemParameters & system_parameters_
Definition: DBHandler.h:652
const size_t num_reader_threads_
Definition: DBHandler.h:973
An AbstractBuffer is a unit of data management for a data manager.
import_export::SourceType source_type
Definition: CopyParams.h:57
std::string get_load_tag(const std::string &load_tag, const std::string &table_name)
Definition: DBHandler.cpp:3309
#define SET_REQUEST_ID(parent_request_id)
Definition: DBHandler.cpp:133
size_t getTotalMemorySizeForDictionariesForDatabase() const
Definition: Catalog.cpp:2370
TDashboard get_dashboard_impl(const std::shared_ptr< Catalog_Namespace::SessionInfo const > &session_ptr, Catalog_Namespace::UserMetadata &user_meta, const DashboardDescriptor *dash, const bool populate_state=true)
Definition: DBHandler.cpp:4743
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1904
specifies the content in-memory of a row in the column metadata table
int32_t max_num_sessions_
Definition: DBHandler.h:984
size_t g_max_log_length
Definition: Execute.cpp:172
void delete_dashboard(const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:4919
std::string getName() const
Definition: DBObject.h:219
static std::unique_ptr< SessionsStore > create(const std::string &base_path, size_t n_workers, int idle_session_duration, int max_session_duration, int capacity, DisconnectCallback disconnect_callback)
void get_session_info(TSessionInfo &_return, const TSessionId &session) override
Definition: DBHandler.cpp:958
QueryStateProxy createQueryStateProxy()
Definition: QueryState.cpp:139
size_t leafCount() const
static const int32_t EDIT_DASHBOARD
Definition: DBObject.h:104
static const int32_t DELETE_DASHBOARD
Definition: DBObject.h:102
std::pair< TPlanResult, lockmgr::LockedTableDescriptors > parse_to_ra(QueryStateProxy, const std::string &query_str, const std::vector< TFilterPushDownInfo > &filter_push_down_info, const bool acquire_locks, const SystemParameters &system_parameters, bool check_privileges=true)
Definition: DBHandler.cpp:6895
bool isOptimizedExplain() const
Definition: ParserWrapper.h:70
void create_link(std::string &_return, const TSessionId &session, const std::string &view_state, const std::string &view_metadata) override
Definition: DBHandler.cpp:5134
#define NULL_BOOLEAN
static const int32_t INSERT_INTO_TABLE
Definition: DBObject.h:89
void get_version(std::string &_return) override
Definition: DBHandler.cpp:2867
RecordBatchVector loadArrowStream(const std::string &stream)
Definition: DBHandler.cpp:3576
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:165
void get_tables_meta(std::vector< TTableMeta > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2828
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:187
int get_precision() const
Definition: sqltypes.h:394
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:822
const bool renderer_prefer_igpu_
Definition: DBHandler.h:963
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:952
DBObjectType getType() const
Definition: DBObject.h:220
std::string to_upper(const std::string &str)
void get_server_status(TServerStatus &_return, const TSessionId &session) override
Definition: DBHandler.cpp:859
void setResultType(RType type)
static bool gdalFileOrDirectoryExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5058
static ResultSet * create(std::vector< TargetMetaInfo > &label_infos, std::vector< RelLogicalValues::RowValues > &logical_values)
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3561
void load_table_binary_columnar(const TSessionId &session, const std::string &table_name, const std::vector< TColumn > &cols, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3468
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4804
bool isSelectExplain() const
Definition: ParserWrapper.h:58
static TDBObject serialize_db_object(const std::string &roleName, const DBObject &inObject)
Definition: DBHandler.cpp:1919
bool is_column() const
Definition: sqltypes.h:598
std::string thrift_to_encoding_name(const TTypeInfo &ti)
void fixup_geo_column_descriptor(TColumnType &col_type, const SQLTypes subtype, const int output_srid)
static const int32_t CREATE_SERVER
Definition: DBObject.h:126
std::string thrift_to_name(const TTypeInfo &ti)
RuntimeUdfRegistrationPolicy runtime_udf_registration_policy
std::vector< TargetMetaInfo > getTargetMetaInfo(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &targets) const
Definition: DBHandler.cpp:6280
std::string get_session_id() const
Definition: SessionInfo.h:93
bool isJustExplain() const
Definition: ParserWrapper.h:52
static void deallocateArrowResultBuffer(const ArrowResult &result, const ExecutorDeviceType device_type, const size_t device_id, std::shared_ptr< Data_Namespace::DataMgr > &data_mgr)
std::string geo_layer_name
Definition: CopyParams.h:81
const bool allow_loop_joins_
Definition: DBHandler.h:646
void loadKey()
Definition: DBObject.cpp:190
void start_query(TPendingQuery &_return, const TSessionId &leaf_session, const TSessionId &parent_session, const std::string &serialized_rel_alg_dag, const std::string &start_time_str, const bool just_explain, const std::vector< int64_t > &outer_fragment_indices) override
Definition: DBHandler.cpp:7035
const AccessPrivileges & getPrivileges() const
Definition: DBObject.h:226
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
bool isAlterSessionSet() const
bool file_or_glob_path_exists(const std::string &path)
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:655
std::optional< std::string > default_value
void load_table(const TSessionId &session, const std::string &table_name, const std::vector< TStringRow > &rows, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3710
Definition: sqltypes.h:79
Definition: sqltypes.h:80
void broadcast_serialized_rows(const TSerializedRows &serialized_rows, const TRowDescriptor &row_desc, const TQueryId query_id, const TSubqueryId subquery_id, const bool is_final_subquery_result) override
Definition: DBHandler.cpp:7088
std::vector< std::string > get_valid_groups(const TSessionId &session, int32_t dashboard_id, std::vector< std::string > groups)
Definition: DBHandler.cpp:4944
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1073
TExecuteMode::type getExecutionMode(const TSessionId &session)
Definition: DBHandler.cpp:3074
void clone_session(TSessionId &session2, const TSessionId &session1) override
Definition: DBHandler.cpp:768
TExtArgumentType::type to_thrift(const ExtArgumentType &t)
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
std::unordered_set< std::string > get_uc_compatible_table_names_by_column(const std::unordered_set< std::string > &uc_column_names, std::vector< std::string > &table_names, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:1795
void get_dashboard(TDashboard &_return, const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:4696
const bool enable_rendering_
Definition: DBHandler.h:962
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3563
std::string json() const
Definition: RequestInfo.cpp:22
void get_table_details(TTableDetails &_return, const TSessionId &session, const std::string &table_name) override
Definition: DBHandler.cpp:2435
void share_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5050
TPlanResult processCalciteRequest(QueryStateProxy, const std::shared_ptr< Catalog_Namespace::Catalog > &cat, const std::string &query_str, const std::vector< TFilterPushDownInfo > &filter_push_down_info, const SystemParameters &system_parameters, const bool check_privileges)
Definition: DBHandler.cpp:6862
void get_db_objects_for_grantee(std::vector< TDBObject > &_return, const TSessionId &session, const std::string &roleName) override
Definition: DBHandler.cpp:2133
ExecutionResult execute(bool read_only_mode)
void appendNameValuePairs(Pairs &&...pairs)
Definition: QueryState.h:312
void get_link_view(TFrontendView &_return, const TSessionId &session, const std::string &link) override
Definition: DBHandler.cpp:2639
const bool intel_jit_profile_
Definition: DBHandler.h:643
bool super_user_rights_
Definition: DBHandler.h:956
std::string format_num_bytes(const size_t bytes)
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
const bool renderer_enable_slab_allocation_
Definition: DBHandler.h:972
std::string returnQueueAction() const
bool is_date_in_days() const
Definition: sqltypes.h:1016
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:183
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73
void disconnect(const TSessionId &session) override
Definition: DBHandler.cpp:719
void check_in_memory_system_table_query(const std::vector< std::vector< std::string >> &selected_tables)
Definition: DBHandler.cpp:6846
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
bool table_is_replicated(const TableDescriptor *td)
TCustomExpression create_thrift_obj_from_custom_expr(const CustomExpression &custom_expr, const Catalog &catalog)
Definition: DBHandler.cpp:4599
void pause_executor_queue(const TSessionId &session)
Definition: DBHandler.cpp:2950
Catalog & getCatalog() const
Definition: SessionInfo.h:75
static void convertRows(TQueryResult &_return, QueryStateProxy query_state_proxy, const std::vector< TargetMetaInfo > &targets, const ResultSet &results, const bool column_format, const int32_t first_n, const int32_t at_most_n)
Definition: DBHandler.cpp:6311
static ReadLock getReadLockForTable(Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:238
std::string sanitize_name(const std::string &name, const bool underscore=false)
void import_table(const TSessionId &session, const std::string &table_name, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5278
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:985
void register_runtime_extension_functions(const TSessionId &session, const std::vector< TUserDefinedFunction > &udfs, const std::vector< TUserDefinedTableFunction > &udtfs, const std::map< std::string, std::string > &device_ir_map) override
Definition: DBHandler.cpp:7583
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:657
static ArrayDatum composeNullPointCoords(const SQLTypeInfo &coords_ti, const SQLTypeInfo &geo_ti)
Definition: Importer.cpp:399
Basic constructors and methods of the row set interface.
void get_table_details_for_database(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const std::string &database_name) override
Definition: DBHandler.cpp:2448
void get_status(std::vector< TServerStatus > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:878
const std::vector< PushedDownFilterInfo > & getPushedDownFilterInfo() const
static const int32_t ACCESS
Definition: DBObject.h:81
void switch_database(const TSessionId &session, const std::string &dbname) override
Definition: DBHandler.cpp:747
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:632
void validateGroups(const std::vector< std::string > &groups)
Definition: DBHandler.cpp:4973
const bool read_only_
Definition: DBHandler.h:645
std::string s3_session_token
Definition: CopyParams.h:63
static const int32_t CREATE_DATABASE
Definition: DBObject.h:78
void check_table_not_sharded(const TableDescriptor *td)
Definition: DBHandler.cpp:3099
void executeDdl(TQueryResult &_return, const std::string &query_ra, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:8052
void shareOrUnshareDashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions, const bool do_share)
Definition: DBHandler.cpp:5006
void get_queries_info(std::vector< TQueryInfo > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7874
bool checkInMemorySystemTableQuery(const std::unordered_set< shared::TableKey > &tables_selected_from) const
Definition: DBHandler.cpp:8166
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4260
void get_runtime_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7676
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
static const AccessPrivileges ACCESS
Definition: DBObject.h:153
void get_databases(std::vector< TDBInfo > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:3056
static const int32_t VIEW_DASHBOARD
Definition: DBObject.h:103
bool user_can_access_table(const Catalog_Namespace::SessionInfo &, const TableDescriptor *td, const AccessPrivileges acess_priv)
Definition: DBHandler.cpp:6416
static const AccessPrivileges ALL_TABLE
Definition: DBObject.h:157
std::vector< std::vector< std::string > > get_sample_rows(size_t n)
Definition: Importer.cpp:3464
void replace_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::string &dashboard_name, const std::string &dashboard_owner, const std::string &dashboard_state, const std::string &image_hash, const std::string &dashboard_metadata) override
Definition: DBHandler.cpp:4867
static const int32_t VIEW_SQL_EDITOR
Definition: DBObject.h:80
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:52
void get_result_row_for_pixel(TPixelTableRowResult &_return, const TSessionId &session, const int64_t widget_id, const TPixel &pixel, const std::map< std::string, std::vector< std::string >> &table_col_names, const bool column_format, const int32_t pixel_radius, const std::string &nonce) override
Definition: DBHandler.cpp:2317
const bool legacy_syntax_
Definition: DBHandler.h:658
void import_geo_table(const TSessionId &session, const std::string &table_name, const std::string &file_name, const TCopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params) override
Definition: DBHandler.cpp:5402
void check_read_only(const std::string &str)
Definition: DBHandler.cpp:580
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
static std::unique_ptr< RexLiteral > genLiteralStr(std::string val)
Definition: DBHandler.cpp:7752
void set_table_epoch_by_name(const TSessionId &session, const std::string &table_name, const int new_epoch) override
Definition: DBHandler.cpp:7369
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
void set_table_epochs(const TSessionId &session, const int32_t db_id, const std::vector< TTableEpochInfo > &table_epochs) override
Definition: DBHandler.cpp:7475
bool g_allow_system_dashboard_update
Definition: DBHandler.cpp:124
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:179
std::string returnCacheType() const
bool g_uniform_request_ids_per_thrift_call
Definition: DBHandler.cpp:125
static const int32_t DROP_TABLE
Definition: DBObject.h:87
#define NULL_TINYINT
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:177
RequestId request_id()
Definition: Logger.cpp:874
dictionary params
Definition: report.py:27
bool g_enable_filter_push_down
Definition: Execute.cpp:98
std::string find_first_geo_file_in_archive(const std::string &archive_path, const import_export::CopyParams &copy_params)
Definition: DBHandler.cpp:4289
std::vector< std::string > get_headers()
Definition: Importer.cpp:3479
void deallocate_df(const TSessionId &session, const TDataFrame &df, const TDeviceType::type device_type, const int32_t device_id) override
Definition: DBHandler.cpp:1562
static const int32_t INSERT_INTO_VIEW
Definition: DBObject.h:115
std::string raster_import_bands
Definition: CopyParams.h:89
static void registerExtensionFunctions(F register_extension_functions)
Definition: Execute.h:470
Catalog_Namespace::SessionInfo get_session_copy(const TSessionId &session_id)
Definition: DBHandler.cpp:6134
bool dashboard_exists(const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
Definition: DBHandler.cpp:152
bool has_role(const TSessionId &sessionId, const std::string &granteeName, const std::string &roleName) override
Definition: DBHandler.cpp:1897
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
int32_t create_custom_expression(const TSessionId &session, const TCustomExpression &custom_expression) override
Definition: DBHandler.cpp:4623
void render_vega(TRenderResult &_return, const TSessionId &session, const int64_t widget_id, const std::string &vega_json, const int32_t compression_level, const std::string &nonce) override
Definition: DBHandler.cpp:4521
std::map< const std::string, const PermissionFuncPtr > permissionFuncMap_
Definition: DBHandler.h:1032
std::unique_ptr< Catalog_Namespace::CustomExpression > create_custom_expr_from_thrift_obj(const TCustomExpression &t_custom_expr, const Catalog &catalog)
Definition: DBHandler.cpp:4580
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:656
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4222
void invalidate_cur_session(const TSessionId &parent_session, const TSessionId &leaf_session, const std::string &start_time_str, const std::string &label, bool for_running_query_kernel) override
Definition: DBHandler.cpp:3001
void add_vsi_network_prefix(std::string &path)
Definition: DBHandler.cpp:4181
#define CHECK(condition)
Definition: Logger.h:291
static const int32_t DELETE_FROM_VIEW
Definition: DBObject.h:117
bool is_geometry() const
Definition: sqltypes.h:595
#define DEBUG_TIMER(name)
Definition: Logger.h:412
static HashtableRecycler * getHashTableCache()
static const int32_t CREATE_TABLE
Definition: DBObject.h:86
static ImportStatus get_import_status(const std::string &id)
Definition: Importer.cpp:231
void check_table_consistency(TTableMeta &_return, const TSessionId &session, const int32_t table_id) override
Definition: DBHandler.cpp:7019
void execute_rel_alg_with_filter_push_down(ExecutionResult &_return, QueryStateProxy, std::string &query_ra, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool just_explain, const bool just_calcite_explain, const std::vector< PushedDownFilterInfo > &filter_push_down_requests)
Definition: DBHandler.cpp:6776
const int idle_session_duration_
Definition: DBHandler.h:959
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:191
void setExecutionTime(int64_t execution_time_ms)
static TDatum value_to_thrift(const TargetValue &tv, const SQLTypeInfo &ti)
Definition: DBHandler.cpp:1103
static bool is_allowed_on_dashboard(const Catalog_Namespace::SessionInfo &session_info, int32_t dashboard_id, AccessPrivileges requestedPermissions)
Definition: DBHandler.cpp:4563
void remove(const std::string &session_id)
Definition: DBHandler.h:1013
static const std::list< ColumnDescriptor > gdalToColumnDescriptors(const std::string &fileName, const bool is_raster, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4820
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438
static const int32_t CREATE_DASHBOARD
Definition: DBObject.h:101
#define NULL_SMALLINT
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:55
void start_heap_profile(const TSessionId &session) override
Definition: DBHandler.cpp:6082
bool g_cluster
void get_table_function_details(std::vector< TUserDefinedTableFunction > &_return, const TSessionId &session, const std::vector< std::string > &udtf_names) override
Definition: DBHandler.cpp:7717
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5130
static void pause_executor_queue()
Definition: Execute.cpp:5386
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
void get_device_parameters(std::map< std::string, std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7566
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:1022
auto getExecuteWriteLock()
void interruptQuery(const Catalog_Namespace::SessionInfo &session_info, const std::string &target_session)
Definition: DBHandler.cpp:7922
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89
void set_executor_device_type(ExecutorDeviceType t)
Definition: SessionInfo.h:92
bool hasTableAccessPrivileges(const TableDescriptor *td, const Catalog_Namespace::SessionInfo &session_info)
Definition: DBHandler.cpp:2658
Serializers for query engine types to/from thrift.
bool isCalciteExplain() const
Definition: ParserWrapper.h:71
std::list< DBSummary > DBSummaryList
Definition: SysCatalog.h:145
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:969
int32_t permissionType
Definition: DBObject.h:53
void log_system_cpu_memory_status(std::string const &query, const Catalog_Namespace::Catalog &cat)
Definition: DBHandler.cpp:3226
std::vector< std::string > local_glob_filter_sort_files(const std::string &file_path, const FilePathOptions &options, const bool recurse)
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void check_valid_column_names(const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
Definition: DBHandler.cpp:3105
Catalog_Namespace::SessionInfoPtr findCalciteSession(TSessionId const &) const
Definition: DBHandler.cpp:6110
bool allow_multifrag_
Definition: DBHandler.h:644
static const AccessPrivileges DELETE_DASHBOARD
Definition: DBObject.h:173
void get_internal_table_details(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const bool include_system_columns) override
Definition: DBHandler.cpp:2410
void get_roles(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:1879
Definition: sqltypes.h:72
static const int32_t SELECT_FROM_TABLE
Definition: DBObject.h:88
static const std::string MAPD_RELEASE
Definition: release.h:42
SQLTypeInfo columnType
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2656
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
void load_table_binary(const TSessionId &session, const std::string &table_name, const std::vector< TRow > &rows, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3324
bool isAlterSystemControlExecutorQueue() const
string name
Definition: setup.in.py:72
std::vector< TServerStatus > getLeafStatus(TSessionId session)
void clear_gpu_memory(const TSessionId &session) override
Definition: DBHandler.cpp:2887
size_t g_leaf_count
Definition: ParserNode.cpp:78
bool check_and_reset_in_memory_system_table(const Catalog &catalog, const TableDescriptor &td)
Definition: DBHandler.cpp:6825
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
void init_table_functions()
int8_t * numbersPtr
Definition: sqltypes.h:233
void get_physical_tables(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2716
void get_dashboards(std::vector< TDashboard > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:4721
std::pair< std::string, std::string > compileUdf(const std::string &udf_file_name) const
const AuthMetadata & authMetadata_
Definition: DBHandler.h:651
import_export::CopyParams thrift_to_copyparams(const TCopyParams &cp)
Definition: DBHandler.cpp:3854
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:88
static bool has_dashboard_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2019
TTypeInfo type_info_to_thrift(const SQLTypeInfo &ti)
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
static const AccessPrivileges EDIT_DASHBOARD
Definition: DBObject.h:172
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:150
int cpu_threads()
Definition: thread_count.h:25
static const int32_t UPDATE_IN_TABLE
Definition: DBObject.h:90
static constexpr size_t kDefaultSampleRowsCount
std::string userLoggable() const
Definition: SysCatalog.cpp:158
DBHandler(const std::vector< LeafHostInfo > &db_leaves, const std::vector< LeafHostInfo > &string_leaves, const std::string &base_data_path, const bool allow_multifrag, const bool jit_debug, const bool intel_jit_profile, const bool read_only, const bool allow_loop_joins, const bool enable_rendering, const bool renderer_prefer_igpu, const unsigned renderer_vulkan_timeout_ms, const bool renderer_use_parallel_executors, const bool enable_auto_clear_render_mem, const int render_oom_retry_threshold, const size_t render_mem_bytes, const size_t max_concurrent_render_sessions, const size_t reserved_gpu_mem, const bool render_compositor_use_last_gpu, const bool renderer_enable_slab_allocation, const size_t num_reader_threads, const AuthMetadata &authMetadata, SystemParameters &system_parameters, const bool legacy_syntax, const int idle_session_duration, const int max_session_duration, const std::string &udf_filename, const std::string &clang_path, const std::vector< std::string > &clang_options, const File_Namespace::DiskCacheConfig &disk_cache_config, const bool is_new_db)
Definition: DBHandler.cpp:169
std::string s3_access_key
Definition: CopyParams.h:61
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
bool is_decimal() const
Definition: sqltypes.h:568
void sql_execute_df(TDataFrame &_return, const TSessionId &session, const std::string &query, const TDeviceType::type device_type, const int32_t device_id, const int32_t first_n, const TArrowTransport::type transport_method) override
Definition: DBHandler.cpp:1461
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
std::string columnName
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:84
int64_t getExecutionTime() const
void get_heap_profile(std::string &_return, const TSessionId &session) override
Definition: DBHandler.cpp:6117
RasterPointTransform raster_point_transform
Definition: CopyParams.h:91
bool hasPermission(int permission) const
Definition: DBObject.h:141
ExecutionResult getQueries(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:7795
void get_all_roles_for_user(std::vector< std::string > &_return, const TSessionId &session, const std::string &granteeName) override
Definition: DBHandler.cpp:2281
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:88
std::string const & sessionId() const
Definition: RequestInfo.h:40
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:86
std::string pg_shim(const std::string &query)
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4260
#define IS_GEO(T)
Definition: sqltypes.h:310
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:133
bool cpu_mode_only_
Definition: DBHandler.h:647
const std::string kInfoSchemaMigrationName
std::unique_ptr< Parser::Stmt > create_stmt_for_query(const std::string &queryStr, const Catalog_Namespace::SessionInfo &session_info)
std::string dump_table_col_names(const std::map< std::string, std::vector< std::string >> &table_col_names)
Definition: DBHandler.cpp:2304
static BoundingBoxIntersectTuningParamRecycler * getBoundingBoxIntersectTuningParamCache()
void get_dashboard_grantees(std::vector< TDashboardGrantees > &_return, const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:5088
void get_license_claims(TLicenseInfo &_return, const TSessionId &session, const std::string &nonce) override
Definition: DBHandler.cpp:7524
ExecutionResult getUserSessions(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:7757
ThreadLocalIds thread_local_ids()
Definition: Logger.cpp:880
void get_memory(std::vector< TNodeMemoryInfo > &_return, const TSessionId &session, const std::string &memory_level) override
Definition: DBHandler.cpp:3020
void add_vsi_geo_prefix(std::string &path)
Definition: DBHandler.cpp:4203
std::string credential_string
Definition: CopyParams.h:101
ConnectionInfo getConnectionInfo() const
Definition: DBHandler.cpp:1790
bool is_array() const
Definition: sqltypes.h:583
#define STDLOG(...)
Definition: QueryState.h:234
const std::string getQuerySubmittedTime() const
Definition: QueryState.cpp:101
#define VLOG(n)
Definition: Logger.h:388
void resizeDispatchQueue(size_t queue_size)
Definition: DBHandler.cpp:8162
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53
void execute_next_render_step(TRenderStepResult &_return, const TPendingRenderQuery &pending_render, const TRenderAggDataMap &merged_data) override
Definition: DBHandler.cpp:7312
void get_internal_table_details_for_database(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const std::string &database_name) override
Definition: DBHandler.cpp:2422
std::atomic< bool > isSuper
Definition: SysCatalog.h:107
std::string const createInMemoryCalciteSession(const std::shared_ptr< Catalog_Namespace::Catalog > &catalog_ptr)
Definition: DBHandler.cpp:586
bool isPlanExplain() const
Definition: ParserWrapper.h:78
void setRequestId(logger::RequestId const request_id)
Definition: RequestInfo.h:42
static const int32_t DROP_SERVER
Definition: DBObject.h:127
void alterSystemClear(const std::string &sesson_id, ExecutionResult &result, const std::string &cache_type, int64_t &execution_time_ms)
Definition: DBHandler.cpp:8011
void stop_heap_profile(const TSessionId &session) override
Definition: DBHandler.cpp:6096
std::pair< std::string_view, const char * > substring(const std::string &str, size_t substr_length)
return substring of str with postfix if str.size() > substr_length
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:397
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:87
std::atomic< bool > initialized_
Definition: DBHandler.h:686
static void addRTUdfs(const std::string &json_func_sigs)
bool TTypeInfo_IsGeo(const TDatumType::type &t)
Definition: DBHandler.cpp:5371
int32_t create_dashboard(const TSessionId &session, const std::string &dashboard_name, const std::string &dashboard_state, const std::string &image_hash, const std::string &dashboard_metadata) override
Definition: DBHandler.cpp:4822
std::string get_hostname()
void interrupt(const TSessionId query_session, const TSessionId interrupt_session)
TEncodingType::type encoding_to_thrift(const SQLTypeInfo &type_info)
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139
EncodingType geo_coords_encoding
Definition: CopyParams.h:76
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4324
void clear_cpu_memory(const TSessionId &session) override
Definition: DBHandler.cpp:2911