OmniSciDB  f17484ade4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "LeafAggregator.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "Calcite/Calcite.h"
32 #include "Catalog/Catalog.h"
33 #include "Catalog/SessionsStore.h"
35 #include "Geospatial/Transforms.h"
36 #include "ImportExport/Importer.h"
37 #include "LockMgr/LockMgr.h"
38 #include "Logger/Logger.h"
39 #include "Parser/ParserNode.h"
40 #include "Parser/ParserWrapper.h"
44 #include "QueryEngine/Execute.h"
51 #include "Shared/StringTransform.h"
55 #include "Shared/measure.h"
56 #include "Shared/scope.h"
62 
63 #include <sys/types.h>
64 #include <thrift/server/TServer.h>
65 #include <thrift/transport/THttpClient.h>
66 #include <thrift/transport/TSocket.h>
67 #include <thrift/transport/TTransport.h>
68 #include <atomic>
69 #include <boost/algorithm/string.hpp>
70 #include <boost/algorithm/string/replace.hpp>
71 #include <boost/algorithm/string/trim.hpp>
72 #include <boost/filesystem.hpp>
73 #include <boost/make_shared.hpp>
74 #include <boost/noncopyable.hpp>
75 #include <boost/none_t.hpp>
76 #include <boost/optional.hpp>
77 #include <boost/program_options.hpp>
78 #include <boost/tokenizer.hpp>
79 #include <cmath>
80 #include <csignal>
81 #include <fstream>
82 #include <list>
83 #include <map>
84 #include <memory>
85 #include <mutex>
86 #include <random>
87 #include <string>
88 #include <thread>
89 #include <typeinfo>
90 #include <unordered_map>
91 
92 #include "gen-cpp/Heavy.h"
93 #include "gen-cpp/extension_functions_types.h"
94 
95 using namespace std::string_literals;
96 
97 class HeavyDBAggHandler;
98 class HeavyDBLeafHandler;
99 
100 // Multiple concurrent requests for the same session can occur. For that reason, each
101 // request briefly takes a lock to make a copy of the appropriate SessionInfo object. Then
102 // it releases the lock and uses the copy for the remainder of the request.
103 using SessionMap = std::map<TSessionId, std::shared_ptr<Catalog_Namespace::SessionInfo>>;
104 using PermissionFuncPtr = bool (*)(const AccessPrivileges&, const TDBObjectPermissions&);
106 
// Free helper functions related to the information-schema database. Only the
// declarations are visible here; the definitions live elsewhere.
107 namespace dbhandler {
// NOTE(review): presumably returns true when db_name refers to the information
// schema database -- confirm against the definition.
108 bool is_info_schema_db(const std::string& db_name);
109 
// NOTE(review): presumably rejects db_name when it refers to the information
// schema database; throw_db_exception (default false) presumably selects which
// kind of exception is raised -- confirm against the definition.
110 void check_not_info_schema_db(const std::string& db_name,
111  bool throw_db_exception = false);
112 } // namespace dbhandler
113 
// HeavyProcessor subclass that, before delegating each request to
// HeavyProcessor::process(), optionally inspects the input transport and stores
// the client's origin address in the thread-local client_address.
114 class TrackingProcessor : public HeavyProcessor {
115  public:
    // handler is forwarded to the HeavyProcessor base; check_origin enables the
    // origin inspection performed in process().
116  TrackingProcessor(std::shared_ptr<HeavyIf> handler, const bool check_origin)
117  : HeavyProcessor(handler), check_origin_(check_origin) {}
118 
    // Records per-request client information (when a transport is available and
    // check_origin_ is set), then returns the result of HeavyProcessor::process().
119  bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol> in,
120  std::shared_ptr<::apache::thrift::protocol::TProtocol> out,
121  void* connectionContext) override {
122  using namespace ::apache::thrift;
123 
124  auto transport = in->getTransport();
125  if (transport && check_origin_) {
    // Function-local static mutex: the same lock is shared by every caller of
    // process(), and it is held until the end of this if-block.
126  static std::mutex processor_mutex;
127  std::lock_guard lock(processor_mutex);
128  const auto origin_str = transport->getOrigin();
    // The origin string is treated as a comma-separated list.
129  std::vector<std::string> origins;
130  boost::split(origins, origin_str, boost::is_any_of(","));
131  if (origins.empty()) {
    // NOTE(review): the body of this branch is elided in this listing.
133  } else {
134  // Take the first origin, which should be the client IP before any intermediate
135  // servers (e.g. the web server)
136  auto trimmed_origin = origins.front();
137  boost::algorithm::trim(trimmed_origin);
138  TrackingProcessor::client_address = trimmed_origin;
139  }
    // Dispatch on the concrete transport type. NOTE(review): the branch bodies
    // are elided in this listing; presumably each one sets client_protocol --
    // confirm against the full source.
140  if (dynamic_cast<transport::THttpTransport*>(transport.get())) {
142  } else if (dynamic_cast<transport::TBufferedTransport*>(transport.get())) {
144  } else {
146  }
147  } else {
    // NOTE(review): body elided in this listing (no transport, or origin
    // checking disabled).
149  }
150 
151  return HeavyProcessor::process(in, out, connectionContext);
152  }
153 
    // thread_local: each thread keeps its own copy of these values.
    // client_address is assigned in process() from the first origin entry.
154  static thread_local std::string client_address;
155  static thread_local ClientProtocol client_protocol;
156 
157  private:
    // When false, process() skips the origin inspection.
158  const bool check_origin_;
159 };
160 
161 namespace File_Namespace {
162 struct DiskCacheConfig;
163 }
164 
165 class DBHandler : public HeavyIf {
166  public:
167  DBHandler(const std::vector<LeafHostInfo>& db_leaves,
168  const std::vector<LeafHostInfo>& string_leaves,
169  const std::string& base_data_path,
170  const bool allow_multifrag,
171  const bool jit_debug,
172  const bool intel_jit_profile,
173  const bool read_only,
174  const bool allow_loop_joins,
175  const bool enable_rendering,
176  const bool renderer_prefer_igpu,
177  const unsigned renderer_vulkan_timeout_ms,
178  const bool renderer_use_parallel_executors,
179  const bool enable_auto_clear_render_mem,
180  const int render_oom_retry_threshold,
181  const size_t render_mem_bytes,
182  const size_t max_concurrent_render_sessions,
183  const size_t reserved_gpu_mem,
184  const bool render_compositor_use_last_gpu,
185  const size_t num_reader_threads,
186  const AuthMetadata& authMetadata,
187  SystemParameters& system_parameters,
188  const bool legacy_syntax,
189  const int idle_session_duration,
190  const int max_session_duration,
191  const std::string& udf_filename,
192  const std::string& clang_path,
193  const std::vector<std::string>& clang_options,
194 #ifdef ENABLE_GEOS
195  const std::string& libgeos_so_filename,
196 #endif
197 #ifdef HAVE_TORCH_TFS
198  const std::string& torch_lib_path,
199 #endif
200  const File_Namespace::DiskCacheConfig& disk_cache_config,
201  const bool is_new_db);
202  void initialize(const bool is_new_db);
203  ~DBHandler() override;
204 
    // Returns the constant 2 * 1000 * 1000 * 1000 (2,000,000,000) as a size_t.
    // The LL suffix on the last factor makes the final multiplication happen in
    // long long rather than int. Judging by the name, this is the byte limit
    // applied to Thrift payloads -- confirm at the call sites.
205  static inline size_t max_bytes_for_thrift() {
206  return 2 * 1000 * 1000 * 1000LL;
207  }
208 
209  // Important ****
210  // This block must be kept in sync with mapd.thrift and HAHandler.h
211  // Please keep the same order for easy checking and cut-and-paste
212  // Important ****
213 
214  void krb5_connect(TKrb5Session& session,
215  const std::string& token,
216  const std::string& dbname) override;
217  // connection, admin
218  void connect(TSessionId& session,
219  const std::string& username,
220  const std::string& passwd,
221  const std::string& dbname) override;
222  void disconnect(const TSessionId& session) override;
223  void switch_database(const TSessionId& session, const std::string& dbname) override;
224  void clone_session(TSessionId& session2, const TSessionId& session1) override;
225  void get_server_status(TServerStatus& _return, const TSessionId& session) override;
226  void get_status(std::vector<TServerStatus>& _return,
227  const TSessionId& session) override;
228  void get_hardware_info(TClusterHardwareInfo& _return,
229  const TSessionId& session) override;
230 
231  bool hasTableAccessPrivileges(const TableDescriptor* td,
232  const Catalog_Namespace::SessionInfo& session_info);
233  void get_tables(std::vector<std::string>& _return, const TSessionId& session) override;
234  void get_tables_for_database(std::vector<std::string>& _return,
235  const TSessionId& session,
236  const std::string& database_name) override;
237  void get_physical_tables(std::vector<std::string>& _return,
238  const TSessionId& session) override;
239  void get_views(std::vector<std::string>& _return, const TSessionId& session) override;
240  void get_tables_meta(std::vector<TTableMeta>& _return,
241  const TSessionId& session) override;
242  void get_table_details(TTableDetails& _return,
243  const TSessionId& session,
244  const std::string& table_name) override;
245  void get_table_details_for_database(TTableDetails& _return,
246  const TSessionId& session,
247  const std::string& table_name,
248  const std::string& database_name) override;
249  void get_internal_table_details(TTableDetails& _return,
250  const TSessionId& session,
251  const std::string& table_name,
252  const bool include_system_columns) override;
253  void get_internal_table_details_for_database(TTableDetails& _return,
254  const TSessionId& session,
255  const std::string& table_name,
256  const std::string& database_name) override;
257  void get_users(std::vector<std::string>& _return, const TSessionId& session) override;
258  void get_databases(std::vector<TDBInfo>& _return, const TSessionId& session) override;
259 
260  void get_version(std::string& _return) override;
261  void start_heap_profile(const TSessionId& session) override;
262  void stop_heap_profile(const TSessionId& session) override;
263  void get_heap_profile(std::string& _return, const TSessionId& session) override;
264  void get_memory(std::vector<TNodeMemoryInfo>& _return,
265  const TSessionId& session,
266  const std::string& memory_level) override;
267  void clear_cpu_memory(const TSessionId& session) override;
268  void clear_gpu_memory(const TSessionId& session) override;
269  void clearRenderMemory(const TSessionId& session); // not declared in Thrift
270  // nor on the persistent leaf client
271 
272  void pause_executor_queue(
273  const TSessionId& session); // Not implemented for persistent leaf client
274  void resume_executor_queue(
275  const TSessionId& session); // Not implemented for persistent leaf client
276 
277  void set_cur_session(const TSessionId& parent_session,
278  const TSessionId& leaf_session,
279  const std::string& start_time_str,
280  const std::string& label,
281  bool for_running_query_kernel) override;
282  void invalidate_cur_session(const TSessionId& parent_session,
283  const TSessionId& leaf_session,
284  const std::string& start_time_str,
285  const std::string& label,
286  bool for_running_query_kernel) override;
287  void set_table_epoch(const TSessionId& session,
288  const int db_id,
289  const int table_id,
290  const int new_epoch) override;
291  void set_table_epoch_by_name(const TSessionId& session,
292  const std::string& table_name,
293  const int new_epoch) override;
294  int32_t get_table_epoch(const TSessionId& session,
295  const int32_t db_id,
296  const int32_t table_id) override;
297  int32_t get_table_epoch_by_name(const TSessionId& session,
298  const std::string& table_name) override;
299  void get_table_epochs(std::vector<TTableEpochInfo>& _return,
300  const TSessionId& session,
301  const int32_t db_id,
302  const int32_t table_id) override;
303  void set_table_epochs(const TSessionId& session,
304  const int32_t db_id,
305  const std::vector<TTableEpochInfo>& table_epochs) override;
306 
307  void get_session_info(TSessionInfo& _return, const TSessionId& session) override;
308 
309  void set_leaf_info(const TSessionId& session, const TLeafInfo& info) override;
310 
311  void sql_execute(ExecutionResult& _return,
312  const TSessionId& session,
313  const std::string& query,
314  const bool column_format,
315  const int32_t first_n,
316  const int32_t at_most_n,
318  // query, render
319  void sql_execute(TQueryResult& _return,
320  const TSessionId& session,
321  const std::string& query,
322  const bool column_format,
323  const std::string& nonce,
324  const int32_t first_n,
325  const int32_t at_most_n) override;
326  void get_completion_hints(std::vector<TCompletionHint>& hints,
327  const TSessionId& session,
328  const std::string& sql,
329  const int cursor) override;
330  // TODO(miyu): merge the following two data frame APIs.
331  void sql_execute_df(TDataFrame& _return,
332  const TSessionId& session,
333  const std::string& query,
334  const TDeviceType::type device_type,
335  const int32_t device_id,
336  const int32_t first_n,
337  const TArrowTransport::type transport_method) override;
338  void sql_execute_gdf(TDataFrame& _return,
339  const TSessionId& session,
340  const std::string& query,
341  const int32_t device_id,
342  const int32_t first_n) override;
343  void deallocate_df(const TSessionId& session,
344  const TDataFrame& df,
345  const TDeviceType::type device_type,
346  const int32_t device_id) override;
347  void interrupt(const TSessionId& query_session,
348  const TSessionId& interrupt_session) override;
349  void sql_validate(TRowDescriptor& _return,
350  const TSessionId& session,
351  const std::string& query) override;
352  TExecuteMode::type getExecutionMode(const TSessionId& session);
353  void set_execution_mode(const TSessionId& session,
354  const TExecuteMode::type mode) override;
355  void render_vega(TRenderResult& _return,
356  const TSessionId& session,
357  const int64_t widget_id,
358  const std::string& vega_json,
359  const int32_t compression_level,
360  const std::string& nonce) override;
361  void get_result_row_for_pixel(
362  TPixelTableRowResult& _return,
363  const TSessionId& session,
364  const int64_t widget_id,
365  const TPixel& pixel,
366  const std::map<std::string, std::vector<std::string>>& table_col_names,
367  const bool column_format,
368  const int32_t pixel_radius,
369  const std::string& nonce) override;
370 
371  // custom expressions
372  int32_t create_custom_expression(const TSessionId& session,
373  const TCustomExpression& custom_expression) override;
374  void get_custom_expressions(std::vector<TCustomExpression>& _return,
375  const TSessionId& session) override;
376  void update_custom_expression(const TSessionId& session,
377  const int32_t id,
378  const std::string& expression_json) override;
379  void delete_custom_expressions(const TSessionId& session,
380  const std::vector<int32_t>& custom_expression_ids,
381  const bool do_soft_delete) override;
382 
383  // dashboards
384  void get_dashboard(TDashboard& _return,
385  const TSessionId& session,
386  const int32_t dashboard_id) override;
387  void get_dashboards(std::vector<TDashboard>& _return,
388  const TSessionId& session) override;
389  int32_t create_dashboard(const TSessionId& session,
390  const std::string& dashboard_name,
391  const std::string& dashboard_state,
392  const std::string& image_hash,
393  const std::string& dashboard_metadata) override;
394  void replace_dashboard(const TSessionId& session,
395  const int32_t dashboard_id,
396  const std::string& dashboard_name,
397  const std::string& dashboard_owner,
398  const std::string& dashboard_state,
399  const std::string& image_hash,
400  const std::string& dashboard_metadata) override;
401  void delete_dashboard(const TSessionId& session, const int32_t dashboard_id) override;
402  void share_dashboards(const TSessionId& session,
403  const std::vector<int32_t>& dashboard_ids,
404  const std::vector<std::string>& groups,
405  const TDashboardPermissions& permissions) override;
406  void delete_dashboards(const TSessionId& session,
407  const std::vector<int32_t>& dashboard_ids) override;
408  void share_dashboard(const TSessionId& session,
409  const int32_t dashboard_id,
410  const std::vector<std::string>& groups,
411  const std::vector<std::string>& objects,
412  const TDashboardPermissions& permissions,
413  const bool grant_role) override;
414  void unshare_dashboards(const TSessionId& session,
415  const std::vector<int32_t>& dashboard_ids,
416  const std::vector<std::string>& groups,
417  const TDashboardPermissions& permissions) override;
418  void unshare_dashboard(const TSessionId& session,
419  const int32_t dashboard_id,
420  const std::vector<std::string>& groups,
421  const std::vector<std::string>& objects,
422  const TDashboardPermissions& permissions) override;
423  void get_dashboard_grantees(std::vector<TDashboardGrantees>& _return,
424  const TSessionId& session,
425  const int32_t dashboard_id) override;
426 
427  void get_link_view(TFrontendView& _return,
428  const TSessionId& session,
429  const std::string& link) override;
430  void create_link(std::string& _return,
431  const TSessionId& session,
432  const std::string& view_state,
433  const std::string& view_metadata) override;
434  // import
435  void load_table_binary(const TSessionId& session,
436  const std::string& table_name,
437  const std::vector<TRow>& rows,
438  const std::vector<std::string>& column_names) override;
439 
440  void load_table_binary_columnar(const TSessionId& session,
441  const std::string& table_name,
442  const std::vector<TColumn>& cols,
443  const std::vector<std::string>& column_names) override;
444  void load_table_binary_arrow(const TSessionId& session,
445  const std::string& table_name,
446  const std::string& arrow_stream,
447  const bool use_column_names) override;
448 
449  void load_table(const TSessionId& session,
450  const std::string& table_name,
451  const std::vector<TStringRow>& rows,
452  const std::vector<std::string>& column_names) override;
453  void detect_column_types(TDetectResult& _return,
454  const TSessionId& session,
455  const std::string& file_name,
456  const TCopyParams& copy_params) override;
457  void create_table(const TSessionId& session,
458  const std::string& table_name,
459  const TRowDescriptor& row_desc,
460  const TCreateParams& create_params) override;
461  void import_table(const TSessionId& session,
462  const std::string& table_name,
463  const std::string& file_name,
464  const TCopyParams& copy_params) override;
465  void import_geo_table(const TSessionId& session,
466  const std::string& table_name,
467  const std::string& file_name,
468  const TCopyParams& copy_params,
469  const TRowDescriptor& row_desc,
470  const TCreateParams& create_params) override;
471  void import_table_status(TImportStatus& _return,
472  const TSessionId& session,
473  const std::string& import_id) override;
474  void get_first_geo_file_in_archive(std::string& _return,
475  const TSessionId& session,
476  const std::string& archive_path,
477  const TCopyParams& copy_params) override;
478  void get_all_files_in_archive(std::vector<std::string>& _return,
479  const TSessionId& session,
480  const std::string& archive_path,
481  const TCopyParams& copy_params) override;
482  void get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
483  const TSessionId& session,
484  const std::string& file_name,
485  const TCopyParams& copy_params) override;
486  // distributed
487  int64_t query_get_outer_fragment_count(const TSessionId& session,
488  const std::string& select_query) override;
489 
490  void check_table_consistency(TTableMeta& _return,
491  const TSessionId& session,
492  const int32_t table_id) override;
493  void start_query(TPendingQuery& _return,
494  const TSessionId& leaf_session,
495  const TSessionId& parent_session,
496  const std::string& serialized_rel_alg_dag,
497  const std::string& start_time_str,
498  const bool just_explain,
499  const std::vector<int64_t>& outer_fragment_indices) override;
500  void execute_query_step(TStepResult& _return,
501  const TPendingQuery& pending_query,
502  const TSubqueryId subquery_id,
503  const std::string& start_time_str) override;
504  void broadcast_serialized_rows(const TSerializedRows& serialized_rows,
505  const TRowDescriptor& row_desc,
506  const TQueryId query_id,
507  const TSubqueryId subquery_id,
508  const bool is_final_subquery_result) override;
509 
510  void start_render_query(TPendingRenderQuery& _return,
511  const TSessionId& session,
512  const int64_t widget_id,
513  const int16_t node_idx,
514  const std::string& vega_json) override;
515  void execute_next_render_step(TRenderStepResult& _return,
516  const TPendingRenderQuery& pending_render,
517  const TRenderAggDataMap& merged_data) override;
518 
519  void insert_data(const TSessionId& session, const TInsertData& insert_data) override;
520  void insert_chunks(const TSessionId& session,
521  const TInsertChunks& insert_chunks) override;
522  void checkpoint(const TSessionId& session, const int32_t table_id) override;
523  // DB Object Privileges
524  void get_roles(std::vector<std::string>& _return, const TSessionId& session) override;
525  bool has_role(const TSessionId& sessionId,
526  const std::string& granteeName,
527  const std::string& roleName) override;
528  bool has_object_privilege(const TSessionId& sessionId,
529  const std::string& granteeName,
530  const std::string& objectName,
531  const TDBObjectType::type object_type,
532  const TDBObjectPermissions& permissions) override;
533  void get_db_objects_for_grantee(std::vector<TDBObject>& _return,
534  const TSessionId& session,
535  const std::string& roleName) override;
536  void get_db_object_privs(std::vector<TDBObject>& _return,
537  const TSessionId& session,
538  const std::string& objectName,
539  const TDBObjectType::type type) override;
540  void get_all_roles_for_user(std::vector<std::string>& _return,
541  const TSessionId& session,
542  const std::string& granteeName) override;
543  void get_all_effective_roles_for_user(std::vector<std::string>& _return,
544  const TSessionId& session,
545  const std::string& granteeName) override;
546  std::vector<std::string> get_valid_groups(const TSessionId& session,
547  int32_t dashboard_id,
548  std::vector<std::string> groups);
549  // licensing
550  void set_license_key(TLicenseInfo& _return,
551  const TSessionId& session,
552  const std::string& key,
553  const std::string& nonce) override;
554  void get_license_claims(TLicenseInfo& _return,
555  const TSessionId& session,
556  const std::string& nonce) override;
557  // user-defined functions
558  /*
559  Returns a mapping of device (CPU, GPU) parameters (name, LLVM IR
560  triplet, features, etc)
561  */
562  void get_device_parameters(std::map<std::string, std::string>& _return,
563  const TSessionId& session) override;
564 
565  /*
566  Register Runtime Extension Functions (UDFs, UDTFs) with given
567  signatures. The extension functions implementations are given in a
568  mapping of a device and the corresponding LLVM/NVVM IR string.
569  */
570 
571  void register_runtime_extension_functions(
572  const TSessionId& session,
573  const std::vector<TUserDefinedFunction>& udfs,
574  const std::vector<TUserDefinedTableFunction>& udtfs,
575  const std::map<std::string, std::string>& device_ir_map) override;
576 
577  /*
578  Returns a list of User-Defined Function names available
579  */
580  void get_function_names(std::vector<std::string>& _return,
581  const TSessionId& session) override;
582 
583  /*
584  Returns a list of runtime User-Defined Function names available
585  */
586  void get_runtime_function_names(std::vector<std::string>& _return,
587  const TSessionId& session) override;
588 
589  /*
590  Returns a list of User-Defined Function details
591  */
592  void get_function_details(std::vector<TUserDefinedFunction>& _return,
593  const TSessionId& session,
594  const std::vector<std::string>& udf_names) override;
595 
596  /*
597  Returns a list of User-Defined Table Function names available
598  */
599  void get_table_function_names(std::vector<std::string>& _return,
600  const TSessionId& session) override;
601 
602  /*
603  Returns a list of runtime User-Defined Table Function names available
604  */
605  void get_runtime_table_function_names(std::vector<std::string>& _return,
606  const TSessionId& session) override;
607 
608  /*
609  Returns a list of User-Defined Table Function details
610  */
611  void get_table_function_details(std::vector<TUserDefinedTableFunction>& _return,
612  const TSessionId& session,
613  const std::vector<std::string>& udtf_names) override;
614 
615  // end of sync block for HAHandler and mapd.thrift
616 
617  void shutdown();
618  void emergency_shutdown();
619 
620  TSessionId getInvalidSessionId() const;
621 
622  void internal_connect(TSessionId& session,
623  const std::string& username,
624  const std::string& dbname);
625 
626  bool isAggregator() const;
627 
628  bool checkInMemorySystemTableQuery(
629  const std::unordered_set<shared::TableKey>& tables_selected_from) const;
630 
631  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
632 
634  std::vector<LeafHostInfo> db_leaves_;
635  std::vector<LeafHostInfo> string_leaves_;
636  const std::string base_data_path_;
637  boost::filesystem::path import_path_;
639  std::default_random_engine random_gen_;
640  std::uniform_int_distribution<int64_t> session_id_dist_;
641  const bool jit_debug_;
642  const bool intel_jit_profile_;
644  const bool read_only_;
645  const bool allow_loop_joins_;
648  std::mutex render_mutex_;
649  int64_t start_time_;
652  std::shared_ptr<QueryEngine> query_engine_;
653  std::unique_ptr<RenderHandler> render_handler_;
654  std::unique_ptr<HeavyDBAggHandler> agg_handler_;
655  std::unique_ptr<HeavyDBLeafHandler> leaf_handler_;
656  std::shared_ptr<Calcite> calcite_;
657  const bool legacy_syntax_;
658 
659  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
660 
    // Creates a query state by perfect-forwarding all arguments to
    // query_states_.create() and returns the resulting
    // std::shared_ptr<query_state::QueryState>. The declaration of query_states_
    // is not visible in this listing.
661  template <typename... ARGS>
662  std::shared_ptr<query_state::QueryState> create_query_state(ARGS&&... args) {
663  return query_states_.create(std::forward<ARGS>(args)...);
664  }
665 
666  // Exactly one immutable SessionInfo copy should be taken by a typical request.
667  Catalog_Namespace::SessionInfo get_session_copy(const TSessionId& session_id);
668 
669  void get_tables_meta_impl(std::vector<TTableMeta>& _return,
670  QueryStateProxy query_state_proxy,
671  const Catalog_Namespace::SessionInfo& session_info,
672  const bool with_table_locks = true);
673 
674  // Visible for use in tests.
675  void resizeDispatchQueue(size_t queue_size);
676 
677  protected:
678  // Returns empty std::shared_ptr if session.empty().
679  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_ptr(
680  const TSessionId& session_id);
681 
682  ConnectionInfo getConnectionInfo() const;
683 
684  private:
685  std::atomic<bool> initialized_{false};
686  void init_executor_resource_mgr();
687  void validate_configurations();
688  std::shared_ptr<Catalog_Namespace::SessionInfo> create_new_session(
689  TSessionId& session,
690  const std::string& dbname,
691  const Catalog_Namespace::UserMetadata& user_meta,
692  std::shared_ptr<Catalog_Namespace::Catalog> cat);
693  void connect_impl(TSessionId& session,
694  const std::string& passwd,
695  const std::string& dbname,
696  const Catalog_Namespace::UserMetadata& user_meta,
697  std::shared_ptr<Catalog_Namespace::Catalog> cat,
698  query_state::StdLog& stdlog);
699  void disconnect_impl(Catalog_Namespace::SessionInfoPtr& session_ptr);
700  void check_table_load_privileges(const Catalog_Namespace::SessionInfo& session_info,
701  const std::string& table_name);
702  void get_tables_impl(std::vector<std::string>& table_names,
704  const GetTablesType get_tables_type,
705  const std::string& database_name = {});
706  void get_table_details_impl(TTableDetails& _return,
707  query_state::StdLog& stdlog,
708  const std::string& table_name,
709  const bool get_system,
710  const bool get_physical,
711  const std::string& database_name = {});
712  void getAllRolesForUserImpl(
713  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
714  std::vector<std::string>& roles,
715  const std::string& granteeName,
716  bool effective);
717  void check_read_only(const std::string& str);
718  void validateGroups(const std::vector<std::string>& groups);
719  void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo& session_info,
720  const std::vector<int32_t>& dashboard_ids);
721  void shareOrUnshareDashboards(const TSessionId& session,
722  const std::vector<int32_t>& dashboard_ids,
723  const std::vector<std::string>& groups,
724  const TDashboardPermissions& permissions,
725  const bool do_share);
726 
727  static void value_to_thrift_column(const TargetValue& tv,
728  const SQLTypeInfo& ti,
729  TColumn& column);
730  static TDatum value_to_thrift(const TargetValue& tv, const SQLTypeInfo& ti);
731 
732  std::pair<TPlanResult, lockmgr::LockedTableDescriptors> parse_to_ra(
734  const std::string& query_str,
735  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
736  const bool acquire_locks,
737  const SystemParameters& system_parameters,
738  bool check_privileges = true);
739 
740  void sql_execute_local(
741  TQueryResult& _return,
742  const QueryStateProxy& query_state_proxy,
743  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
744  const std::string& query_str,
745  const bool column_format,
746  const std::string& nonce,
747  const int32_t first_n,
748  const int32_t at_most_n,
749  const bool use_calcite);
750 
751  int64_t process_deferred_copy_from(const TSessionId& session_id);
752 
753  static void convertData(TQueryResult& _return,
755  const QueryStateProxy& query_state_proxy,
756  const bool column_format,
757  const int32_t first_n,
758  const int32_t at_most_n);
759 
760  void sql_execute_impl(ExecutionResult& _return,
762  const bool column_format,
763  const ExecutorDeviceType executor_device_type,
764  const int32_t first_n,
765  const int32_t at_most_n,
766  const bool use_calcite,
768 
770  const TableDescriptor* td,
771  const AccessPrivileges acess_priv);
772 
773  void execute_distributed_copy_statement(
775  const Catalog_Namespace::SessionInfo& session_info);
776 
777  TPlanResult processCalciteRequest(
779  const std::shared_ptr<Catalog_Namespace::Catalog>& cat,
780  const std::string& query_str,
781  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
782  const SystemParameters& system_parameters,
783  const bool check_privileges);
784 
785  TRowDescriptor validateRelAlg(const std::string& query_ra,
786  QueryStateProxy query_state_proxy);
787 
788  void dispatch_query_task(std::shared_ptr<QueryDispatchQueue::Task> query_task,
789  const bool is_update_delete);
790 
791  std::vector<PushedDownFilterInfo> execute_rel_alg(
792  ExecutionResult& _return,
794  const std::string& query_ra,
795  const bool column_format,
796  const ExecutorDeviceType executor_device_type,
797  const int32_t first_n,
798  const int32_t at_most_n,
799  const bool just_validate,
800  const bool find_push_down_candidates,
801  const ExplainInfo& explain_info,
802  const std::optional<size_t> executor_index = std::nullopt) const;
803 
804  void execute_rel_alg_with_filter_push_down(
805  ExecutionResult& _return,
807  std::string& query_ra,
808  const bool column_format,
809  const ExecutorDeviceType executor_device_type,
810  const int32_t first_n,
811  const int32_t at_most_n,
812  const bool just_explain,
813  const bool just_calcite_explain,
814  const std::vector<PushedDownFilterInfo>& filter_push_down_requests);
815 
816  void executeDdl(TQueryResult& _return,
817  const std::string& query_ra,
818  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
819 
820  void executeDdl(ExecutionResult& _return,
821  const std::string& query_ra,
822  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
823 
824  TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog* cat,
825  const ColumnDescriptor* cd);
826 
827  void set_execution_mode_nolock(Catalog_Namespace::SessionInfo* session_ptr,
828  const TExecuteMode::type mode);
829  char unescape_char(std::string str);
830  import_export::CopyParams thrift_to_copyparams(const TCopyParams& cp);
831  TCopyParams copyparams_to_thrift(const import_export::CopyParams& cp);
832  void check_geospatial_files(const boost::filesystem::path file_path,
833  const import_export::CopyParams& copy_params);
834  void render_rel_alg(TRenderResult& _return,
835  const std::string& query_ra,
836  const std::string& query_str,
837  const Catalog_Namespace::SessionInfo& session_info,
838  const std::string& render_type,
839  const bool is_projection_query);
840 
// Returns a TColumnType by value, given a TDatumType, a column name and an
// is_array flag.
841  TColumnType create_geo_column(const TDatumType::type type,
842  const std::string& name,
843  const bool is_array);
844 
// Static helpers that each take a TQueryResult& `_return`, a const ResultSet&
// and a `column_format` flag. Being static, they do not use instance state.
845  static void convertExplain(TQueryResult& _return,
846  const ResultSet& results,
847  const bool column_format);
848  static void convertResult(TQueryResult& _return,
849  const ResultSet& results,
850  const bool column_format);
851 
// Like the converters above, but additionally receives a QueryStateProxy (by
// value), the target metadata, and the first_n / at_most_n row limits.
852  static void convertRows(TQueryResult& _return,
853  QueryStateProxy query_state_proxy,
854  const std::vector<TargetMetaInfo>& targets,
855  const ResultSet& results,
856  const bool column_format,
857  const int32_t first_n,
858  const int32_t at_most_n);
859 
860  // Use ExecutionResult to populate a TQueryResult
861  // calls convertRows, but after some setup using session_info
// Note the parameter order: the output `_return` comes last here, whereas the
// static convert* helpers take it first.
862  void convertResultSet(ExecutionResult& result,
863  const Catalog_Namespace::SessionInfo& session_info,
864  const std::string& query_state_str,
865  TQueryResult& _return);
866 
// Static helper with the same leading parameters as convertResult plus a
// `label` string.
// NOTE(review): `label` is a const by-value std::string, so an lvalue argument
// is copied on every call.
867  static void createSimpleResult(TQueryResult& _return,
868  const ResultSet& results,
869  const bool column_format,
870  const std::string label);
871 
// Returns a vector of TargetMetaInfo for a list of Analyzer::TargetEntry
// shared pointers.
872  std::vector<TargetMetaInfo> getTargetMetaInfo(
873  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
874 
// getTargetNames is overloaded: one overload takes TargetMetaInfo values, the
// other takes Analyzer::TargetEntry shared pointers. Both return the names as
// a vector of strings.
875  std::vector<std::string> getTargetNames(
876  const std::vector<TargetMetaInfo>& targets) const;
877 
878  std::vector<std::string> getTargetNames(
879  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
880 
// Completion-hint helpers. Both take `hints` and `visible_tables` by non-const
// reference, plus the SQL text and a cursor position.
// NOTE(review): the two declarations order `visible_tables` and `stdlog`
// differently, so argument order must be checked at each call site.
881  void get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
882  std::vector<std::string>& visible_tables,
883  query_state::StdLog& stdlog,
884  const std::string& sql,
885  const int cursor);
886  void get_token_based_completions(std::vector<TCompletionHint>& hints,
887  query_state::StdLog& stdlog,
888  std::vector<std::string>& visible_tables,
889  const std::string& sql,
890  const int cursor);
891 
// Returns a map from string to a set of strings; judging by the name, table
// name -> column names (confirm against the definition).
892  std::unordered_map<std::string, std::unordered_set<std::string>>
893  fill_column_names_by_table(std::vector<std::string>& table_names,
894  query_state::StdLog& stdlog);
895 
// Returns a TDashboard by value. `populate_state` defaults to true.
// NOTE(review): this listing jumps from 897 to 899, so one parameter line
// between `session_ptr` and `dash` is not visible here.
896  TDashboard get_dashboard_impl(
897  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
899  const DashboardDescriptor* dash,
900  const bool populate_state = true);
901 
// Static permission predicates, one per object kind. All share the signature
// bool(const AccessPrivileges&, const TDBObjectPermissions&); these are the
// functions stored in permissionFuncMap_.
902  static bool has_database_permission(const AccessPrivileges& privs,
903  const TDBObjectPermissions& permissions);
904  static bool has_table_permission(const AccessPrivileges& privs,
905  const TDBObjectPermissions& permission);
906  static bool has_dashboard_permission(const AccessPrivileges& privs,
907  const TDBObjectPermissions& permissions);
908  static bool has_view_permission(const AccessPrivileges& privs,
909  const TDBObjectPermissions& permissions);
910  static bool has_server_permission(const AccessPrivileges& privs,
911  const TDBObjectPermissions& permissions);
912  // For the provided upper case column names `uc_column_names`, return
913  // the tables from `table_names` which contain at least one of them.
914  // Used to rank the TABLE auto-completion hints by the columns
915  // specified in the projection.
// `table_names` and `stdlog` are taken by non-const reference; the result is
// returned by value as an unordered_set of strings.
916  std::unordered_set<std::string> get_uc_compatible_table_names_by_column(
917  const std::unordered_set<std::string>& uc_column_names,
918  std::vector<std::string>& table_names,
919  query_state::StdLog& stdlog);
920 
// Returns an owning pointer to a lock container over a const TableDescriptor*.
// `loader` and `import_buffers` are passed as raw pointers to caller-owned
// objects (presumably filled in by this call — confirm against the definition).
// `load_type` is taken by value.
921  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
922  prepare_loader_generic(
923  const Catalog_Namespace::SessionInfo& session_info,
924  const std::string& table_name,
925  size_t num_cols,
926  std::unique_ptr<import_export::Loader>* loader,
927  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
928  const std::vector<std::string>& column_names,
929  std::string load_type);
930 
// Operates on a single column descriptor `cd`. `import_buffers` and `col_idx`
// are non-const references, so the definition may modify both.
931  void fillGeoColumns(
932  const TSessionId& session,
933  const Catalog_Namespace::Catalog& catalog,
934  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
935  const ColumnDescriptor* cd,
936  size_t& col_idx,
937  size_t num_rows,
938  const std::string& table_name);
939 
// Takes the full list of column descriptors `cds` plus `desc_id_to_column_id`,
// a vector of ints (judging by the name, a descriptor-to-column index mapping;
// confirm against the definition).
940  void fillMissingBuffers(
941  const TSessionId& session,
942  const Catalog_Namespace::Catalog& catalog,
943  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
944  const std::list<const ColumnDescriptor*>& cds,
945  const std::vector<int>& desc_id_to_column_id,
946  size_t num_rows,
947  const std::string& table_name);
948 
// Map from a string key to a SessionInfoPtr (presumably keyed by session id).
950  std::unordered_map<std::string, Catalog_Namespace::SessionInfoPtr> calcite_sessions_;
952 
// Const lookup by TSessionId returning a SessionInfoPtr; presumably searches
// calcite_sessions_ (confirm against the definition).
953  Catalog_Namespace::SessionInfoPtr findCalciteSession(TSessionId const&) const;
954 
// Configuration data members. Most are const, so they can only be set in a
// constructor initializer list.
// NOTE(review): the embedded numbering has gaps in this region (e.g. 962,
// 964-967, 969, 978), so some members are not shown in this listing.
955  bool super_user_rights_; // default is "false"; setting to "true"
956  // ignores passwd checks in "connect(..)"
957  // method
958  const int idle_session_duration_; // max duration of idle session
959  const int max_session_duration_; // max duration of session
960 
961  const bool enable_rendering_;
963  const unsigned renderer_vulkan_timeout_;
968  const size_t reserved_gpu_mem_;
970  const size_t render_mem_bytes_;
971  const size_t num_reader_threads_;
// The members below are references, not copies: the referenced strings/vector
// must outlive this object.
972 #ifdef ENABLE_GEOS
973  const std::string& libgeos_so_filename_;
974 #endif
975 #ifdef HAVE_TORCH_TFS
976  const std::string& torch_lib_path_;
977 #endif
979  const std::string& udf_filename_;
980  const std::string& clang_path_;
981  const std::vector<std::string>& clang_options_;
// Initialized to -1 in-class (the meaning of -1 is not visible here).
982  int32_t max_num_sessions_{-1};
983  std::unique_ptr<Catalog_Namespace::SessionsStore> sessions_store_;
984 
986  std::string table;
987  std::string file_name;
989  std::string partitions;
990  };
991 
993  std::unordered_map<std::string, DeferredCopyFromState> was_deferred_copy_from;
995 
996  std::optional<DeferredCopyFromState> operator()(const std::string& session_id) {
997  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
998  auto itr = was_deferred_copy_from.find(session_id);
999  if (itr == was_deferred_copy_from.end()) {
1000  return std::nullopt;
1001  }
1002  return itr->second;
1003  }
1004 
1005  void add(const std::string& session_id, const DeferredCopyFromState& state) {
1006  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1007  const auto ret = was_deferred_copy_from.insert(std::make_pair(session_id, state));
1008  CHECK(ret.second);
1009  }
1010 
1011  void remove(const std::string& session_id) {
1012  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1013  was_deferred_copy_from.erase(session_id);
1014  }
1015  };
1017 
1018  // Only for IPC device memory deallocation
// Both members are `mutable`, so const member functions can lock the mutex and
// update the map. Presumably the mutex guards the map (confirm at use sites).
1019  mutable std::mutex handle_to_dev_ptr_mutex_;
1020  mutable std::unordered_map<std::string, std::string> ipc_handle_to_dev_ptr_;
1021 
// Friend declarations: the free function run_warmup_queries and the classes
// below are granted access to this class's private and protected members.
1022  friend void run_warmup_queries(std::shared_ptr<DBHandler> handler,
1023  std::string base_path,
1024  std::string query_file_path);
1025 
1026  friend class RenderHandler::Impl;
1027  friend class HeavyDBAggHandler;
1028  friend class HeavyDBLeafHandler;
1029 
// Lookup table from an object-type name to the matching static has_*_permission
// predicate. It is a per-instance member initialized in-class, and both key and
// mapped type are const. The `s` suffix builds std::string keys, which requires
// the std::string literal operator to be in scope.
1030  std::map<const std::string, const PermissionFuncPtr> permissionFuncMap_ = {
1031  {"database"s, has_database_permission},
1032  {"dashboard"s, has_dashboard_permission},
1033  {"table"s, has_table_permission},
1034  {"view"s, has_view_permission},
1035  {"server"s, has_server_permission}};
1036 
// Takes a raw pointer to a parsed DDL statement (ownership is not transferred
// by this signature).
1037  void check_and_invalidate_sessions(Parser::DDLStmt* ddl);
1038 
// Create/remove pair for in-memory calcite sessions: creation returns a string
// (presumably the new session id), and removal takes a session id.
// NOTE(review): top-level `const` on a by-value return type has no effect for
// callers other than inhibiting moves.
1039  std::string const createInMemoryCalciteSession(
1040  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr);
1041  void removeInMemoryCalciteSession(const std::string& session_id);
1042 
// Returns an ExecutionResult for the given session (presumably listing user
// sessions — confirm against the definition).
1043  ExecutionResult getUserSessions(
1044  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1045 
1046  // getQueries returns the set of queries queued in the DB
1047  // that belong to the same DB as the caller's session
1048 
1049  ExecutionResult getQueries(
1050  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1051 
// Overrides a virtual from a base class (marked `override`); fills `_return`
// with TQueryInfo entries for the given session.
1052  void get_queries_info(std::vector<TQueryInfo>& _return,
1053  const TSessionId& session) override;
1054 
1055  // this function passes the interrupt request to the DB executor
// `session_info` is the caller's session; `target_session` is a string naming
// the session to interrupt (presumably its session id).
1056  void interruptQuery(const Catalog_Namespace::SessionInfo& session_info,
1057  const std::string& target_session);
1058 
// Both declarations take `execution_time_ms` by non-const reference
// (presumably an output for the measured duration).
// NOTE(review): the parameter is spelled `sesson_id` (sic) in both.
// NOTE(review): this listing jumps 1059 -> 1061 and 1064 -> 1066, so one
// parameter line is not visible in each declaration.
1059  void alterSystemClear(const std::string& sesson_id,
1061  const std::string& cache_type,
1062  int64_t& execution_time_ms);
1063 
1064  void alterSession(const std::string& sesson_id,
1066  const std::pair<std::string, std::string>& session_parameter,
1067  int64_t& execution_time_ms);
1068 
// Const accessor returning the server's role as a Thrift TRole enum value.
1069  TRole::type getServerRole() const;
1070 
1072 
// Two geo-table import entry points with identical parameter lists; every
// parameter is a const reference. Judging by the names, the first handles a
// glob/filter/sort file set and the second a single file (confirm against the
// definitions).
1073  void importGeoTableGlobFilterSort(const TSessionId& session,
1074  const std::string& table_name,
1075  const std::string& file_name,
1076  const import_export::CopyParams& copy_params,
1077  const TRowDescriptor& row_desc,
1078  const TCreateParams& create_params);
1079 
1080  void importGeoTableSingle(const TSessionId& session,
1081  const std::string& table_name,
1082  const std::string& file_name,
1083  const import_export::CopyParams& copy_params,
1084  const TRowDescriptor& row_desc,
1085  const TCreateParams& create_params);
1086 
// No parameters and no return value; presumably re-creates or clears
// sessions_store_ (confirm against the definition).
1087  void resetSessionsStore();
1088 };
std::lock_guard< T > lock_guard
Classes used to wrap parser calls for calcite redirection.
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:635
auto get_users(SysCatalog &syscat, std::unique_ptr< SqliteConnector > &sqliteConnector, const int32_t dbId=-1)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:981
boost::filesystem::path import_path_
Definition: DBHandler.h:637
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:659
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:272
ClientProtocol
const bool renderer_use_parallel_executors_
Definition: DBHandler.h:964
const std::string & udf_filename_
Definition: DBHandler.h:979
std::string cat(Ts &&...args)
const int render_oom_retry_threshold_
Definition: DBHandler.h:966
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
void run_warmup_queries(std::shared_ptr< DBHandler > handler, std::string base_path, std::string query_file_path)
Definition: HeavyDB.cpp:210
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:1019
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:662
static thread_local std::string client_address
Definition: DBHandler.h:154
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:1005
bool user_can_access_table(const Catalog_Namespace::SessionInfo &session_info, const TableDescriptor *td, const AccessPrivileges access_priv)
bool(*)(const AccessPrivileges &, const TDBObjectPermissions &) PermissionFuncPtr
Definition: DBHandler.h:104
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:155
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:633
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:963
const std::string base_data_path_
Definition: DBHandler.h:636
const bool jit_debug_
Definition: DBHandler.h:641
const bool check_origin_
Definition: DBHandler.h:158
const size_t render_mem_bytes_
Definition: DBHandler.h:970
std::map< TSessionId, std::shared_ptr< Catalog_Namespace::SessionInfo >> SessionMap
Definition: DBHandler.h:103
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:1016
ExecutorDeviceType
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:649
import_export::CopyParams copy_params
Definition: DBHandler.h:988
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:950
tuple rows
Definition: report.py:114
This file contains the class specification and related data structures for Catalog.
std::mutex render_mutex_
Definition: DBHandler.h:648
static size_t max_bytes_for_thrift()
Definition: DBHandler.h:205
query_state::QueryStates query_states_
Definition: DBHandler.h:949
Supported runtime functions management and retrieval.
const size_t reserved_gpu_mem_
Definition: DBHandler.h:968
std::optional< DeferredCopyFromState > operator()(const std::string &session_id)
Definition: DBHandler.h:996
Classes representing a parse tree.
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:969
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4703
GetTablesType
Definition: Catalog.h:63
const int max_session_duration_
Definition: DBHandler.h:959
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:638
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:634
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:978
const std::string & clang_path_
Definition: DBHandler.h:980
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:653
Checked json field retrieval.
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:652
SystemParameters & system_parameters_
Definition: DBHandler.h:651
const size_t num_reader_threads_
Definition: DBHandler.h:971
specifies the content in-memory of a row in the column metadata table
const bool enable_auto_clear_render_mem_
Definition: DBHandler.h:965
const bool renderer_prefer_igpu_
Definition: DBHandler.h:962
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:951
std::map< std::string, std::string > get_device_parameters(bool cpu_only)
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4698
const bool allow_loop_joins_
Definition: DBHandler.h:645
heavyai::shared_mutex sessions_mutex_
Definition: DBHandler.h:647
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:654
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1071
const bool enable_rendering_
Definition: DBHandler.h:961
std::unordered_map< std::string, DeferredCopyFromState > was_deferred_copy_from
Definition: DBHandler.h:993
const bool intel_jit_profile_
Definition: DBHandler.h:642
bool super_user_rights_
Definition: DBHandler.h:955
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:983
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:656
void shutdown()
Definition: Logger.cpp:401
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:631
const bool read_only_
Definition: DBHandler.h:644
bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol > in, std::shared_ptr<::apache::thrift::protocol::TProtocol > out, void *connectionContext) override
Definition: DBHandler.h:119
const bool legacy_syntax_
Definition: DBHandler.h:657
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:655
#define CHECK(condition)
Definition: Logger.h:291
const int idle_session_duration_
Definition: DBHandler.h:958
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:1020
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:967
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
bool allow_multifrag_
Definition: DBHandler.h:643
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
const AuthMetadata & authMetadata_
Definition: DBHandler.h:650
TrackingProcessor(std::shared_ptr< HeavyIf > handler, const bool check_origin)
Definition: DBHandler.h:116
bool cpu_mode_only_
Definition: DBHandler.h:646
std::default_random_engine random_gen_
Definition: DBHandler.h:639
std::uniform_int_distribution< int64_t > session_id_dist_
Definition: DBHandler.h:640
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27