OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "LeafAggregator.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "Calcite/Calcite.h"
32 #include "Catalog/Catalog.h"
33 #include "Catalog/SessionsStore.h"
35 #include "Geospatial/Transforms.h"
36 #include "ImportExport/Importer.h"
38 #include "LockMgr/LockMgr.h"
39 #include "Logger/Logger.h"
40 #include "Parser/ParserNode.h"
41 #include "Parser/ParserWrapper.h"
45 #include "QueryEngine/Execute.h"
52 #include "Shared/StringTransform.h"
56 #include "Shared/measure.h"
57 #include "Shared/scope.h"
63 
64 #include <sys/types.h>
65 #include <thrift/server/TServer.h>
66 #include <thrift/transport/THttpClient.h>
67 #include <thrift/transport/TSocket.h>
68 #include <thrift/transport/TTransport.h>
69 #include <atomic>
70 #include <boost/algorithm/string.hpp>
71 #include <boost/algorithm/string/replace.hpp>
72 #include <boost/algorithm/string/trim.hpp>
73 #include <boost/filesystem.hpp>
74 #include <boost/make_shared.hpp>
75 #include <boost/noncopyable.hpp>
76 #include <boost/none_t.hpp>
77 #include <boost/optional.hpp>
78 #include <boost/program_options.hpp>
79 #include <boost/tokenizer.hpp>
80 #include <cmath>
81 #include <csignal>
82 #include <fstream>
83 #include <list>
84 #include <map>
85 #include <memory>
86 #include <mutex>
87 #include <random>
88 #include <string>
89 #include <thread>
90 #include <typeinfo>
91 #include <unordered_map>
92 
93 #include "gen-cpp/Heavy.h"
94 #include "gen-cpp/extension_functions_types.h"
95 
96 using namespace std::string_literals;
97 
98 class HeavyDBAggHandler;
99 class HeavyDBLeafHandler;
100 
101 // Multiple concurrent requests for the same session can occur. For that reason, each
102 // request briefly takes a lock to make a copy of the appropriate SessionInfo object. Then
103 // it releases the lock and uses the copy for the remainder of the request.
104 using SessionMap = std::map<TSessionId, std::shared_ptr<Catalog_Namespace::SessionInfo>>;
105 using PermissionFuncPtr = bool (*)(const AccessPrivileges&, const TDBObjectPermissions&);
107 
// Free-function helpers for DBHandler. Only the declarations appear here; the
// definitions are not visible in this listing.
108 namespace dbhandler {
    // NOTE(review): presumably reports whether db_name refers to the information-schema
    // database -- confirm against the definition.
109 bool is_info_schema_db(const std::string& db_name);
110 
    // NOTE(review): presumably rejects db_name when it is the information-schema
    // database; throw_db_exception (default false) looks like it selects the kind of
    // exception raised -- confirm against the definition.
111 void check_not_info_schema_db(const std::string& db_name,
112  bool throw_db_exception = false);
113 } // namespace dbhandler
114 
// Thrift processor that inspects the incoming transport before delegating every
// request to HeavyProcessor::process().
//
// When check_origin_ is set and the input protocol exposes a transport, process()
// reads the transport's origin string, splits it on ',' and stores the trimmed first
// entry in the thread_local client_address. It then probes the transport's dynamic
// type (THttpTransport, TBufferedTransport, or anything else).
//
// NOTE(review): several branch bodies below are empty in this listing, and the
// embedded line numbering skips 133, 142, 144, 146 and 149, so statements appear to
// have been dropped during extraction (presumably assignments to client_address /
// client_protocol). Confirm against the original header before relying on this text.
115 class TrackingProcessor : public HeavyProcessor {
116  public:
    // handler is forwarded to the HeavyProcessor base; check_origin enables the
    // origin/transport inspection performed in process().
117  TrackingProcessor(std::shared_ptr<HeavyIf> handler, const bool check_origin)
118  : HeavyProcessor(handler), check_origin_(check_origin) {}
119 
    // Inspects the transport of `in` (see class comment), then returns the result of
    // HeavyProcessor::process(in, out, connectionContext) unchanged.
120  bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol> in,
121  std::shared_ptr<::apache::thrift::protocol::TProtocol> out,
122  void* connectionContext) override {
123  using namespace ::apache::thrift;
124 
125  auto transport = in->getTransport();
126  if (transport && check_origin_) {
    // Function-local static mutex: one instance shared by every call on every
    // TrackingProcessor; the guard is held until the end of this if-block.
127  static std::mutex processor_mutex;
128  std::lock_guard lock(processor_mutex);
129  const auto origin_str = transport->getOrigin();
130  std::vector<std::string> origins;
131  boost::split(origins, origin_str, boost::is_any_of(","));
132  if (origins.empty()) {
    // NOTE(review): body missing from this listing (embedded line 133).
134  } else {
135  // Take the first origin, which should be the client IP before any intermediate
136  // servers (e.g. the web server)
137  auto trimmed_origin = origins.front();
138  boost::algorithm::trim(trimmed_origin);
139  TrackingProcessor::client_address = trimmed_origin;
140  }
    // Classify the transport by dynamic type.
    // NOTE(review): the bodies of all three branches are missing from this listing
    // (embedded lines 142, 144, 146).
141  if (dynamic_cast<transport::THttpTransport*>(transport.get())) {
143  } else if (dynamic_cast<transport::TBufferedTransport*>(transport.get())) {
145  } else {
147  }
148  } else {
    // Reached when there is no transport or check_origin_ is false.
    // NOTE(review): body missing from this listing (embedded line 149).
150  }
151 
152  return HeavyProcessor::process(in, out, connectionContext);
153  }
154 
    // Per-thread values (thread_local): each thread sees its own copy.
    // client_address is assigned in process(); no assignment to client_protocol is
    // visible in this listing.
155  static thread_local std::string client_address;
156  static thread_local ClientProtocol client_protocol;
157 
158  private:
    // Fixed at construction; gates the origin/transport inspection in process().
159  const bool check_origin_;
160 };
161 
162 namespace File_Namespace {
163 struct DiskCacheConfig;
164 }
165 
166 class DBHandler : public HeavyIf {
167  public:
168  DBHandler(const std::vector<LeafHostInfo>& db_leaves,
169  const std::vector<LeafHostInfo>& string_leaves,
170  const std::string& base_data_path,
171  const bool allow_multifrag,
172  const bool jit_debug,
173  const bool intel_jit_profile,
174  const bool read_only,
175  const bool allow_loop_joins,
176  const bool enable_rendering,
177  const bool renderer_use_ppll_polys,
178  const bool renderer_prefer_igpu,
179  const unsigned renderer_vulkan_timeout_ms,
180  const bool renderer_use_parallel_executors,
181  const bool enable_auto_clear_render_mem,
182  const int render_oom_retry_threshold,
183  const size_t render_mem_bytes,
184  const size_t max_concurrent_render_sessions,
185  const size_t reserved_gpu_mem,
186  const bool render_compositor_use_last_gpu,
187  const size_t num_reader_threads,
188  const AuthMetadata& authMetadata,
189  SystemParameters& system_parameters,
190  const bool legacy_syntax,
191  const int idle_session_duration,
192  const int max_session_duration,
193  const std::string& udf_filename,
194  const std::string& clang_path,
195  const std::vector<std::string>& clang_options,
196 #ifdef ENABLE_GEOS
197  const std::string& libgeos_so_filename,
198 #endif
199  const File_Namespace::DiskCacheConfig& disk_cache_config,
200  const bool is_new_db);
201  void initialize(const bool is_new_db);
202  ~DBHandler() override;
203 
// Returns the constant 2 * 1000 * 1000 * 1000 (2,000,000,000) converted to size_t.
// The LL suffix on the last factor makes the final multiplication happen in
// long long, so the product does not overflow a 32-bit int.
// NOTE(review): presumably the byte limit for a single Thrift message -- confirm
// against the callers.
204  static inline size_t max_bytes_for_thrift() { return 2 * 1000 * 1000 * 1000LL; }
205 
206  // Important ****
207  // This block must be kept in sync with mapd.thrift and HAHandler.h
208  // Please keep in same order for easy check and cut and paste
209  // Important ****
210 
211  void krb5_connect(TKrb5Session& session,
212  const std::string& token,
213  const std::string& dbname) override;
214  // connection, admin
215  void connect(TSessionId& session,
216  const std::string& username,
217  const std::string& passwd,
218  const std::string& dbname) override;
219  void disconnect(const TSessionId& session) override;
220  void switch_database(const TSessionId& session, const std::string& dbname) override;
221  void clone_session(TSessionId& session2, const TSessionId& session1) override;
222  void get_server_status(TServerStatus& _return, const TSessionId& session) override;
223  void get_status(std::vector<TServerStatus>& _return,
224  const TSessionId& session) override;
225  void get_hardware_info(TClusterHardwareInfo& _return,
226  const TSessionId& session) override;
227 
228  bool hasTableAccessPrivileges(const TableDescriptor* td,
229  const Catalog_Namespace::SessionInfo& session_info);
230  void get_tables(std::vector<std::string>& _return, const TSessionId& session) override;
231  void get_tables_for_database(std::vector<std::string>& _return,
232  const TSessionId& session,
233  const std::string& database_name) override;
234  void get_physical_tables(std::vector<std::string>& _return,
235  const TSessionId& session) override;
236  void get_views(std::vector<std::string>& _return, const TSessionId& session) override;
237  void get_tables_meta(std::vector<TTableMeta>& _return,
238  const TSessionId& session) override;
239  void get_table_details(TTableDetails& _return,
240  const TSessionId& session,
241  const std::string& table_name) override;
242  void get_table_details_for_database(TTableDetails& _return,
243  const TSessionId& session,
244  const std::string& table_name,
245  const std::string& database_name) override;
246  void get_internal_table_details(TTableDetails& _return,
247  const TSessionId& session,
248  const std::string& table_name,
249  const bool include_system_columns) override;
250  void get_internal_table_details_for_database(TTableDetails& _return,
251  const TSessionId& session,
252  const std::string& table_name,
253  const std::string& database_name) override;
254  void get_users(std::vector<std::string>& _return, const TSessionId& session) override;
255  void get_databases(std::vector<TDBInfo>& _return, const TSessionId& session) override;
256 
257  void get_version(std::string& _return) override;
258  void start_heap_profile(const TSessionId& session) override;
259  void stop_heap_profile(const TSessionId& session) override;
260  void get_heap_profile(std::string& _return, const TSessionId& session) override;
261  void get_memory(std::vector<TNodeMemoryInfo>& _return,
262  const TSessionId& session,
263  const std::string& memory_level) override;
264  void clear_cpu_memory(const TSessionId& session) override;
265  void clear_gpu_memory(const TSessionId& session) override;
266  void clearRenderMemory(const TSessionId& session); // it's not declared in thrift
267  // or on the persistent leaf client
268  void set_cur_session(const TSessionId& parent_session,
269  const TSessionId& leaf_session,
270  const std::string& start_time_str,
271  const std::string& label,
272  bool for_running_query_kernel) override;
273  void invalidate_cur_session(const TSessionId& parent_session,
274  const TSessionId& leaf_session,
275  const std::string& start_time_str,
276  const std::string& label,
277  bool for_running_query_kernel) override;
278  void set_table_epoch(const TSessionId& session,
279  const int db_id,
280  const int table_id,
281  const int new_epoch) override;
282  void set_table_epoch_by_name(const TSessionId& session,
283  const std::string& table_name,
284  const int new_epoch) override;
285  int32_t get_table_epoch(const TSessionId& session,
286  const int32_t db_id,
287  const int32_t table_id) override;
288  int32_t get_table_epoch_by_name(const TSessionId& session,
289  const std::string& table_name) override;
290  void get_table_epochs(std::vector<TTableEpochInfo>& _return,
291  const TSessionId& session,
292  const int32_t db_id,
293  const int32_t table_id) override;
294  void set_table_epochs(const TSessionId& session,
295  const int32_t db_id,
296  const std::vector<TTableEpochInfo>& table_epochs) override;
297 
298  void get_session_info(TSessionInfo& _return, const TSessionId& session) override;
299 
300  void set_leaf_info(const TSessionId& session, const TLeafInfo& info) override;
301 
302  void sql_execute(ExecutionResult& _return,
303  const TSessionId& session,
304  const std::string& query,
305  const bool column_format,
306  const int32_t first_n,
307  const int32_t at_most_n,
309  // query, render
310  void sql_execute(TQueryResult& _return,
311  const TSessionId& session,
312  const std::string& query,
313  const bool column_format,
314  const std::string& nonce,
315  const int32_t first_n,
316  const int32_t at_most_n) override;
317  void get_completion_hints(std::vector<TCompletionHint>& hints,
318  const TSessionId& session,
319  const std::string& sql,
320  const int cursor) override;
321  // TODO(miyu): merge the following two data frame APIs.
322  void sql_execute_df(TDataFrame& _return,
323  const TSessionId& session,
324  const std::string& query,
325  const TDeviceType::type device_type,
326  const int32_t device_id,
327  const int32_t first_n,
328  const TArrowTransport::type transport_method) override;
329  void sql_execute_gdf(TDataFrame& _return,
330  const TSessionId& session,
331  const std::string& query,
332  const int32_t device_id,
333  const int32_t first_n) override;
334  void deallocate_df(const TSessionId& session,
335  const TDataFrame& df,
336  const TDeviceType::type device_type,
337  const int32_t device_id) override;
338  void interrupt(const TSessionId& query_session,
339  const TSessionId& interrupt_session) override;
340  void sql_validate(TRowDescriptor& _return,
341  const TSessionId& session,
342  const std::string& query) override;
343  TExecuteMode::type getExecutionMode(const TSessionId& session);
344  void set_execution_mode(const TSessionId& session,
345  const TExecuteMode::type mode) override;
346  void render_vega(TRenderResult& _return,
347  const TSessionId& session,
348  const int64_t widget_id,
349  const std::string& vega_json,
350  const int32_t compression_level,
351  const std::string& nonce) override;
352  void get_result_row_for_pixel(
353  TPixelTableRowResult& _return,
354  const TSessionId& session,
355  const int64_t widget_id,
356  const TPixel& pixel,
357  const std::map<std::string, std::vector<std::string>>& table_col_names,
358  const bool column_format,
359  const int32_t pixel_radius,
360  const std::string& nonce) override;
361 
362  // custom expressions
363  int32_t create_custom_expression(const TSessionId& session,
364  const TCustomExpression& custom_expression) override;
365  void get_custom_expressions(std::vector<TCustomExpression>& _return,
366  const TSessionId& session) override;
367  void update_custom_expression(const TSessionId& session,
368  const int32_t id,
369  const std::string& expression_json) override;
370  void delete_custom_expressions(const TSessionId& session,
371  const std::vector<int32_t>& custom_expression_ids,
372  const bool do_soft_delete) override;
373 
374  // dashboards
375  void get_dashboard(TDashboard& _return,
376  const TSessionId& session,
377  const int32_t dashboard_id) override;
378  void get_dashboards(std::vector<TDashboard>& _return,
379  const TSessionId& session) override;
380  int32_t create_dashboard(const TSessionId& session,
381  const std::string& dashboard_name,
382  const std::string& dashboard_state,
383  const std::string& image_hash,
384  const std::string& dashboard_metadata) override;
385  void replace_dashboard(const TSessionId& session,
386  const int32_t dashboard_id,
387  const std::string& dashboard_name,
388  const std::string& dashboard_owner,
389  const std::string& dashboard_state,
390  const std::string& image_hash,
391  const std::string& dashboard_metadata) override;
392  void delete_dashboard(const TSessionId& session, const int32_t dashboard_id) override;
393  void share_dashboards(const TSessionId& session,
394  const std::vector<int32_t>& dashboard_ids,
395  const std::vector<std::string>& groups,
396  const TDashboardPermissions& permissions) override;
397  void delete_dashboards(const TSessionId& session,
398  const std::vector<int32_t>& dashboard_ids) override;
399  void share_dashboard(const TSessionId& session,
400  const int32_t dashboard_id,
401  const std::vector<std::string>& groups,
402  const std::vector<std::string>& objects,
403  const TDashboardPermissions& permissions,
404  const bool grant_role) override;
405  void unshare_dashboards(const TSessionId& session,
406  const std::vector<int32_t>& dashboard_ids,
407  const std::vector<std::string>& groups,
408  const TDashboardPermissions& permissions) override;
409  void unshare_dashboard(const TSessionId& session,
410  const int32_t dashboard_id,
411  const std::vector<std::string>& groups,
412  const std::vector<std::string>& objects,
413  const TDashboardPermissions& permissions) override;
414  void get_dashboard_grantees(std::vector<TDashboardGrantees>& _return,
415  const TSessionId& session,
416  const int32_t dashboard_id) override;
417 
418  void get_link_view(TFrontendView& _return,
419  const TSessionId& session,
420  const std::string& link) override;
421  void create_link(std::string& _return,
422  const TSessionId& session,
423  const std::string& view_state,
424  const std::string& view_metadata) override;
425  // import
426  void load_table_binary(const TSessionId& session,
427  const std::string& table_name,
428  const std::vector<TRow>& rows,
429  const std::vector<std::string>& column_names) override;
430 
431  void load_table_binary_columnar(const TSessionId& session,
432  const std::string& table_name,
433  const std::vector<TColumn>& cols,
434  const std::vector<std::string>& column_names) override;
435  void load_table_binary_columnar_polys(const TSessionId& session,
436  const std::string& table_name,
437  const std::vector<TColumn>& cols,
438  const std::vector<std::string>& column_names,
439  const bool assign_render_groups) override;
440  void load_table_binary_arrow(const TSessionId& session,
441  const std::string& table_name,
442  const std::string& arrow_stream,
443  const bool use_column_names) override;
444 
445  void load_table(const TSessionId& session,
446  const std::string& table_name,
447  const std::vector<TStringRow>& rows,
448  const std::vector<std::string>& column_names) override;
449  void detect_column_types(TDetectResult& _return,
450  const TSessionId& session,
451  const std::string& file_name,
452  const TCopyParams& copy_params) override;
453  void create_table(const TSessionId& session,
454  const std::string& table_name,
455  const TRowDescriptor& row_desc,
456  const TCreateParams& create_params) override;
457  void import_table(const TSessionId& session,
458  const std::string& table_name,
459  const std::string& file_name,
460  const TCopyParams& copy_params) override;
461  void import_geo_table(const TSessionId& session,
462  const std::string& table_name,
463  const std::string& file_name,
464  const TCopyParams& copy_params,
465  const TRowDescriptor& row_desc,
466  const TCreateParams& create_params) override;
467  void import_table_status(TImportStatus& _return,
468  const TSessionId& session,
469  const std::string& import_id) override;
470  void get_first_geo_file_in_archive(std::string& _return,
471  const TSessionId& session,
472  const std::string& archive_path,
473  const TCopyParams& copy_params) override;
474  void get_all_files_in_archive(std::vector<std::string>& _return,
475  const TSessionId& session,
476  const std::string& archive_path,
477  const TCopyParams& copy_params) override;
478  void get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
479  const TSessionId& session,
480  const std::string& file_name,
481  const TCopyParams& copy_params) override;
482  // distributed
483  int64_t query_get_outer_fragment_count(const TSessionId& session,
484  const std::string& select_query) override;
485 
486  void check_table_consistency(TTableMeta& _return,
487  const TSessionId& session,
488  const int32_t table_id) override;
489  void start_query(TPendingQuery& _return,
490  const TSessionId& leaf_session,
491  const TSessionId& parent_session,
492  const std::string& serialized_rel_alg_dag,
493  const std::string& start_time_str,
494  const bool just_explain,
495  const std::vector<int64_t>& outer_fragment_indices) override;
496  void execute_query_step(TStepResult& _return,
497  const TPendingQuery& pending_query,
498  const TSubqueryId subquery_id,
499  const std::string& start_time_str) override;
500  void broadcast_serialized_rows(const TSerializedRows& serialized_rows,
501  const TRowDescriptor& row_desc,
502  const TQueryId query_id,
503  const TSubqueryId subquery_id,
504  const bool is_final_subquery_result) override;
505 
506  void start_render_query(TPendingRenderQuery& _return,
507  const TSessionId& session,
508  const int64_t widget_id,
509  const int16_t node_idx,
510  const std::string& vega_json) override;
511  void execute_next_render_step(TRenderStepResult& _return,
512  const TPendingRenderQuery& pending_render,
513  const TRenderAggDataMap& merged_data) override;
514 
515  void insert_data(const TSessionId& session, const TInsertData& insert_data) override;
516  void insert_chunks(const TSessionId& session,
517  const TInsertChunks& insert_chunks) override;
518  void checkpoint(const TSessionId& session, const int32_t table_id) override;
519  // DB Object Privileges
520  void get_roles(std::vector<std::string>& _return, const TSessionId& session) override;
521  bool has_role(const TSessionId& sessionId,
522  const std::string& granteeName,
523  const std::string& roleName) override;
524  bool has_object_privilege(const TSessionId& sessionId,
525  const std::string& granteeName,
526  const std::string& objectName,
527  const TDBObjectType::type object_type,
528  const TDBObjectPermissions& permissions) override;
529  void get_db_objects_for_grantee(std::vector<TDBObject>& _return,
530  const TSessionId& session,
531  const std::string& roleName) override;
532  void get_db_object_privs(std::vector<TDBObject>& _return,
533  const TSessionId& session,
534  const std::string& objectName,
535  const TDBObjectType::type type) override;
536  void get_all_roles_for_user(std::vector<std::string>& _return,
537  const TSessionId& session,
538  const std::string& granteeName) override;
539  void get_all_effective_roles_for_user(std::vector<std::string>& _return,
540  const TSessionId& session,
541  const std::string& granteeName) override;
542  std::vector<std::string> get_valid_groups(const TSessionId& session,
543  int32_t dashboard_id,
544  std::vector<std::string> groups);
545  // licensing
546  void set_license_key(TLicenseInfo& _return,
547  const TSessionId& session,
548  const std::string& key,
549  const std::string& nonce) override;
550  void get_license_claims(TLicenseInfo& _return,
551  const TSessionId& session,
552  const std::string& nonce) override;
553  // user-defined functions
554  /*
555  Returns a mapping of device (CPU, GPU) parameters (name, LLVM IR
556  triplet, features, etc)
557  */
558  void get_device_parameters(std::map<std::string, std::string>& _return,
559  const TSessionId& session) override;
560 
561  /*
562  Register Runtime Extension Functions (UDFs, UDTFs) with given
563  signatures. The extension functions implementations are given in a
564  mapping of a device and the corresponding LLVM/NVVM IR string.
565  */
566 
567  void register_runtime_extension_functions(
568  const TSessionId& session,
569  const std::vector<TUserDefinedFunction>& udfs,
570  const std::vector<TUserDefinedTableFunction>& udtfs,
571  const std::map<std::string, std::string>& device_ir_map) override;
572 
573  /*
574  Returns a list of User-Defined Function names available
575  */
576  void get_function_names(std::vector<std::string>& _return,
577  const TSessionId& session) override;
578 
579  /*
580  Returns a list of runtime User-Defined Function names available
581  */
582  void get_runtime_function_names(std::vector<std::string>& _return,
583  const TSessionId& session) override;
584 
585  /*
586  Returns a list of User-Defined Function details
587  */
588  void get_function_details(std::vector<TUserDefinedFunction>& _return,
589  const TSessionId& session,
590  const std::vector<std::string>& udf_names) override;
591 
592  /*
593  Returns a list of User-Defined Table Function names available
594  */
595  void get_table_function_names(std::vector<std::string>& _return,
596  const TSessionId& session) override;
597 
598  /*
599  Returns a list of runtime User-Defined Table Function names available
600  */
601  void get_runtime_table_function_names(std::vector<std::string>& _return,
602  const TSessionId& session) override;
603 
604  /*
605  Returns a list of User-Defined Table Function details
606  */
607  void get_table_function_details(std::vector<TUserDefinedTableFunction>& _return,
608  const TSessionId& session,
609  const std::vector<std::string>& udtf_names) override;
610 
611  // end of sync block for HAHandler and mapd.thrift
612 
613  void shutdown();
614  void emergency_shutdown();
615 
616  TSessionId getInvalidSessionId() const;
617 
618  void internal_connect(TSessionId& session,
619  const std::string& username,
620  const std::string& dbname);
621 
622  bool isAggregator() const;
623 
624  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
625 
627  std::vector<LeafHostInfo> db_leaves_;
628  std::vector<LeafHostInfo> string_leaves_;
629  const std::string base_data_path_;
630  boost::filesystem::path import_path_;
632  std::default_random_engine random_gen_;
633  std::uniform_int_distribution<int64_t> session_id_dist_;
634  const bool jit_debug_;
635  const bool intel_jit_profile_;
637  const bool read_only_;
638  const bool allow_loop_joins_;
641  std::mutex render_mutex_;
642  int64_t start_time_;
645  std::shared_ptr<QueryEngine> query_engine_;
646  std::unique_ptr<RenderHandler> render_handler_;
647  std::unique_ptr<HeavyDBAggHandler> agg_handler_;
648  std::unique_ptr<HeavyDBLeafHandler> leaf_handler_;
649  std::shared_ptr<Calcite> calcite_;
650  const bool legacy_syntax_;
651 
652  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
653 
// Perfect-forwards every argument to query_states_.create() and returns the
// std::shared_ptr<query_state::QueryState> it produces.
// NOTE(review): the declaration of query_states_ is not visible in this listing.
654  template <typename... ARGS>
655  std::shared_ptr<query_state::QueryState> create_query_state(ARGS&&... args) {
656  return query_states_.create(std::forward<ARGS>(args)...);
657  }
658 
659  // Exactly one immutable SessionInfo copy should be taken by a typical request.
660  Catalog_Namespace::SessionInfo get_session_copy(const TSessionId& session_id);
661 
662  void get_tables_meta_impl(std::vector<TTableMeta>& _return,
663  QueryStateProxy query_state_proxy,
664  const Catalog_Namespace::SessionInfo& session_info,
665  const bool with_table_locks = true);
666 
667  // Visible for use in tests.
668  void resizeDispatchQueue(size_t queue_size);
669 
670  protected:
671  // Returns empty std::shared_ptr if session.empty().
672  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_ptr(
673  const TSessionId& session_id);
674 
675  ConnectionInfo getConnectionInfo() const;
676 
677  private:
678  std::atomic<bool> initialized_{false};
679  std::shared_ptr<Catalog_Namespace::SessionInfo> create_new_session(
680  TSessionId& session,
681  const std::string& dbname,
682  const Catalog_Namespace::UserMetadata& user_meta,
683  std::shared_ptr<Catalog_Namespace::Catalog> cat);
684  void connect_impl(TSessionId& session,
685  const std::string& passwd,
686  const std::string& dbname,
687  const Catalog_Namespace::UserMetadata& user_meta,
688  std::shared_ptr<Catalog_Namespace::Catalog> cat,
689  query_state::StdLog& stdlog);
690  void disconnect_impl(Catalog_Namespace::SessionInfoPtr& session_ptr);
691  void check_table_load_privileges(const Catalog_Namespace::SessionInfo& session_info,
692  const std::string& table_name);
693  void get_tables_impl(std::vector<std::string>& table_names,
695  const GetTablesType get_tables_type,
696  const std::string& database_name = {});
697  void get_table_details_impl(TTableDetails& _return,
698  query_state::StdLog& stdlog,
699  const std::string& table_name,
700  const bool get_system,
701  const bool get_physical,
702  const std::string& database_name = {});
703  void getAllRolesForUserImpl(
704  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
705  std::vector<std::string>& roles,
706  const std::string& granteeName,
707  bool effective);
708  void check_read_only(const std::string& str);
709  void validateGroups(const std::vector<std::string>& groups);
710  void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo& session_info,
711  const std::vector<int32_t>& dashboard_ids);
712  void shareOrUnshareDashboards(const TSessionId& session,
713  const std::vector<int32_t>& dashboard_ids,
714  const std::vector<std::string>& groups,
715  const TDashboardPermissions& permissions,
716  const bool do_share);
717 
718  static void value_to_thrift_column(const TargetValue& tv,
719  const SQLTypeInfo& ti,
720  TColumn& column);
721  static TDatum value_to_thrift(const TargetValue& tv, const SQLTypeInfo& ti);
722 
723  std::pair<TPlanResult, lockmgr::LockedTableDescriptors> parse_to_ra(
725  const std::string& query_str,
726  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
727  const bool acquire_locks,
728  const SystemParameters& system_parameters,
729  bool check_privileges = true);
730 
731  void sql_execute_local(
732  TQueryResult& _return,
733  const QueryStateProxy& query_state_proxy,
734  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
735  const std::string& query_str,
736  const bool column_format,
737  const std::string& nonce,
738  const int32_t first_n,
739  const int32_t at_most_n,
740  const bool use_calcite);
741 
742  int64_t process_deferred_copy_from(const TSessionId& session_id);
743 
744  static void convertData(TQueryResult& _return,
746  const QueryStateProxy& query_state_proxy,
747  const bool column_format,
748  const int32_t first_n,
749  const int32_t at_most_n);
750 
751  void sql_execute_impl(ExecutionResult& _return,
753  const bool column_format,
754  const ExecutorDeviceType executor_device_type,
755  const int32_t first_n,
756  const int32_t at_most_n,
757  const bool use_calcite,
759 
761  const TableDescriptor* td,
762  const AccessPrivileges acess_priv);
763 
764  void execute_distributed_copy_statement(
766  const Catalog_Namespace::SessionInfo& session_info);
767 
768  TPlanResult processCalciteRequest(
770  const std::shared_ptr<Catalog_Namespace::Catalog>& cat,
771  const std::string& query_str,
772  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
773  const SystemParameters& system_parameters,
774  const bool check_privileges);
775 
776  TQueryResult validate_rel_alg(const std::string& query_ra, QueryStateProxy);
777 
778  void dispatch_query_task(std::shared_ptr<QueryDispatchQueue::Task> query_task,
779  const bool is_update_delete);
780 
781  std::vector<PushedDownFilterInfo> execute_rel_alg(
782  ExecutionResult& _return,
784  const std::string& query_ra,
785  const bool column_format,
786  const ExecutorDeviceType executor_device_type,
787  const int32_t first_n,
788  const int32_t at_most_n,
789  const bool just_validate,
790  const bool find_push_down_candidates,
791  const ExplainInfo& explain_info,
792  const std::optional<size_t> executor_index = std::nullopt) const;
793 
794  void execute_rel_alg_with_filter_push_down(
795  ExecutionResult& _return,
797  std::string& query_ra,
798  const bool column_format,
799  const ExecutorDeviceType executor_device_type,
800  const int32_t first_n,
801  const int32_t at_most_n,
802  const bool just_explain,
803  const bool just_calcite_explain,
804  const std::vector<PushedDownFilterInfo>& filter_push_down_requests);
805 
806  void executeDdl(TQueryResult& _return,
807  const std::string& query_ra,
808  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
809 
810  void executeDdl(ExecutionResult& _return,
811  const std::string& query_ra,
812  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
813 
814  TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog* cat,
815  const ColumnDescriptor* cd);
816  TRowDescriptor fixup_row_descriptor(const TRowDescriptor& row_desc,
818  void set_execution_mode_nolock(Catalog_Namespace::SessionInfo* session_ptr,
819  const TExecuteMode::type mode);
820  char unescape_char(std::string str);
821  import_export::CopyParams thrift_to_copyparams(const TCopyParams& cp);
822  TCopyParams copyparams_to_thrift(const import_export::CopyParams& cp);
823  void check_geospatial_files(const boost::filesystem::path file_path,
824  const import_export::CopyParams& copy_params);
825  void render_rel_alg(TRenderResult& _return,
826  const std::string& query_ra,
827  const std::string& query_str,
828  const Catalog_Namespace::SessionInfo& session_info,
829  const std::string& render_type,
830  const bool is_projection_query);
831 
832  TColumnType create_geo_column(const TDatumType::type type,
833  const std::string& name,
834  const bool is_array);
835 
// Static helper: fills the Thrift `_return` from `results` (read-only).
// `column_format` presumably selects columnar rather than row-wise output.
// NOTE(review): expected to be used for EXPLAIN-style results - confirm.
836  static void convertExplain(TQueryResult& _return,
837  const ResultSet& results,
838  const bool column_format);
// Static helper with the same signature as convertExplain: fills the Thrift
// `_return` from `results` (read-only), honouring `column_format`.
// NOTE(review): how it differs from convertExplain is defined in the .cpp.
839  static void convertResult(TQueryResult& _return,
840  const ResultSet& results,
841  const bool column_format);
842 
// Static helper: fills the Thrift `_return` with rows taken from `results`,
// given the target metadata in `targets`.
// NOTE(review): `first_n` / `at_most_n` look like row limits; their exact
// semantics (and any "no limit" sentinel) are not visible here - confirm.
843  static void convertRows(TQueryResult& _return,
844  QueryStateProxy query_state_proxy,
845  const std::vector<TargetMetaInfo>& targets,
846  const ResultSet& results,
847  const bool column_format,
848  const int32_t first_n,
849  const int32_t at_most_n);
850 
851  // Uses an ExecutionResult to populate a TQueryResult (`_return`).
852  // Calls convertRows, but only after some setup that uses session_info.
// Note: `result` is a non-const reference, so it may be modified by the call.
853  void convertResultSet(ExecutionResult& result,
854  const Catalog_Namespace::SessionInfo& session_info,
855  const std::string& query_state_str,
856  TQueryResult& _return);
857 
// Static helper: fills the Thrift `_return` from `results`, with `label`
// supplied by the caller.
// NOTE(review): `label` is presumably used as the output column name - confirm.
// Note: `label` is passed by value, so each call copies the string.
858  static void createSimpleResult(TQueryResult& _return,
859  const ResultSet& results,
860  const bool column_format,
861  const std::string label);
862 
// Returns a vector of TargetMetaInfo built from the analyzer target entries in
// `targets`. Const member: does not modify the handler.
// NOTE(review): presumably one output element per input entry - confirm.
863  std::vector<TargetMetaInfo> getTargetMetaInfo(
864  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
865 
// Two overloads returning the target names as strings: one reads them from
// TargetMetaInfo values, the other from analyzer TargetEntry pointers.
// Both are const members and take their input by const reference.
866  std::vector<std::string> getTargetNames(
867  const std::vector<TargetMetaInfo>& targets) const;
868 
869  std::vector<std::string> getTargetNames(
870  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
871 
// Produces completion hints for the text `sql` at position `cursor`, writing
// them into `hints`. As the name says, no ordering of `hints` is promised here.
// `visible_tables` and `stdlog` are non-const references and may be modified.
// Note: `visible_tables` precedes `stdlog` here, the opposite of
// get_token_based_completions - easy to mix up at call sites.
872  void get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
873  std::vector<std::string>& visible_tables,
874  query_state::StdLog& stdlog,
875  const std::string& sql,
876  const int cursor);
// Produces completion hints for the text `sql` at position `cursor`, writing
// them into `hints`.
// NOTE(review): presumably derives hints from the token at the cursor - confirm.
// Note: `stdlog` precedes `visible_tables` here, the opposite of
// get_completion_hints_unsorted.
877  void get_token_based_completions(std::vector<TCompletionHint>& hints,
878  query_state::StdLog& stdlog,
879  std::vector<std::string>& visible_tables,
880  const std::string& sql,
881  const int cursor);
882 
// Returns a map from a string key to a set of strings; per the name, the keys
// are table names and the values are the column names of each table.
// `table_names` is a non-const reference, so the callee may modify it.
// NOTE(review): letter-casing of the returned names is not visible here.
883  std::unordered_map<std::string, std::unordered_set<std::string>>
884  fill_column_names_by_table(std::vector<std::string>& table_names,
885  query_state::StdLog& stdlog);
886 
887  TDashboard get_dashboard_impl(
888  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
890  const DashboardDescriptor* dash,
891  const bool populate_state = true);
892 
// Per-object-type permission predicates. All five are static and share one
// signature (AccessPrivileges, TDBObjectPermissions) -> bool, which is what lets
// them be stored as function pointers in permissionFuncMap_.
// NOTE(review): has_table_permission names its second parameter `permission`
// while the others use `permissions`; harmless in a declaration, but worth
// aligning together with the definition.
893  static bool has_database_permission(const AccessPrivileges& privs,
894  const TDBObjectPermissions& permissions);
895  static bool has_table_permission(const AccessPrivileges& privs,
896  const TDBObjectPermissions& permission);
897  static bool has_dashboard_permission(const AccessPrivileges& privs,
898  const TDBObjectPermissions& permissions);
899  static bool has_view_permission(const AccessPrivileges& privs,
900  const TDBObjectPermissions& permissions);
901  static bool has_server_permission(const AccessPrivileges& privs,
902  const TDBObjectPermissions& permissions);
903  // For the provided upper case column names `uc_column_names`, return
904  // the tables from `table_names` which contain at least one of them.
905  // Used to rank the TABLE auto-completion hints by the columns
906  // specified in the projection.
// Note: `table_names` and `stdlog` are non-const references and may be modified.
907  std::unordered_set<std::string> get_uc_compatible_table_names_by_column(
908  const std::unordered_set<std::string>& uc_column_names,
909  std::vector<std::string>& table_names,
910  query_state::StdLog& stdlog);
911 
// Prepares a load into table `table_name` and returns a lock container over a
// TableDescriptor pointer as a unique_ptr, so the caller owns the lock object.
// `loader` and `import_buffers` are raw pointers to caller-owned objects and are
// presumably filled in as outputs - TODO confirm in the definition.
// NOTE(review): how `num_cols` relates to `column_names`, and the accepted
// values of `load_type`, are not visible here.
912  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
913  prepare_loader_generic(
914  const Catalog_Namespace::SessionInfo& session_info,
915  const std::string& table_name,
916  size_t num_cols,
917  std::unique_ptr<import_export::Loader>* loader,
918  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
919  const std::vector<std::string>& column_names,
920  std::string load_type);
921 
// Fills entries of `import_buffers` for the geo column described by `cd`.
// `col_idx` is an in/out parameter (non-const reference).
// NOTE(review): `col_idx` is presumably advanced past the columns this call
// fills; `num_rows` presumably is the number of rows to produce - confirm both.
922  void fillGeoColumns(
923  const TSessionId& session,
924  const Catalog_Namespace::Catalog& catalog,
925  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
926  const ColumnDescriptor* cd,
927  size_t& col_idx,
928  size_t num_rows,
929  const std::string& table_name,
930  bool assign_render_groups);
931 
// Fills entries of `import_buffers` for the column descriptors in `cds`.
// NOTE(review): per the name, this presumably supplies values for columns absent
// from the incoming data, with `desc_id_to_column_id` mapping a descriptor
// position to an input column - confirm in the definition.
932  void fillMissingBuffers(
933  const TSessionId& session,
934  const Catalog_Namespace::Catalog& catalog,
935  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
936  const std::list<const ColumnDescriptor*>& cds,
937  const std::vector<int>& desc_id_to_column_id,
938  size_t num_rows,
939  const std::string& table_name,
940  bool assign_render_groups);
941 
943  std::unique_ptr<Catalog_Namespace::SessionsStore> sessions_store_;
944  std::unordered_map<std::string, Catalog_Namespace::SessionInfoPtr> calcite_sessions_;
946 
947  Catalog_Namespace::SessionInfoPtr findCalciteSession(TSessionId const&) const;
948 
949  bool super_user_rights_; // default is "false"; setting to "true"
950  // ignores passwd checks in "connect(..)"
951  // method
952  const int idle_session_duration_; // max duration of idle session
953  const int max_session_duration_; // max duration of session
954 
955  const bool enable_rendering_;
958  const unsigned renderer_vulkan_timeout_;
963  const size_t reserved_gpu_mem_;
965  const size_t render_mem_bytes_;
966  const size_t num_reader_threads_;
// The members below are const references, not copies: they alias objects owned
// elsewhere, so those objects must outlive this handler.
// libgeos_so_filename_ exists only when ENABLE_GEOS is defined.
967 #ifdef ENABLE_GEOS
968  const std::string& libgeos_so_filename_;
969 #endif
971  const std::string& udf_filename_;
972  const std::string& clang_path_;
973  const std::vector<std::string>& clang_options_;
974 
976  std::string table;
977  std::string file_name;
979  std::string partitions;
980  };
981 
983  std::unordered_map<std::string, DeferredCopyFromState> was_deferred_copy_from;
985 
986  std::optional<DeferredCopyFromState> operator()(const std::string& session_id) {
987  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
988  auto itr = was_deferred_copy_from.find(session_id);
989  if (itr == was_deferred_copy_from.end()) {
990  return std::nullopt;
991  }
992  return itr->second;
993  }
994 
995  void add(const std::string& session_id, const DeferredCopyFromState& state) {
996  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
997  const auto ret = was_deferred_copy_from.insert(std::make_pair(session_id, state));
998  CHECK(ret.second);
999  }
1000 
1001  void remove(const std::string& session_id) {
1002  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1003  was_deferred_copy_from.erase(session_id);
1004  }
1005  };
1007 
1008  // Only for IPC device memory deallocation
// Both members are `mutable`, so const member functions can lock the mutex and
// update the map.
// NOTE(review): handle_to_dev_ptr_mutex_ presumably guards ipc_handle_to_dev_ptr_;
// the map's key/value meaning (handle -> device pointer, both stored as strings)
// is inferred from the name - confirm at the use sites.
1009  mutable std::mutex handle_to_dev_ptr_mutex_;
1010  mutable std::unordered_map<std::string, std::string> ipc_handle_to_dev_ptr_;
1011 
// Grants the free function run_warmup_queries access to this class's
// non-public members. The function is defined outside this header (the listing
// cross-references HeavyDB.cpp).
1012  friend void run_warmup_queries(std::shared_ptr<DBHandler> handler,
1013  std::string base_path,
1014  std::string query_file_path);
1015 
// These classes are friends and can therefore reach this class's non-public
// members directly.
1016  friend class RenderHandler::Impl;
1017  friend class HeavyDBAggHandler;
1018  friend class HeavyDBLeafHandler;
1019 
// Dispatch table from an object-type name ("database", "dashboard", "table",
// "view", "server") to the matching static has_*_permission predicate.
// The `s` suffix builds std::string keys, so the std::string literal operator
// must be in scope where this header is compiled.
// NOTE(review): the `const` on the key type is redundant (std::map keys are
// already const); the const mapped type makes the stored pointers read-only.
1020  std::map<const std::string, const PermissionFuncPtr> permissionFuncMap_ = {
1021  {"database"s, has_database_permission},
1022  {"dashboard"s, has_dashboard_permission},
1023  {"table"s, has_table_permission},
1024  {"view"s, has_view_permission},
1025  {"server"s, has_server_permission}};
1026 
// Inspects the DDL statement `ddl`; per the name, it invalidates sessions that
// the statement affects.
// NOTE(review): which statement kinds trigger invalidation, and whether `ddl`
// may be null, are not visible here - confirm in the definition.
1027  void check_and_invalidate_sessions(Parser::DDLStmt* ddl);
1028 
// Create/remove pair for in-memory Calcite sessions.
// createInMemoryCalciteSession returns a string that is presumably the new
// session's id, i.e. the value to hand to removeInMemoryCalciteSession - confirm.
// NOTE(review): the `const` on the by-value return type only prevents moving
// from the result.
1029  std::string const createInMemoryCalciteSession(
1030  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr);
1031  void removeInMemoryCalciteSession(const std::string& session_id);
1032 
// Returns user-session information as an ExecutionResult, on behalf of the
// session `session_ptr`.
// NOTE(review): which sessions are included, and the result's columns, are
// defined in the .cpp - confirm there.
1033  ExecutionResult getUserSessions(
1034  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1035 
1036  // getQueries returns the set of queries queued in the DB
1037  // that belong to the same DB as the caller's session
1038 
// Result is returned by value as an ExecutionResult; `session_ptr` identifies
// the caller's session and is taken by value (shared ownership).
1039  ExecutionResult getQueries(
1040  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1041 
// Overrides a virtual function of a base class (`override`). Writes its result
// into `_return` as a list of TQueryInfo for the session id `session`.
// NOTE(review): presumably the Thrift-facing counterpart of getQueries - confirm.
1042  void get_queries_info(std::vector<TQueryInfo>& _return,
1043  const TSessionId& session) override;
1044 
1045  // this function passes the interrupt request to the DB executor
// `session_info` is the requesting session; `target_session` names the session
// whose query should be interrupted.
// NOTE(review): any permission check between the two is not visible here.
1046  void interruptQuery(const Catalog_Namespace::SessionInfo& session_info,
1047  const std::string& target_session);
1048 
1049  void alterSystemClear(const std::string& sesson_id,
1051  const std::string& cache_type,
1052  int64_t& execution_time_ms);
1053 
1054  void alterSession(const std::string& sesson_id,
1056  const std::pair<std::string, std::string>& session_parameter,
1057  int64_t& execution_time_ms);
1058 
1059  // render group assignment
1060 
// Mode selector taken by loadTableBinaryColumnarInternal.
// NOTE(review): presumably kNone = no render-group work, kAssign = assign render
// groups during the load, kCleanUp = release per-session state - confirm.
1061  enum class AssignRenderGroupsMode { kNone, kAssign, kCleanUp };
1062 
// Loads the columnar data `cols` into table `table_name` for session `session`,
// with `assign_render_groups_mode` selecting the render-group handling.
// NOTE(review): `column_names` presumably names the target column of each entry
// in `cols`; behavior when it is empty is not visible here - confirm.
1063  void loadTableBinaryColumnarInternal(
1064  const TSessionId& session,
1065  const std::string& table_name,
1066  const std::vector<TColumn>& cols,
1067  const std::vector<std::string>& column_names,
1068  const AssignRenderGroupsMode assign_render_groups_mode);
1069 
// Returns this server's role as a Thrift TRole::type. Const member: does not
// modify the handler.
1070  TRole::type getServerRole() const;
1071 
1072  using RenderGroupAssignmentColumnMap =
1073  std::unordered_map<std::string,
1074  std::unique_ptr<import_export::RenderGroupAnalyzer>>;
1076  std::unordered_map<std::string, RenderGroupAssignmentColumnMap>;
1078  std::unordered_map<TSessionId, RenderGroupAssignmentTableMap>;
1082 
// Geo-table import entry point with the same parameter list as
// importGeoTableSingle.
// NOTE(review): per the name, `file_name` is presumably expanded as a glob and
// the matches filtered/sorted before each is imported - confirm.
1083  void importGeoTableGlobFilterSort(const TSessionId& session,
1084  const std::string& table_name,
1085  const std::string& file_name,
1086  const import_export::CopyParams& copy_params,
1087  const TRowDescriptor& row_desc,
1088  const TCreateParams& create_params);
1089 
// Geo-table import for one `file_name` into `table_name`, using `copy_params`,
// the row descriptor `row_desc` and `create_params`.
// NOTE(review): whether the table is created when missing is not visible here.
1090  void importGeoTableSingle(const TSessionId& session,
1091  const std::string& table_name,
1092  const std::string& file_name,
1093  const import_export::CopyParams& copy_params,
1094  const TRowDescriptor& row_desc,
1095  const TCreateParams& create_params);
1096 };
std::lock_guard< T > lock_guard
Classes used to wrap parser calls for calcite redirection.
AssignRenderGroupsMode
Definition: DBHandler.h:1061
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:628
auto get_users(SysCatalog &syscat, std::unique_ptr< SqliteConnector > &sqliteConnector, const int32_t dbId=-1)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:973
boost::filesystem::path import_path_
Definition: DBHandler.h:630
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:652
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:271
std::unordered_map< std::string, RenderGroupAssignmentColumnMap > RenderGroupAssignmentTableMap
Definition: DBHandler.h:1076
ClientProtocol
const bool renderer_use_parallel_executors_
Definition: DBHandler.h:959
const std::string & udf_filename_
Definition: DBHandler.h:971
std::string cat(Ts &&...args)
const int render_oom_retry_threshold_
Definition: DBHandler.h:961
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
void run_warmup_queries(std::shared_ptr< DBHandler > handler, std::string base_path, std::string query_file_path)
Definition: HeavyDB.cpp:206
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:1009
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:655
static thread_local std::string client_address
Definition: DBHandler.h:155
ExecutorDeviceType
std::mutex render_group_assignment_mutex_
Definition: DBHandler.h:1080
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:995
bool user_can_access_table(const Catalog_Namespace::SessionInfo &session_info, const TableDescriptor *td, const AccessPrivileges access_priv)
bool(*)(const AccessPrivileges &, const TDBObjectPermissions &) PermissionFuncPtr
Definition: DBHandler.h:105
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:156
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:626
std::unordered_map< TSessionId, RenderGroupAssignmentTableMap > RenderGroupAnalyzerSessionMap
Definition: DBHandler.h:1078
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:958
const std::string base_data_path_
Definition: DBHandler.h:629
const bool jit_debug_
Definition: DBHandler.h:634
const bool check_origin_
Definition: DBHandler.h:159
const size_t render_mem_bytes_
Definition: DBHandler.h:965
std::map< TSessionId, std::shared_ptr< Catalog_Namespace::SessionInfo >> SessionMap
Definition: DBHandler.h:104
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:1006
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:642
const bool renderer_use_ppll_polys_
Definition: DBHandler.h:956
import_export::CopyParams copy_params
Definition: DBHandler.h:978
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:944
This file contains the class specification and related data structures for Catalog.
std::mutex render_mutex_
Definition: DBHandler.h:641
static size_t max_bytes_for_thrift()
Definition: DBHandler.h:204
query_state::QueryStates query_states_
Definition: DBHandler.h:942
Supported runtime functions management and retrieval.
const size_t reserved_gpu_mem_
Definition: DBHandler.h:963
std::optional< DeferredCopyFromState > operator()(const std::string &session_id)
Definition: DBHandler.h:986
Classes representing a parse tree.
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:964
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4592
GetTablesType
Definition: Catalog.h:63
const int max_session_duration_
Definition: DBHandler.h:953
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:631
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:627
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:970
const std::string & clang_path_
Definition: DBHandler.h:972
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:646
Checked json field retrieval.
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:645
SystemParameters & system_parameters_
Definition: DBHandler.h:644
const size_t num_reader_threads_
Definition: DBHandler.h:966
specifies the content in-memory of a row in the column metadata table
const bool enable_auto_clear_render_mem_
Definition: DBHandler.h:960
const bool renderer_prefer_igpu_
Definition: DBHandler.h:957
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:945
std::map< std::string, std::string > get_device_parameters(bool cpu_only)
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4587
const bool allow_loop_joins_
Definition: DBHandler.h:638
heavyai::shared_mutex sessions_mutex_
Definition: DBHandler.h:640
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:647
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1081
const bool enable_rendering_
Definition: DBHandler.h:955
std::unordered_map< std::string, DeferredCopyFromState > was_deferred_copy_from
Definition: DBHandler.h:983
const bool intel_jit_profile_
Definition: DBHandler.h:635
bool super_user_rights_
Definition: DBHandler.h:949
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:943
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:649
void shutdown()
Definition: Logger.cpp:397
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:624
const bool read_only_
Definition: DBHandler.h:637
bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol > in, std::shared_ptr<::apache::thrift::protocol::TProtocol > out, void *connectionContext) override
Definition: DBHandler.h:120
const bool legacy_syntax_
Definition: DBHandler.h:650
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:648
#define CHECK(condition)
Definition: Logger.h:289
const int idle_session_duration_
Definition: DBHandler.h:952
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:1010
RenderGroupAnalyzerSessionMap render_group_assignment_map_
Definition: DBHandler.h:1079
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:962
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
bool allow_multifrag_
Definition: DBHandler.h:636
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
const AuthMetadata & authMetadata_
Definition: DBHandler.h:643
TrackingProcessor(std::shared_ptr< HeavyIf > handler, const bool check_origin)
Definition: DBHandler.h:117
bool cpu_mode_only_
Definition: DBHandler.h:639
std::default_random_engine random_gen_
Definition: DBHandler.h:632
std::uniform_int_distribution< int64_t > session_id_dist_
Definition: DBHandler.h:633
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27