OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "LeafAggregator.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "Calcite/Calcite.h"
32 #include "Catalog/Catalog.h"
33 #include "Catalog/SessionsStore.h"
35 #include "Geospatial/Transforms.h"
36 #include "ImportExport/Importer.h"
38 #include "LockMgr/LockMgr.h"
39 #include "Logger/Logger.h"
40 #include "Parser/ParserNode.h"
41 #include "Parser/ParserWrapper.h"
45 #include "QueryEngine/Execute.h"
52 #include "Shared/StringTransform.h"
56 #include "Shared/measure.h"
57 #include "Shared/scope.h"
63 
64 #include <sys/types.h>
65 #include <thrift/server/TServer.h>
66 #include <thrift/transport/THttpClient.h>
67 #include <thrift/transport/TSocket.h>
68 #include <thrift/transport/TTransport.h>
69 #include <atomic>
70 #include <boost/algorithm/string.hpp>
71 #include <boost/algorithm/string/replace.hpp>
72 #include <boost/algorithm/string/trim.hpp>
73 #include <boost/filesystem.hpp>
74 #include <boost/make_shared.hpp>
75 #include <boost/noncopyable.hpp>
76 #include <boost/none_t.hpp>
77 #include <boost/optional.hpp>
78 #include <boost/program_options.hpp>
79 #include <boost/tokenizer.hpp>
80 #include <cmath>
81 #include <csignal>
82 #include <fstream>
83 #include <list>
84 #include <map>
85 #include <memory>
86 #include <mutex>
87 #include <random>
88 #include <string>
89 #include <thread>
90 #include <typeinfo>
91 #include <unordered_map>
92 
93 #include "gen-cpp/Heavy.h"
94 #include "gen-cpp/extension_functions_types.h"
95 
96 using namespace std::string_literals;
97 
// Forward declarations: DBHandler only holds these types through
// std::unique_ptr members (agg_handler_, leaf_handler_) in the visible code.
98 class HeavyDBAggHandler;
99 class HeavyDBLeafHandler;
100 
101 // Multiple concurrent requests for the same session can occur. For that reason, each
102 // request briefly takes a lock to make a copy of the appropriate SessionInfo object. Then
103 // it releases the lock and uses the copy for the remainder of the request.
104 using SessionMap = std::map<TSessionId, std::shared_ptr<Catalog_Namespace::SessionInfo>>;
// Pointer to a predicate that takes an AccessPrivileges and a TDBObjectPermissions
// (both by const reference) and returns bool. The static has_*_permission helpers
// declared in DBHandler have this signature.
105 using PermissionFuncPtr = bool (*)(const AccessPrivileges&, const TDBObjectPermissions&);
107 
// Free helper functions declared for DBHandler. Only the declarations appear
// here; the definitions are not part of this listing.
108 namespace dbhandler {
// Returns a bool for the given database name.
// NOTE(review): presumably true when db_name is the information-schema
// database -- the exact matching rule is in the definition; confirm there.
109 bool is_info_schema_db(const std::string& db_name);
110 
// NOTE(review): presumably rejects db_name when it is the information-schema
// database. throw_db_exception defaults to false; it looks like it selects
// which exception type is raised -- TODO confirm against the definition.
111 void check_not_info_schema_db(const std::string& db_name,
112  bool throw_db_exception = false);
113 } // namespace dbhandler
114 
// Thrift processor derived from HeavyProcessor. Before delegating a request to
// the base class it can record the caller's origin in the thread-local
// client_address.
// NOTE(review): this listing skips embedded line numbers 133, 142, 144, 146 and
// 149, so the statements inside several branches of process() are not visible
// here (presumably assignments to client_address / client_protocol -- confirm
// against the real header before relying on this text).
115 class TrackingProcessor : public HeavyProcessor {
116  public:
// handler is passed straight to HeavyProcessor; check_origin is stored in
// check_origin_ and gates the origin lookup in process().
117  TrackingProcessor(std::shared_ptr<HeavyIf> handler, const bool check_origin)
118  : HeavyProcessor(handler), check_origin_(check_origin) {}
119 
// Handles one request. When the input protocol has a transport and
// check_origin_ is true, the transport's origin string is split on ',' and the
// first entry, with surrounding whitespace trimmed, is stored in
// client_address. The call is then forwarded unchanged to
// HeavyProcessor::process and its result is returned.
120  bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol> in,
121  std::shared_ptr<::apache::thrift::protocol::TProtocol> out,
122  void* connectionContext) override {
123  using namespace ::apache::thrift;
124 
125  auto transport = in->getTransport();
126  if (transport && check_origin_) {
// Function-local static mutex: it is shared by every caller of process(), and
// the lock_guard holds it until the end of this if-branch.
127  static std::mutex processor_mutex;
128  std::lock_guard lock(processor_mutex);
129  const auto origin_str = transport->getOrigin();
130  std::vector<std::string> origins;
131  boost::split(origins, origin_str, boost::is_any_of(","));
// The body of the empty-origins branch (embedded line 133) is missing from
// this listing.
132  if (origins.empty()) {
134  } else {
135  // Take the first origin, which should be the client IP before any intermediate
136  // servers (e.g. the web server)
137  auto trimmed_origin = origins.front();
138  boost::algorithm::trim(trimmed_origin);
139  TrackingProcessor::client_address = trimmed_origin;
140  }
// Distinguishes the transport by dynamic type: THttpTransport,
// TBufferedTransport, or anything else. The statement inside each branch
// (embedded lines 142, 144, 146) is missing from this listing.
141  if (dynamic_cast<transport::THttpTransport*>(transport.get())) {
143  } else if (dynamic_cast<transport::TBufferedTransport*>(transport.get())) {
145  } else {
147  }
// Reached when there is no transport or origin checking is disabled; the
// branch body (embedded line 149) is missing from this listing.
148  } else {
150  }
151 
152  return HeavyProcessor::process(in, out, connectionContext);
153  }
154 
// One value per thread. client_address is written by process() above;
// no assignment to client_protocol appears in the visible lines.
155  static thread_local std::string client_address;
156  static thread_local ClientProtocol client_protocol;
157 
158  private:
// Set once by the constructor; when false, process() skips the origin lookup.
159  const bool check_origin_;
160 };
161 
// Forward declaration that lets this header name File_Namespace::DiskCacheConfig
// by reference (see the disk_cache_config parameter of the DBHandler constructor).
162 namespace File_Namespace {
163 struct DiskCacheConfig;
164 }
165 
166 class DBHandler : public HeavyIf {
167  public:
168  DBHandler(const std::vector<LeafHostInfo>& db_leaves,
169  const std::vector<LeafHostInfo>& string_leaves,
170  const std::string& base_data_path,
171  const bool allow_multifrag,
172  const bool jit_debug,
173  const bool intel_jit_profile,
174  const bool read_only,
175  const bool allow_loop_joins,
176  const bool enable_rendering,
177  const bool renderer_use_ppll_polys,
178  const bool renderer_prefer_igpu,
179  const unsigned renderer_vulkan_timeout_ms,
180  const bool enable_auto_clear_render_mem,
181  const int render_oom_retry_threshold,
182  const size_t render_mem_bytes,
183  const size_t max_concurrent_render_sessions,
184  const size_t reserved_gpu_mem,
185  const bool render_compositor_use_last_gpu,
186  const size_t num_reader_threads,
187  const AuthMetadata& authMetadata,
188  SystemParameters& system_parameters,
189  const bool legacy_syntax,
190  const int idle_session_duration,
191  const int max_session_duration,
192  const std::string& udf_filename,
193  const std::string& clang_path,
194  const std::vector<std::string>& clang_options,
195 #ifdef ENABLE_GEOS
196  const std::string& libgeos_so_filename,
197 #endif
198  const File_Namespace::DiskCacheConfig& disk_cache_config,
199  const bool is_new_db);
200  void initialize(const bool is_new_db);
201  ~DBHandler() override;
202 
// Returns the constant 2,000,000,000 (2 * 1000^3) as a size_t.
// NOTE(review): presumably the byte cap for a single Thrift message -- confirm
// at the call sites.
// Declared constexpr so the limit can also be used in constant expressions
// (e.g. static_assert or array bounds); existing runtime callers are unaffected.
static inline constexpr size_t max_bytes_for_thrift() {
  return 2'000'000'000ULL;
}
204 
205  // Important ****
206  // This block must be kept in sync with mapd.thrift and HAHandler.h
207  // Please keep in same order for easy check and cut and paste
208  // Important ****
209 
210  void krb5_connect(TKrb5Session& session,
211  const std::string& token,
212  const std::string& dbname) override;
213  // connection, admin
214  void connect(TSessionId& session,
215  const std::string& username,
216  const std::string& passwd,
217  const std::string& dbname) override;
218  void disconnect(const TSessionId& session) override;
219  void switch_database(const TSessionId& session, const std::string& dbname) override;
220  void clone_session(TSessionId& session2, const TSessionId& session1) override;
221  void get_server_status(TServerStatus& _return, const TSessionId& session) override;
222  void get_status(std::vector<TServerStatus>& _return,
223  const TSessionId& session) override;
224  void get_hardware_info(TClusterHardwareInfo& _return,
225  const TSessionId& session) override;
226 
227  bool hasTableAccessPrivileges(const TableDescriptor* td,
228  const Catalog_Namespace::SessionInfo& session_info);
229  void get_tables(std::vector<std::string>& _return, const TSessionId& session) override;
230  void get_tables_for_database(std::vector<std::string>& _return,
231  const TSessionId& session,
232  const std::string& database_name) override;
233  void get_physical_tables(std::vector<std::string>& _return,
234  const TSessionId& session) override;
235  void get_views(std::vector<std::string>& _return, const TSessionId& session) override;
236  void get_tables_meta(std::vector<TTableMeta>& _return,
237  const TSessionId& session) override;
238  void get_table_details(TTableDetails& _return,
239  const TSessionId& session,
240  const std::string& table_name) override;
241  void get_table_details_for_database(TTableDetails& _return,
242  const TSessionId& session,
243  const std::string& table_name,
244  const std::string& database_name) override;
245  void get_internal_table_details(TTableDetails& _return,
246  const TSessionId& session,
247  const std::string& table_name,
248  const bool include_system_columns) override;
249  void get_internal_table_details_for_database(TTableDetails& _return,
250  const TSessionId& session,
251  const std::string& table_name,
252  const std::string& database_name) override;
253  void get_users(std::vector<std::string>& _return, const TSessionId& session) override;
254  void get_databases(std::vector<TDBInfo>& _return, const TSessionId& session) override;
255 
256  void get_version(std::string& _return) override;
257  void start_heap_profile(const TSessionId& session) override;
258  void stop_heap_profile(const TSessionId& session) override;
259  void get_heap_profile(std::string& _return, const TSessionId& session) override;
260  void get_memory(std::vector<TNodeMemoryInfo>& _return,
261  const TSessionId& session,
262  const std::string& memory_level) override;
263  void clear_cpu_memory(const TSessionId& session) override;
264  void clear_gpu_memory(const TSessionId& session) override;
265  void clearRenderMemory(const TSessionId& session); // it's not declared in thrift
266  // or on the persistent leaf client
267  void set_cur_session(const TSessionId& parent_session,
268  const TSessionId& leaf_session,
269  const std::string& start_time_str,
270  const std::string& label,
271  bool for_running_query_kernel) override;
272  void invalidate_cur_session(const TSessionId& parent_session,
273  const TSessionId& leaf_session,
274  const std::string& start_time_str,
275  const std::string& label,
276  bool for_running_query_kernel) override;
277  void set_table_epoch(const TSessionId& session,
278  const int db_id,
279  const int table_id,
280  const int new_epoch) override;
281  void set_table_epoch_by_name(const TSessionId& session,
282  const std::string& table_name,
283  const int new_epoch) override;
284  int32_t get_table_epoch(const TSessionId& session,
285  const int32_t db_id,
286  const int32_t table_id) override;
287  int32_t get_table_epoch_by_name(const TSessionId& session,
288  const std::string& table_name) override;
289  void get_table_epochs(std::vector<TTableEpochInfo>& _return,
290  const TSessionId& session,
291  const int32_t db_id,
292  const int32_t table_id) override;
293  void set_table_epochs(const TSessionId& session,
294  const int32_t db_id,
295  const std::vector<TTableEpochInfo>& table_epochs) override;
296 
297  void get_session_info(TSessionInfo& _return, const TSessionId& session) override;
298 
299  void set_leaf_info(const TSessionId& session, const TLeafInfo& info) override;
300 
301  void sql_execute(ExecutionResult& _return,
302  const TSessionId& session,
303  const std::string& query,
304  const bool column_format,
305  const int32_t first_n,
306  const int32_t at_most_n,
308  // query, render
309  void sql_execute(TQueryResult& _return,
310  const TSessionId& session,
311  const std::string& query,
312  const bool column_format,
313  const std::string& nonce,
314  const int32_t first_n,
315  const int32_t at_most_n) override;
316  void get_completion_hints(std::vector<TCompletionHint>& hints,
317  const TSessionId& session,
318  const std::string& sql,
319  const int cursor) override;
320  // TODO(miyu): merge the following two data frame APIs.
321  void sql_execute_df(TDataFrame& _return,
322  const TSessionId& session,
323  const std::string& query,
324  const TDeviceType::type device_type,
325  const int32_t device_id,
326  const int32_t first_n,
327  const TArrowTransport::type transport_method) override;
328  void sql_execute_gdf(TDataFrame& _return,
329  const TSessionId& session,
330  const std::string& query,
331  const int32_t device_id,
332  const int32_t first_n) override;
333  void deallocate_df(const TSessionId& session,
334  const TDataFrame& df,
335  const TDeviceType::type device_type,
336  const int32_t device_id) override;
337  void interrupt(const TSessionId& query_session,
338  const TSessionId& interrupt_session) override;
339  void sql_validate(TRowDescriptor& _return,
340  const TSessionId& session,
341  const std::string& query) override;
342  TExecuteMode::type getExecutionMode(const TSessionId& session);
343  void set_execution_mode(const TSessionId& session,
344  const TExecuteMode::type mode) override;
345  void render_vega(TRenderResult& _return,
346  const TSessionId& session,
347  const int64_t widget_id,
348  const std::string& vega_json,
349  const int32_t compression_level,
350  const std::string& nonce) override;
351  void get_result_row_for_pixel(
352  TPixelTableRowResult& _return,
353  const TSessionId& session,
354  const int64_t widget_id,
355  const TPixel& pixel,
356  const std::map<std::string, std::vector<std::string>>& table_col_names,
357  const bool column_format,
358  const int32_t pixel_radius,
359  const std::string& nonce) override;
360 
361  // custom expressions
362  int32_t create_custom_expression(const TSessionId& session,
363  const TCustomExpression& custom_expression) override;
364  void get_custom_expressions(std::vector<TCustomExpression>& _return,
365  const TSessionId& session) override;
366  void update_custom_expression(const TSessionId& session,
367  const int32_t id,
368  const std::string& expression_json) override;
369  void delete_custom_expressions(const TSessionId& session,
370  const std::vector<int32_t>& custom_expression_ids,
371  const bool do_soft_delete) override;
372 
373  // dashboards
374  void get_dashboard(TDashboard& _return,
375  const TSessionId& session,
376  const int32_t dashboard_id) override;
377  void get_dashboards(std::vector<TDashboard>& _return,
378  const TSessionId& session) override;
379  int32_t create_dashboard(const TSessionId& session,
380  const std::string& dashboard_name,
381  const std::string& dashboard_state,
382  const std::string& image_hash,
383  const std::string& dashboard_metadata) override;
384  void replace_dashboard(const TSessionId& session,
385  const int32_t dashboard_id,
386  const std::string& dashboard_name,
387  const std::string& dashboard_owner,
388  const std::string& dashboard_state,
389  const std::string& image_hash,
390  const std::string& dashboard_metadata) override;
391  void delete_dashboard(const TSessionId& session, const int32_t dashboard_id) override;
392  void share_dashboards(const TSessionId& session,
393  const std::vector<int32_t>& dashboard_ids,
394  const std::vector<std::string>& groups,
395  const TDashboardPermissions& permissions) override;
396  void delete_dashboards(const TSessionId& session,
397  const std::vector<int32_t>& dashboard_ids) override;
398  void share_dashboard(const TSessionId& session,
399  const int32_t dashboard_id,
400  const std::vector<std::string>& groups,
401  const std::vector<std::string>& objects,
402  const TDashboardPermissions& permissions,
403  const bool grant_role) override;
404  void unshare_dashboards(const TSessionId& session,
405  const std::vector<int32_t>& dashboard_ids,
406  const std::vector<std::string>& groups,
407  const TDashboardPermissions& permissions) override;
408  void unshare_dashboard(const TSessionId& session,
409  const int32_t dashboard_id,
410  const std::vector<std::string>& groups,
411  const std::vector<std::string>& objects,
412  const TDashboardPermissions& permissions) override;
413  void get_dashboard_grantees(std::vector<TDashboardGrantees>& _return,
414  const TSessionId& session,
415  const int32_t dashboard_id) override;
416 
417  void get_link_view(TFrontendView& _return,
418  const TSessionId& session,
419  const std::string& link) override;
420  void create_link(std::string& _return,
421  const TSessionId& session,
422  const std::string& view_state,
423  const std::string& view_metadata) override;
424  // import
425  void load_table_binary(const TSessionId& session,
426  const std::string& table_name,
427  const std::vector<TRow>& rows,
428  const std::vector<std::string>& column_names) override;
429 
430  void load_table_binary_columnar(const TSessionId& session,
431  const std::string& table_name,
432  const std::vector<TColumn>& cols,
433  const std::vector<std::string>& column_names) override;
434  void load_table_binary_columnar_polys(const TSessionId& session,
435  const std::string& table_name,
436  const std::vector<TColumn>& cols,
437  const std::vector<std::string>& column_names,
438  const bool assign_render_groups) override;
439  void load_table_binary_arrow(const TSessionId& session,
440  const std::string& table_name,
441  const std::string& arrow_stream,
442  const bool use_column_names) override;
443 
444  void load_table(const TSessionId& session,
445  const std::string& table_name,
446  const std::vector<TStringRow>& rows,
447  const std::vector<std::string>& column_names) override;
448  void detect_column_types(TDetectResult& _return,
449  const TSessionId& session,
450  const std::string& file_name,
451  const TCopyParams& copy_params) override;
452  void create_table(const TSessionId& session,
453  const std::string& table_name,
454  const TRowDescriptor& row_desc,
455  const TCreateParams& create_params) override;
456  void import_table(const TSessionId& session,
457  const std::string& table_name,
458  const std::string& file_name,
459  const TCopyParams& copy_params) override;
460  void import_geo_table(const TSessionId& session,
461  const std::string& table_name,
462  const std::string& file_name,
463  const TCopyParams& copy_params,
464  const TRowDescriptor& row_desc,
465  const TCreateParams& create_params) override;
466  void import_table_status(TImportStatus& _return,
467  const TSessionId& session,
468  const std::string& import_id) override;
469  void get_first_geo_file_in_archive(std::string& _return,
470  const TSessionId& session,
471  const std::string& archive_path,
472  const TCopyParams& copy_params) override;
473  void get_all_files_in_archive(std::vector<std::string>& _return,
474  const TSessionId& session,
475  const std::string& archive_path,
476  const TCopyParams& copy_params) override;
477  void get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
478  const TSessionId& session,
479  const std::string& file_name,
480  const TCopyParams& copy_params) override;
481  // distributed
482  int64_t query_get_outer_fragment_count(const TSessionId& session,
483  const std::string& select_query) override;
484 
485  void check_table_consistency(TTableMeta& _return,
486  const TSessionId& session,
487  const int32_t table_id) override;
488  void start_query(TPendingQuery& _return,
489  const TSessionId& leaf_session,
490  const TSessionId& parent_session,
491  const std::string& query_ra,
492  const std::string& start_time_str,
493  const bool just_explain,
494  const std::vector<int64_t>& outer_fragment_indices) override;
495  void execute_query_step(TStepResult& _return,
496  const TPendingQuery& pending_query,
497  const TSubqueryId subquery_id,
498  const std::string& start_time_str) override;
499  void broadcast_serialized_rows(const TSerializedRows& serialized_rows,
500  const TRowDescriptor& row_desc,
501  const TQueryId query_id,
502  const TSubqueryId subquery_id,
503  const bool is_final_subquery_result) override;
504 
505  void start_render_query(TPendingRenderQuery& _return,
506  const TSessionId& session,
507  const int64_t widget_id,
508  const int16_t node_idx,
509  const std::string& vega_json) override;
510  void execute_next_render_step(TRenderStepResult& _return,
511  const TPendingRenderQuery& pending_render,
512  const TRenderAggDataMap& merged_data) override;
513 
514  void insert_data(const TSessionId& session, const TInsertData& insert_data) override;
515  void insert_chunks(const TSessionId& session,
516  const TInsertChunks& insert_chunks) override;
517  void checkpoint(const TSessionId& session, const int32_t table_id) override;
518  // DB Object Privileges
519  void get_roles(std::vector<std::string>& _return, const TSessionId& session) override;
520  bool has_role(const TSessionId& sessionId,
521  const std::string& granteeName,
522  const std::string& roleName) override;
523  bool has_object_privilege(const TSessionId& sessionId,
524  const std::string& granteeName,
525  const std::string& objectName,
526  const TDBObjectType::type object_type,
527  const TDBObjectPermissions& permissions) override;
528  void get_db_objects_for_grantee(std::vector<TDBObject>& _return,
529  const TSessionId& session,
530  const std::string& roleName) override;
531  void get_db_object_privs(std::vector<TDBObject>& _return,
532  const TSessionId& session,
533  const std::string& objectName,
534  const TDBObjectType::type type) override;
535  void get_all_roles_for_user(std::vector<std::string>& _return,
536  const TSessionId& session,
537  const std::string& granteeName) override;
538  void get_all_effective_roles_for_user(std::vector<std::string>& _return,
539  const TSessionId& session,
540  const std::string& granteeName) override;
541  std::vector<std::string> get_valid_groups(const TSessionId& session,
542  int32_t dashboard_id,
543  std::vector<std::string> groups);
544  // licensing
545  void set_license_key(TLicenseInfo& _return,
546  const TSessionId& session,
547  const std::string& key,
548  const std::string& nonce) override;
549  void get_license_claims(TLicenseInfo& _return,
550  const TSessionId& session,
551  const std::string& nonce) override;
552  // user-defined functions
553  /*
554  Returns a mapping of device (CPU, GPU) parameters (name, LLVM IR
555  triplet, features, etc)
556  */
557  void get_device_parameters(std::map<std::string, std::string>& _return,
558  const TSessionId& session) override;
559 
560  /*
561  Register Runtime Extension Functions (UDFs, UDTFs) with given
562  signatures. The extension functions implementations are given in a
563  mapping of a device and the corresponding LLVM/NVVM IR string.
564  */
565 
566  void register_runtime_extension_functions(
567  const TSessionId& session,
568  const std::vector<TUserDefinedFunction>& udfs,
569  const std::vector<TUserDefinedTableFunction>& udtfs,
570  const std::map<std::string, std::string>& device_ir_map) override;
571 
572  /*
573  Returns a list of User-Defined Function names available
574  */
575  void get_function_names(std::vector<std::string>& _return,
576  const TSessionId& session) override;
577 
578  /*
579  Returns a list of runtime User-Defined Function names available
580  */
581  void get_runtime_function_names(std::vector<std::string>& _return,
582  const TSessionId& session) override;
583 
584  /*
585  Returns a list of User-Defined Function details
586  */
587  void get_function_details(std::vector<TUserDefinedFunction>& _return,
588  const TSessionId& session,
589  const std::vector<std::string>& udf_names) override;
590 
591  /*
592  Returns a list of User-Defined Table Function names available
593  */
594  void get_table_function_names(std::vector<std::string>& _return,
595  const TSessionId& session) override;
596 
597  /*
598  Returns a list of runtime User-Defined Table Function names available
599  */
600  void get_runtime_table_function_names(std::vector<std::string>& _return,
601  const TSessionId& session) override;
602 
603  /*
604  Returns a list of User-Defined Table Function details
605  */
606  void get_table_function_details(std::vector<TUserDefinedTableFunction>& _return,
607  const TSessionId& session,
608  const std::vector<std::string>& udtf_names) override;
609 
610  // end of sync block for HAHandler and mapd.thrift
611 
612  void shutdown();
613  void emergency_shutdown();
614 
615  TSessionId getInvalidSessionId() const;
616 
617  void internal_connect(TSessionId& session,
618  const std::string& username,
619  const std::string& dbname);
620 
621  bool isAggregator() const;
622 
623  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
624 
626  std::vector<LeafHostInfo> db_leaves_;
627  std::vector<LeafHostInfo> string_leaves_;
628  const std::string base_data_path_;
629  boost::filesystem::path import_path_;
631  std::default_random_engine random_gen_;
632  std::uniform_int_distribution<int64_t> session_id_dist_;
633  const bool jit_debug_;
634  const bool intel_jit_profile_;
636  const bool read_only_;
637  const bool allow_loop_joins_;
640  std::mutex render_mutex_;
641  int64_t start_time_;
644  std::shared_ptr<QueryEngine> query_engine_;
645  std::unique_ptr<RenderHandler> render_handler_;
646  std::unique_ptr<HeavyDBAggHandler> agg_handler_;
647  std::unique_ptr<HeavyDBLeafHandler> leaf_handler_;
648  std::shared_ptr<Calcite> calcite_;
649  const bool legacy_syntax_;
650 
651  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
652 
653  template <typename... ARGS>
654  std::shared_ptr<query_state::QueryState> create_query_state(ARGS&&... args) {
655  return query_states_.create(std::forward<ARGS>(args)...);
656  }
657 
658  // Exactly one immutable SessionInfo copy should be taken by a typical request.
659  Catalog_Namespace::SessionInfo get_session_copy(const TSessionId& session);
660 
661  void get_tables_meta_impl(std::vector<TTableMeta>& _return,
662  QueryStateProxy query_state_proxy,
663  const Catalog_Namespace::SessionInfo& session_info,
664  const bool with_table_locks = true);
665 
666  // Visible for use in tests.
667  void resizeDispatchQueue(size_t queue_size);
668 
669  protected:
670  // Returns empty std::shared_ptr if session.empty().
671  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_ptr(
672  const TSessionId& session_id);
673 
674  ConnectionInfo getConnectionInfo() const;
675 
676  private:
677  std::atomic<bool> initialized_{false};
678  std::shared_ptr<Catalog_Namespace::SessionInfo> create_new_session(
679  TSessionId& session,
680  const std::string& dbname,
681  const Catalog_Namespace::UserMetadata& user_meta,
682  std::shared_ptr<Catalog_Namespace::Catalog> cat);
683  void connect_impl(TSessionId& session,
684  const std::string& passwd,
685  const std::string& dbname,
686  const Catalog_Namespace::UserMetadata& user_meta,
687  std::shared_ptr<Catalog_Namespace::Catalog> cat,
688  query_state::StdLog& stdlog);
689  void disconnect_impl(Catalog_Namespace::SessionInfoPtr& session_ptr);
690  void check_table_load_privileges(const TSessionId& session,
691  const std::string& table_name);
692  void check_table_load_privileges(const Catalog_Namespace::SessionInfo& session_info,
693  const std::string& table_name);
694  void get_tables_impl(std::vector<std::string>& table_names,
696  const GetTablesType get_tables_type,
697  const std::string& database_name = {});
698  void get_table_details_impl(TTableDetails& _return,
699  query_state::StdLog& stdlog,
700  const std::string& table_name,
701  const bool get_system,
702  const bool get_physical,
703  const std::string& database_name = {});
704  void getAllRolesForUserImpl(
705  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
706  std::vector<std::string>& roles,
707  const TSessionId& sessionId,
708  const std::string& granteeName,
709  bool effective);
710  void check_read_only(const std::string& str);
711  void validateGroups(const std::vector<std::string>& groups);
712  void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo& session_info,
713  const std::vector<int32_t>& dashboard_ids);
714  void shareOrUnshareDashboards(const TSessionId& session,
715  const std::vector<int32_t>& dashboard_ids,
716  const std::vector<std::string>& groups,
717  const TDashboardPermissions& permissions,
718  const bool do_share);
719 
720  static void value_to_thrift_column(const TargetValue& tv,
721  const SQLTypeInfo& ti,
722  TColumn& column);
723  static TDatum value_to_thrift(const TargetValue& tv, const SQLTypeInfo& ti);
724 
725  std::pair<TPlanResult, lockmgr::LockedTableDescriptors> parse_to_ra(
727  const std::string& query_str,
728  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
729  const bool acquire_locks,
730  const SystemParameters& system_parameters,
731  bool check_privileges = true);
732 
733  void sql_execute_local(
734  TQueryResult& _return,
735  const QueryStateProxy& query_state_proxy,
736  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
737  const std::string& query_str,
738  const bool column_format,
739  const std::string& nonce,
740  const int32_t first_n,
741  const int32_t at_most_n,
742  const bool use_calcite);
743 
744  int64_t process_deferred_copy_from(const TSessionId& session_id);
745 
746  static void convertData(TQueryResult& _return,
748  const QueryStateProxy& query_state_proxy,
749  const bool column_format,
750  const int32_t first_n,
751  const int32_t at_most_n);
752 
753  void sql_execute_impl(ExecutionResult& _return,
755  const bool column_format,
756  const ExecutorDeviceType executor_device_type,
757  const int32_t first_n,
758  const int32_t at_most_n,
759  const bool use_calcite,
761 
763  const TableDescriptor* td,
764  const AccessPrivileges acess_priv);
765 
766  void execute_distributed_copy_statement(
768  const Catalog_Namespace::SessionInfo& session_info);
769 
770  TQueryResult validate_rel_alg(const std::string& query_ra, QueryStateProxy);
771 
772  std::vector<PushedDownFilterInfo> execute_rel_alg(
773  ExecutionResult& _return,
775  const std::string& query_ra,
776  const bool column_format,
777  const ExecutorDeviceType executor_device_type,
778  const int32_t first_n,
779  const int32_t at_most_n,
780  const bool just_validate,
781  const bool find_push_down_candidates,
782  const ExplainInfo& explain_info,
783  const std::optional<size_t> executor_index = std::nullopt) const;
784 
785  void execute_rel_alg_with_filter_push_down(
786  ExecutionResult& _return,
788  std::string& query_ra,
789  const bool column_format,
790  const ExecutorDeviceType executor_device_type,
791  const int32_t first_n,
792  const int32_t at_most_n,
793  const bool just_explain,
794  const bool just_calcite_explain,
795  const std::vector<PushedDownFilterInfo>& filter_push_down_requests);
796 
797  void executeDdl(TQueryResult& _return,
798  const std::string& query_ra,
799  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
800 
801  void executeDdl(ExecutionResult& _return,
802  const std::string& query_ra,
803  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
804 
805  TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog* cat,
806  const ColumnDescriptor* cd);
807  TRowDescriptor fixup_row_descriptor(const TRowDescriptor& row_desc,
809  void set_execution_mode_nolock(Catalog_Namespace::SessionInfo* session_ptr,
810  const TExecuteMode::type mode);
811  char unescape_char(std::string str);
812  import_export::CopyParams thrift_to_copyparams(const TCopyParams& cp);
813  TCopyParams copyparams_to_thrift(const import_export::CopyParams& cp);
814  void check_geospatial_files(const boost::filesystem::path file_path,
815  const import_export::CopyParams& copy_params);
816  void render_rel_alg(TRenderResult& _return,
817  const std::string& query_ra,
818  const std::string& query_str,
819  const Catalog_Namespace::SessionInfo& session_info,
820  const std::string& render_type,
821  const bool is_projection_query);
822 
823  TColumnType create_geo_column(const TDatumType::type type,
824  const std::string& name,
825  const bool is_array);
826 
827  static void convertExplain(TQueryResult& _return,
828  const ResultSet& results,
829  const bool column_format);
830  static void convertResult(TQueryResult& _return,
831  const ResultSet& results,
832  const bool column_format);
833 
// Static helper: writes into the TQueryResult out-parameter `_return` from
// `results`, with `targets` supplying the per-column metadata.
//   query_state_proxy - passed by value.
//   column_format     - caller-supplied layout flag (presumably columnar vs
//                       row-wise - TODO confirm).
//   first_n/at_most_n - 32-bit signed row limits; their exact meaning and any
//                       "no limit" sentinel are defined by the implementation.
834  static void convertRows(TQueryResult& _return,
835  QueryStateProxy query_state_proxy,
836  const std::vector<TargetMetaInfo>& targets,
837  const ResultSet& results,
838  const bool column_format,
839  const int32_t first_n,
840  const int32_t at_most_n);
841 
// Non-static counterpart to convertRows. Note the argument order: the
// TQueryResult out-parameter `_return` comes LAST here, whereas the static
// convert* helpers take it first. `result` is a non-const reference, so the
// implementation is allowed to modify the ExecutionResult.
842  // Use ExecutionResult to populate a TQueryResult
843  // calls convertRows, but after some setup using session_info
844  void convertResultSet(ExecutionResult& result,
845  const Catalog_Namespace::SessionInfo& session_info,
846  const std::string& query_state_str,
847  TQueryResult& _return);
848 
// Static helper: writes into the TQueryResult out-parameter `_return` from
// `results`. `label` is a const std::string passed by value (copied per call);
// how the label is used is decided by the definition.
849  static void createSimpleResult(TQueryResult& _return,
850  const ResultSet& results,
851  const bool column_format,
852  const std::string label);
853 
// Const member: returns a vector of TargetMetaInfo by value for the given
// analyzer target entries.
854  std::vector<TargetMetaInfo> getTargetMetaInfo(
855  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
856 
// Const member: returns a vector of strings by value for the given TargetMetaInfo
// list. Overloaded with a version that takes analyzer target entries instead.
857  std::vector<std::string> getTargetNames(
858  const std::vector<TargetMetaInfo>& targets) const;
859 
// Const member: returns a vector of strings by value for the given analyzer target
// entries. Overloaded with a version that takes TargetMetaInfo instead.
860  std::vector<std::string> getTargetNames(
861  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
862 
// Prototype only. `hints` is the output vector; `visible_tables` and `stdlog` are
// also non-const references, so the implementation may modify them.
//   sql    - statement text being completed.
//   cursor - integer position within `sql` (units/origin defined by the caller's
//            protocol - TODO confirm).
// NOTE(review): get_token_based_completions takes `stdlog` and `visible_tables` in
// the opposite order; the types differ, so a mix-up fails to compile rather than
// misbehaving silently.
863  void get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
864  std::vector<std::string>& visible_tables,
865  query_state::StdLog& stdlog,
866  const std::string& sql,
867  const int cursor);
// Prototype only. `hints` is the output vector; `stdlog` and `visible_tables` are
// also non-const references, so the implementation may modify them.
//   sql    - statement text being completed.
//   cursor - integer position within `sql` (units/origin defined by the caller's
//            protocol - TODO confirm).
// NOTE(review): get_completion_hints_unsorted takes `visible_tables` before
// `stdlog`; here the order is reversed.
868  void get_token_based_completions(std::vector<TCompletionHint>& hints,
869  query_state::StdLog& stdlog,
870  std::vector<std::string>& visible_tables,
871  const std::string& sql,
872  const int cursor);
873 
// Returns, by value, a map from string to a set of strings (by the function name:
// table name -> column names). `table_names` is a non-const reference, so the
// implementation may modify the caller's vector - check the definition before
// relying on it being unchanged.
874  std::unordered_map<std::string, std::unordered_set<std::string>>
875  fill_column_names_by_table(std::vector<std::string>& table_names,
876  query_state::StdLog& stdlog);
877 
// Returns a TDashboard by value for the dashboard descriptor `dash`.
// `populate_state` defaults to true.
// NOTE(review): the listing jumps from line 879 to 881, so one parameter line
// between `session_ptr` and `dash` is not visible here.
878  TDashboard get_dashboard_impl(
879  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
881  const DashboardDescriptor* dash,
882  const bool populate_state = true);
883 
// Family of static predicates, one per object kind (database, table, dashboard,
// view, server). All five share the signature
//   bool(const AccessPrivileges&, const TDBObjectPermissions&)
// which is what lets them be stored as function pointers in permissionFuncMap_.
// Prototype only: the matching rule between `privs` and the Thrift permissions
// object is defined with the bodies.
// NOTE(review): has_table_permission names its second parameter `permission`
// (singular) while the other four use `permissions`; harmless to callers, but
// inconsistent.
884  static bool has_database_permission(const AccessPrivileges& privs,
885  const TDBObjectPermissions& permissions);
886  static bool has_table_permission(const AccessPrivileges& privs,
887  const TDBObjectPermissions& permission);
888  static bool has_dashboard_permission(const AccessPrivileges& privs,
889  const TDBObjectPermissions& permissions);
890  static bool has_view_permission(const AccessPrivileges& privs,
891  const TDBObjectPermissions& permissions);
892  static bool has_server_permission(const AccessPrivileges& privs,
893  const TDBObjectPermissions& permissions);
// Returns the matching table names as a set, by value. Note that `table_names` is
// taken by non-const reference even though the description below reads as a pure
// lookup - check the definition before assuming the vector is left unchanged.
894  // For the provided upper case column names `uc_column_names`, return
895  // the tables from `table_names` which contain at least one of them.
896  // Used to rank the TABLE auto-completion hints by the columns
897  // specified in the projection.
898  std::unordered_set<std::string> get_uc_compatible_table_names_by_column(
899  const std::unordered_set<std::string>& uc_column_names,
900  std::vector<std::string>& table_names,
901  query_state::StdLog& stdlog);
902 
// Prototype only. Returns an owning pointer to a lock container over a
// `const TableDescriptor*`; the caller owns the returned object.
//   loader, import_buffers - raw pointers to caller-owned objects (presumably
//                            out-parameters filled in by this call - TODO confirm).
//   num_cols               - column count supplied by the caller.
//   column_names           - read-only list of column names.
//   load_type              - string passed by value; accepted values and use are
//                            defined by the implementation.
903  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
904  prepare_loader_generic(
905  const Catalog_Namespace::SessionInfo& session_info,
906  const std::string& table_name,
907  size_t num_cols,
908  std::unique_ptr<import_export::Loader>* loader,
909  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
910  const std::vector<std::string>& column_names,
911  std::string load_type);
912 
// Prototype only. Operates on the caller's `import_buffers` (non-const reference)
// for the column described by `cd`.
//   col_idx              - size_t passed by non-const reference, i.e. an in/out
//                          index the implementation may advance.
//   num_rows             - row count supplied by the caller.
//   assign_render_groups - caller-supplied flag; its effect is defined by the body.
913  void fillGeoColumns(
914  const TSessionId& session,
915  const Catalog_Namespace::Catalog& catalog,
916  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
917  const ColumnDescriptor* cd,
918  size_t& col_idx,
919  size_t num_rows,
920  const std::string& table_name,
921  bool assign_render_groups);
922 
// Prototype only. Operates on the caller's `import_buffers` (non-const reference)
// using the list of column descriptors `cds`.
//   desc_id_to_column_id - read-only vector of ints (by its name, a mapping from
//                          descriptor position to column id - TODO confirm).
//   num_rows             - row count supplied by the caller.
//   assign_render_groups - caller-supplied flag; its effect is defined by the body.
923  void fillMissingBuffers(
924  const TSessionId& session,
925  const Catalog_Namespace::Catalog& catalog,
926  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
927  const std::list<const ColumnDescriptor*>& cds,
928  const std::vector<int>& desc_id_to_column_id,
929  size_t num_rows,
930  const std::string& table_name,
931  bool assign_render_groups);
932 
// Session bookkeeping members.
// sessions_store_   - exclusively owned SessionsStore.
// calcite_sessions_ - map from a string key to a SessionInfoPtr.
// NOTE(review): listing lines 933 and 936 (either side of these two members) are
// absent from this extract, so a mutex that guards calcite_sessions_ may be
// declared there - check the full header.
934  std::unique_ptr<Catalog_Namespace::SessionsStore> sessions_store_;
935  std::unordered_map<std::string, Catalog_Namespace::SessionInfoPtr> calcite_sessions_;
937 
// Authentication / session-lifetime settings. The two durations are const, so they
// are fixed at construction; their time unit is not stated in this header.
938  bool super_user_rights_; // default is "false"; setting to "true"
939  // ignores passwd checks in "connect(..)"
940  // method
941  const int idle_session_duration_; // max duration of idle session
942  const int max_session_duration_; // max duration of session
943 
// Construction-time configuration (all const or reference members, so none can be
// reseated or reassigned after the handler is built).
// NOTE(review): the members declared as `const T&` (libgeos_so_filename_,
// udf_filename_, clang_path_, clang_options_) only alias objects owned elsewhere;
// those objects must outlive this handler or the references dangle.
// NOTE(review): the listing numbers skip here (945-946, 948-950, 952, 958), so
// further members declared on those lines are not visible in this extract.
944  const bool enable_rendering_;
947  const unsigned renderer_vulkan_timeout_;
951  const size_t reserved_gpu_mem_;
953  const size_t render_mem_bytes_;
954  const size_t num_reader_threads_;
// Only present when the build defines ENABLE_GEOS.
955 #ifdef ENABLE_GEOS
956  const std::string& libgeos_so_filename_;
957 #endif
959  const std::string& udf_filename_;
960  const std::string& clang_path_;
961  const std::vector<std::string>& clang_options_;
962 
// Fields of a small aggregate describing one deferred copy (used as the mapped
// type DeferredCopyFromState of was_deferred_copy_from).
// NOTE(review): the opening line of the struct (listing line 963) and one field
// (listing line 966) are absent from this extract.
964  std::string table;
965  std::string file_name;
967  std::string partitions;
968  };
969 
// Per-session deferred-copy state, keyed by session id. Every accessor in this
// struct takes deferred_copy_from_mutex before touching the map.
// NOTE(review): the enclosing struct header (listing line 970) and the mutex
// declaration (listing line 972) are absent from this extract.
971  std::unordered_map<std::string, DeferredCopyFromState> was_deferred_copy_from;
973 
// Looks up the deferred-copy state recorded for `session_id`.
// Holds deferred_copy_from_mutex for the duration of the call.
// Returns std::nullopt when the session has no entry; otherwise returns a COPY of
// the stored state, so the caller never holds a reference into the map.
974  std::optional<DeferredCopyFromState> operator()(const std::string& session_id) {
975  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
976  auto itr = was_deferred_copy_from.find(session_id);
977  if (itr == was_deferred_copy_from.end()) {
978  return std::nullopt;
979  }
980  return itr->second;
981  }
982 
// Records `state` for `session_id` while holding deferred_copy_from_mutex.
// insert() never overwrites an existing key; ret.second is false in that case, so
// the CHECK fires if a state is already registered for this session. Callers are
// therefore expected to remove() an old entry before adding a new one.
983  void add(const std::string& session_id, const DeferredCopyFromState& state) {
984  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
985  const auto ret = was_deferred_copy_from.insert(std::make_pair(session_id, state));
986  CHECK(ret.second);
987  }
988 
// Drops the entry for `session_id` while holding deferred_copy_from_mutex.
// Erasing by key is a no-op when the session has no entry, so this is safe to call
// unconditionally.
989  void remove(const std::string& session_id) {
990  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
991  was_deferred_copy_from.erase(session_id);
992  }
993  };
995 
// Both members are `mutable`, so const member functions may lock the mutex and
// update the map. The mutex is presumably the guard for the map declared right
// after it - TODO confirm at the use sites.
996  // Only for IPC device memory deallocation
997  mutable std::mutex handle_to_dev_ptr_mutex_;
998  mutable std::unordered_map<std::string, std::string> ipc_handle_to_dev_ptr_;
999 
// Grants the free function run_warmup_queries access to this class's private
// members. All three parameters are taken by value.
1000  friend void run_warmup_queries(std::shared_ptr<DBHandler> handler,
1001  std::string base_path,
1002  std::string query_file_path);
1003 
// Classes granted access to this class's private members.
1004  friend class RenderHandler::Impl;
1005  friend class HeavyDBAggHandler;
1006  friend class HeavyDBLeafHandler;
1007 
// Dispatch table from an object-kind string ("database", "dashboard", "table",
// "view", "server") to the matching static has_*_permission predicate.
// Initialised in-class, so every instance gets its own copy of the map.
// The `"..."s` literals need the std::string literal operator to be in scope
// (a using-directive for std::string_literals, declared outside this extract).
1008  std::map<const std::string, const PermissionFuncPtr> permissionFuncMap_ = {
1009  {"database"s, has_database_permission},
1010  {"dashboard"s, has_dashboard_permission},
1011  {"table"s, has_table_permission},
1012  {"view"s, has_view_permission},
1013  {"server"s, has_server_permission}};
1014 
// Prototype only. Takes a mutable pointer to a parsed DDL statement; which
// statement kinds lead to sessions being invalidated is decided by the definition.
1015  void check_and_invalidate_sessions(Parser::DDLStmt* ddl);
1016 
// Create/remove pair for in-memory Calcite sessions. The create call returns a
// string which, judging by removeInMemoryCalciteSession's parameter, is the session
// id to hand back when removing (TODO confirm).
// NOTE(review): the return type is `std::string const` by value; the top-level
// const stops callers from moving out of the returned temporary.
1017  std::string const createInMemoryCalciteSession(
1018  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr);
1019  void removeInMemoryCalciteSession(const std::string& session_id);
1020 
// Returns an ExecutionResult by value. `session_ptr` is shared ownership of a
// const SessionInfo, passed by value. Prototype only: which sessions are reported
// and any privilege filtering are defined by the implementation.
1021  ExecutionResult getUserSessions(
1022  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1023 
1024  // getQueries returns the set of queries queued in the DB
1025  // that belong to the same DB as the caller's session
1026 
// Returns an ExecutionResult by value. `session_ptr` is shared ownership of a
// const SessionInfo, passed by value.
1027  ExecutionResult getQueries(
1028  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1029 
// Overrides a virtual declared in a base class (hence `override`). Results are
// written to the `_return` vector for the session identified by `session`.
1030  void get_queries_info(std::vector<TQueryInfo>& _return,
1031  const TSessionId& session) override;
1032 
//   session_info   - session of the caller issuing the interrupt.
//   target_session - string identifying the session to interrupt (exact id format
//                    is defined by the implementation - TODO confirm).
1033  // this function passes the interrupt request to the DB executor
1034  void interruptQuery(const Catalog_Namespace::SessionInfo& session_info,
1035  const std::string& target_session);
1036 
// Prototype only. `execution_time_ms` is a non-const int64_t reference, i.e. an
// output the implementation writes (presumably elapsed milliseconds, going by the
// name - TODO confirm). `cache_type` is a string selector whose accepted values are
// defined by the implementation.
// NOTE(review): the listing jumps from line 1037 to 1039, so one parameter line is
// not visible here.
// NOTE(review): `sesson_id` looks like a typo for `session_id`; left unchanged.
1037  void alterSystemClear(const std::string& sesson_id,
1039  const std::string& cache_type,
1040  int64_t& execution_time_ms);
1041 
// Prototype only. `session_parameter` is a pair of strings (presumably parameter
// name and value - TODO confirm the order). `execution_time_ms` is a non-const
// int64_t reference, i.e. an output the implementation writes.
// NOTE(review): the listing jumps from line 1042 to 1044, so one parameter line is
// not visible here.
// NOTE(review): `sesson_id` looks like a typo for `session_id`; left unchanged.
1042  void alterSession(const std::string& sesson_id,
1044  const std::pair<std::string, std::string>& session_parameter,
1045  int64_t& execution_time_ms);
1046 
1047  // render group assignment
1048 
// Three-way mode selector taken by loadTableBinaryColumnarInternal. What each
// enumerator makes the loader do is defined by that function's body.
1049  enum class AssignRenderGroupsMode { kNone, kAssign, kCleanUp };
1050 
// Prototype only. Takes columnar data `cols` for `table_name` together with the
// list `column_names`; all arguments are read-only.
// `assign_render_groups_mode` selects one of the AssignRenderGroupsMode values.
1051  void loadTableBinaryColumnarInternal(
1052  const TSessionId& session,
1053  const std::string& table_name,
1054  const std::vector<TColumn>& cols,
1055  const std::vector<std::string>& column_names,
1056  const AssignRenderGroupsMode assign_render_groups_mode);
1057 
// Const accessor returning a TRole::type value; how the role is determined is
// defined by the implementation.
1058  TRole::type getServerRole() const;
1059 
// Nested map aliases for render-group analyzers. Only the first alias is complete
// in this extract: string key -> exclusively owned RenderGroupAnalyzer.
// NOTE(review): listing lines 1063 and 1065 are absent, so the two map types
// below are the right-hand sides of further `using ... =` aliases whose heads are
// not visible. The last one refers to RenderGroupAssignmentTableMap, so that is
// presumably the name introduced on line 1063.
1060  using RenderGroupAssignmentColumnMap =
1061  std::unordered_map<std::string,
1062  std::unique_ptr<import_export::RenderGroupAnalyzer>>;
1064  std::unordered_map<std::string, RenderGroupAssignmentColumnMap>;
1066  std::unordered_map<TSessionId, RenderGroupAssignmentTableMap>;
1070 
// Prototype only. Same parameter list as importGeoTableSingle; all arguments are
// read-only references. By its name this variant deals with the glob/filter/sort
// handling of `file_name` - TODO confirm how it relates to importGeoTableSingle.
1071  void importGeoTableGlobFilterSort(const TSessionId& session,
1072  const std::string& table_name,
1073  const std::string& file_name,
1074  const import_export::CopyParams& copy_params,
1075  const TRowDescriptor& row_desc,
1076  const TCreateParams& create_params);
1077 
// Prototype only. Same parameter list as importGeoTableGlobFilterSort; all
// arguments are read-only references. By its name this variant handles a single
// `file_name` - TODO confirm against the definition.
1078  void importGeoTableSingle(const TSessionId& session,
1079  const std::string& table_name,
1080  const std::string& file_name,
1081  const import_export::CopyParams& copy_params,
1082  const TRowDescriptor& row_desc,
1083  const TCreateParams& create_params);
1084 };
std::lock_guard< T > lock_guard
Classes used to wrap parser calls for calcite redirection.
AssignRenderGroupsMode
Definition: DBHandler.h:1049
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:627
auto get_users(SysCatalog &syscat, std::unique_ptr< SqliteConnector > &sqliteConnector, const int32_t dbId=-1)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:961
boost::filesystem::path import_path_
Definition: DBHandler.h:629
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:651
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:271
std::unordered_map< std::string, RenderGroupAssignmentColumnMap > RenderGroupAssignmentTableMap
Definition: DBHandler.h:1064
ClientProtocol
const std::string & udf_filename_
Definition: DBHandler.h:959
std::string cat(Ts &&...args)
const int render_oom_retry_threshold_
Definition: DBHandler.h:949
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
void run_warmup_queries(std::shared_ptr< DBHandler > handler, std::string base_path, std::string query_file_path)
Definition: HeavyDB.cpp:206
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:997
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:654
static thread_local std::string client_address
Definition: DBHandler.h:155
ExecutorDeviceType
std::mutex render_group_assignment_mutex_
Definition: DBHandler.h:1068
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:983
bool user_can_access_table(const Catalog_Namespace::SessionInfo &session_info, const TableDescriptor *td, const AccessPrivileges access_priv)
bool(*)(const AccessPrivileges &, const TDBObjectPermissions &) PermissionFuncPtr
Definition: DBHandler.h:105
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:156
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:625
std::unordered_map< TSessionId, RenderGroupAssignmentTableMap > RenderGroupAnalyzerSessionMap
Definition: DBHandler.h:1066
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:947
const std::string base_data_path_
Definition: DBHandler.h:628
const bool jit_debug_
Definition: DBHandler.h:633
const bool check_origin_
Definition: DBHandler.h:159
const size_t render_mem_bytes_
Definition: DBHandler.h:953
std::map< TSessionId, std::shared_ptr< Catalog_Namespace::SessionInfo >> SessionMap
Definition: DBHandler.h:104
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:994
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:641
const bool renderer_use_ppll_polys_
Definition: DBHandler.h:945
import_export::CopyParams copy_params
Definition: DBHandler.h:966
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:935
This file contains the class specification and related data structures for Catalog.
std::mutex render_mutex_
Definition: DBHandler.h:640
static size_t max_bytes_for_thrift()
Definition: DBHandler.h:203
query_state::QueryStates query_states_
Definition: DBHandler.h:933
Supported runtime functions management and retrieval.
const size_t reserved_gpu_mem_
Definition: DBHandler.h:951
std::optional< DeferredCopyFromState > operator()(const std::string &session_id)
Definition: DBHandler.h:974
Classes representing a parse tree.
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:952
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4440
GetTablesType
Definition: Catalog.h:63
const int max_session_duration_
Definition: DBHandler.h:942
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:630
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:626
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:958
const std::string & clang_path_
Definition: DBHandler.h:960
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:645
Checked json field retrieval.
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:644
SystemParameters & system_parameters_
Definition: DBHandler.h:643
const size_t num_reader_threads_
Definition: DBHandler.h:954
specifies the content in-memory of a row in the column metadata table
const bool enable_auto_clear_render_mem_
Definition: DBHandler.h:948
const bool renderer_prefer_igpu_
Definition: DBHandler.h:946
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:936
std::map< std::string, std::string > get_device_parameters(bool cpu_only)
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4435
const bool allow_loop_joins_
Definition: DBHandler.h:637
heavyai::shared_mutex sessions_mutex_
Definition: DBHandler.h:639
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:646
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1069
const bool enable_rendering_
Definition: DBHandler.h:944
std::unordered_map< std::string, DeferredCopyFromState > was_deferred_copy_from
Definition: DBHandler.h:971
const bool intel_jit_profile_
Definition: DBHandler.h:634
bool super_user_rights_
Definition: DBHandler.h:938
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:934
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:648
void shutdown()
Definition: Logger.cpp:344
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:623
const bool read_only_
Definition: DBHandler.h:636
bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol > in, std::shared_ptr<::apache::thrift::protocol::TProtocol > out, void *connectionContext) override
Definition: DBHandler.h:120
const bool legacy_syntax_
Definition: DBHandler.h:649
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:647
#define CHECK(condition)
Definition: Logger.h:222
QueryId query_id()
Definition: Logger.cpp:473
const int idle_session_duration_
Definition: DBHandler.h:941
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:998
RenderGroupAnalyzerSessionMap render_group_assignment_map_
Definition: DBHandler.h:1067
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:950
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
bool allow_multifrag_
Definition: DBHandler.h:635
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
const AuthMetadata & authMetadata_
Definition: DBHandler.h:642
TrackingProcessor(std::shared_ptr< HeavyIf > handler, const bool check_origin)
Definition: DBHandler.h:117
bool cpu_mode_only_
Definition: DBHandler.h:638
std::default_random_engine random_gen_
Definition: DBHandler.h:631
std::uniform_int_distribution< int64_t > session_id_dist_
Definition: DBHandler.h:632
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27