OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DBHandler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * File: DBHandler.h
19  * Author: michael
20  *
21  * Created on Jan 1, 2017, 12:40 PM
22  */
23 
24 #pragma once
25 
26 #include "LeafAggregator.h"
27 
28 #ifdef HAVE_PROFILER
29 #include <gperftools/heap-profiler.h>
30 #endif // HAVE_PROFILER
31 
32 #include "Calcite/Calcite.h"
33 #include "Catalog/Catalog.h"
35 #include "Geospatial/Transforms.h"
36 #include "ImportExport/Importer.h"
37 #include "LockMgr/LockMgr.h"
38 #include "Logger/Logger.h"
39 #include "Parser/ParserWrapper.h"
41 #include "Parser/parser.h"
44 #include "QueryEngine/Execute.h"
50 #include "Shared/StringTransform.h"
53 #include "Shared/mapd_shared_ptr.h"
54 #include "Shared/measure.h"
55 #include "Shared/scope.h"
61 
62 #include <sys/types.h>
63 #include <thrift/server/TServer.h>
64 #include <thrift/transport/THttpTransport.h>
65 #include <thrift/transport/TSocket.h>
66 #include <thrift/transport/TTransport.h>
67 #include <atomic>
68 #include <boost/algorithm/string.hpp>
69 #include <boost/algorithm/string/replace.hpp>
70 #include <boost/algorithm/string/trim.hpp>
71 #include <boost/filesystem.hpp>
72 #include <boost/make_shared.hpp>
73 #include <boost/noncopyable.hpp>
74 #include <boost/none_t.hpp>
75 #include <boost/optional.hpp>
76 #include <boost/program_options.hpp>
77 #include <boost/regex.hpp>
78 #include <boost/tokenizer.hpp>
79 #include <cmath>
80 #include <csignal>
81 #include <fstream>
82 #include <list>
83 #include <map>
84 #include <memory>
85 #include <mutex>
86 #include <random>
87 #include <regex>
88 #include <string>
89 #include <thread>
90 #include <typeinfo>
91 #include <unordered_map>
92 
93 #include "gen-cpp/OmniSci.h"
94 #include "gen-cpp/extension_functions_types.h"
95 
96 using namespace std::string_literals;
97 
98 class MapDAggHandler;
99 class MapDLeafHandler;
100 
101 // Multiple concurrent requests for the same session can occur. For that reason, each
102 // request briefly takes a lock to make a copy of the appropriate SessionInfo object. Then
103 // it releases the lock and uses the copy for the remainder of the request.
104 using SessionMap = std::map<TSessionId, std::shared_ptr<Catalog_Namespace::SessionInfo>>;
// Pointer to a permission predicate taking (AccessPrivileges, TDBObjectPermissions) and
// returning bool. The static DBHandler::has_*_permission members declared later in this
// file (database, table, dashboard, view, server) all have this signature.
105 using PermissionFuncPtr = bool (*)(const AccessPrivileges&, const TDBObjectPermissions&);
107 
// Thrift processor that wraps OmniSciProcessor: before forwarding each request to
// OmniSciProcessor::process() it can record the request's origin in the thread_local
// static `client_address`.
//
// NOTE(review): this text is a rendered source listing. Every line carries its listing
// number and the numbering skips (125->127, 134->136, 136->138, 138->140, 141->143), so
// the bodies of several branches below are not visible here. Check the real header
// before relying on what those branches do.
108 class TrackingProcessor : public OmniSciProcessor {
109  public:
 // `handler` is passed to the OmniSciProcessor base; `check_origin` turns the origin
 // inspection in process() on or off for the lifetime of this object.
110  TrackingProcessor(mapd::shared_ptr<OmniSciIf> handler, const bool check_origin)
111  : OmniSciProcessor(handler), check_origin_(check_origin) {}
112 
 // Inspects the transport behind `in` (only when it is non-null and check_origin_ is
 // set), then delegates to OmniSciProcessor::process() and returns its result.
113  bool process(mapd::shared_ptr<::apache::thrift::protocol::TProtocol> in,
114  mapd::shared_ptr<::apache::thrift::protocol::TProtocol> out,
115  void* connectionContext) {
116  using namespace ::apache::thrift;
117 
118  auto transport = in->getTransport();
119  if (transport && check_origin_) {
 // Function-local static mutex: a single instance shared by every call. The guard is
 // held until the end of this `if` block, i.e. it is released before the request is
 // dispatched to the base class.
120  static std::mutex processor_mutex;
121  std::lock_guard lock(processor_mutex);
122  const auto origin_str = transport->getOrigin();
 // Split the origin string on commas.
123  std::vector<std::string> origins;
124  boost::split(origins, origin_str, boost::is_any_of(","));
125  if (origins.empty()) {
 // Body not visible in this listing.
 // NOTE(review): boost::split normally yields one (empty) token for an empty input, so
 // this branch may never be taken — confirm.
127  } else {
128  // Take the first origin, which should be the client IP before any intermediate
129  // servers (e.g. the web server)
130  auto trimmed_origin = origins.front();
131  boost::algorithm::trim(trimmed_origin);
132  TrackingProcessor::client_address = trimmed_origin;
133  }
 // Distinguishes THttpTransport, TBufferedTransport and any other transport type. The
 // action taken in each branch is not visible in this listing; presumably each one sets
 // client_protocol — TODO confirm.
134  if (dynamic_cast<transport::THttpTransport*>(transport.get())) {
136  } else if (dynamic_cast<transport::TBufferedTransport*>(transport.get())) {
138  } else {
140  }
141  } else {
 // No transport, or origin checking disabled. Body not visible in this listing.
143  }
144 
145  return OmniSciProcessor::process(in, out, connectionContext);
146  }
147 
 // Per-thread state. client_address is assigned in process(); no assignment to
 // client_protocol is visible in this listing.
148  static thread_local std::string client_address;
149  static thread_local ClientProtocol client_protocol;
150 
151  private:
 // Fixed at construction; gates the origin inspection in process().
152  const bool check_origin_;
153 };
154 
155 struct DiskCacheConfig;
156 
157 class DBHandler : public OmniSciIf {
158  public:
159  DBHandler(const std::vector<LeafHostInfo>& db_leaves,
160  const std::vector<LeafHostInfo>& string_leaves,
161  const std::string& base_data_path,
162  const bool allow_multifrag,
163  const bool jit_debug,
164  const bool intel_jit_profile,
165  const bool read_only,
166  const bool allow_loop_joins,
167  const bool enable_rendering,
168  const bool renderer_use_vulkan_driver,
169  const bool enable_auto_clear_render_mem,
170  const int render_oom_retry_threshold,
171  const size_t render_mem_bytes,
172  const size_t max_concurrent_render_sessions,
173  const size_t reserved_gpu_mem,
174  const bool render_compositor_use_last_gpu,
175  const size_t num_reader_threads,
176  const AuthMetadata& authMetadata,
177  SystemParameters& system_parameters,
178  const bool legacy_syntax,
179  const int idle_session_duration,
180  const int max_session_duration,
181  const bool enable_runtime_udf_registration,
182  const std::string& udf_filename,
183  const std::string& clang_path,
184  const std::vector<std::string>& clang_options,
185 #ifdef ENABLE_GEOS
186  const std::string& libgeos_so_filename,
187 #endif
188  const DiskCacheConfig& disk_cache_config,
189  const bool is_new_db);
190  void initialize(const bool is_new_db);
191  ~DBHandler() override;
192 
 // Returns the constant 2,000,000,000 as a size_t. The product is evaluated left to
 // right: 2 * 1000 * 1000 stays within int, and the final LL operand widens the last
 // multiplication to long long. Judging by the name this is a byte limit for Thrift
 // payloads; its callers are not shown here.
193  static inline size_t max_bytes_for_thrift() { return 2 * 1000 * 1000 * 1000LL; }
194 
195  // Important ****
196  // This block must be kept in sync with mapd.thrift and HAHandler.h
197  // Please keep the same order for easy checking and cut and paste
198  // Important ****
199  static void parser_with_error_handler(
200  const std::string& query_str,
201  std::list<std::unique_ptr<Parser::Stmt>>& parse_trees);
202 
203  void krb5_connect(TKrb5Session& session,
204  const std::string& token,
205  const std::string& dbname) override;
206  // connection, admin
207  void connect(TSessionId& session,
208  const std::string& username,
209  const std::string& passwd,
210  const std::string& dbname) override;
211  void disconnect(const TSessionId& session) override;
212  void switch_database(const TSessionId& session, const std::string& dbname) override;
213  void clone_session(TSessionId& session2, const TSessionId& session1) override;
214  void get_server_status(TServerStatus& _return, const TSessionId& session) override;
215  void get_status(std::vector<TServerStatus>& _return,
216  const TSessionId& session) override;
217  void get_hardware_info(TClusterHardwareInfo& _return,
218  const TSessionId& session) override;
219 
220  bool hasTableAccessPrivileges(const TableDescriptor* td,
221  const Catalog_Namespace::SessionInfo& session_info);
222  void get_tables(std::vector<std::string>& _return, const TSessionId& session) override;
223  void get_physical_tables(std::vector<std::string>& _return,
224  const TSessionId& session) override;
225  void get_views(std::vector<std::string>& _return, const TSessionId& session) override;
226  void get_tables_meta(std::vector<TTableMeta>& _return,
227  const TSessionId& session) override;
228  void get_table_details(TTableDetails& _return,
229  const TSessionId& session,
230  const std::string& table_name) override;
231  void get_internal_table_details(TTableDetails& _return,
232  const TSessionId& session,
233  const std::string& table_name) override;
234  void get_users(std::vector<std::string>& _return, const TSessionId& session) override;
235  void get_databases(std::vector<TDBInfo>& _return, const TSessionId& session) override;
236 
237  void get_version(std::string& _return) override;
238  void start_heap_profile(const TSessionId& session) override;
239  void stop_heap_profile(const TSessionId& session) override;
240  void get_heap_profile(std::string& _return, const TSessionId& session) override;
241  void get_memory(std::vector<TNodeMemoryInfo>& _return,
242  const TSessionId& session,
243  const std::string& memory_level) override;
244  void clear_cpu_memory(const TSessionId& session) override;
245  void clear_gpu_memory(const TSessionId& session) override;
246  void set_cur_session(const TSessionId& parent_session,
247  const TSessionId& leaf_session,
248  const std::string& start_time_str,
249  const std::string& label) override;
250  void invalidate_cur_session(const TSessionId& parent_session,
251  const TSessionId& leaf_session,
252  const std::string& start_time_str,
253  const std::string& label) override;
254  void set_table_epoch(const TSessionId& session,
255  const int db_id,
256  const int table_id,
257  const int new_epoch) override;
258  void set_table_epoch_by_name(const TSessionId& session,
259  const std::string& table_name,
260  const int new_epoch) override;
261  int32_t get_table_epoch(const TSessionId& session,
262  const int32_t db_id,
263  const int32_t table_id) override;
264  int32_t get_table_epoch_by_name(const TSessionId& session,
265  const std::string& table_name) override;
266  void get_table_epochs(std::vector<TTableEpochInfo>& _return,
267  const TSessionId& session,
268  const int32_t db_id,
269  const int32_t table_id) override;
270  void set_table_epochs(const TSessionId& session,
271  const int32_t db_id,
272  const std::vector<TTableEpochInfo>& table_epochs) override;
273 
274  void get_session_info(TSessionInfo& _return, const TSessionId& session) override;
275 
276  void sql_execute(ExecutionResult& _return,
277  const TSessionId& session,
278  const std::string& query,
279  const bool column_format,
280  const int32_t first_n,
281  const int32_t at_most_n);
282  // query, render
283  void sql_execute(TQueryResult& _return,
284  const TSessionId& session,
285  const std::string& query,
286  const bool column_format,
287  const std::string& nonce,
288  const int32_t first_n,
289  const int32_t at_most_n) override;
290  void get_completion_hints(std::vector<TCompletionHint>& hints,
291  const TSessionId& session,
292  const std::string& sql,
293  const int cursor) override;
294  // TODO(miyu): merge the following two data frame APIs.
295  void sql_execute_df(TDataFrame& _return,
296  const TSessionId& session,
297  const std::string& query,
298  const TDeviceType::type device_type,
299  const int32_t device_id,
300  const int32_t first_n,
301  const TArrowTransport::type transport_method) override;
302  void sql_execute_gdf(TDataFrame& _return,
303  const TSessionId& session,
304  const std::string& query,
305  const int32_t device_id,
306  const int32_t first_n) override;
307  void deallocate_df(const TSessionId& session,
308  const TDataFrame& df,
309  const TDeviceType::type device_type,
310  const int32_t device_id) override;
311  void interrupt(const TSessionId& query_session,
312  const TSessionId& interrupt_session) override;
313  void sql_validate(TRowDescriptor& _return,
314  const TSessionId& session,
315  const std::string& query) override;
316 
317  void set_execution_mode(const TSessionId& session,
318  const TExecuteMode::type mode) override;
319  void render_vega(TRenderResult& _return,
320  const TSessionId& session,
321  const int64_t widget_id,
322  const std::string& vega_json,
323  const int32_t compression_level,
324  const std::string& nonce) override;
325  void get_result_row_for_pixel(
326  TPixelTableRowResult& _return,
327  const TSessionId& session,
328  const int64_t widget_id,
329  const TPixel& pixel,
330  const std::map<std::string, std::vector<std::string>>& table_col_names,
331  const bool column_format,
332  const int32_t pixel_radius,
333  const std::string& nonce) override;
334 
335  // dashboards
336  void get_dashboard(TDashboard& _return,
337  const TSessionId& session,
338  int32_t dashboard_id) override;
339  void get_dashboards(std::vector<TDashboard>& _return,
340  const TSessionId& session) override;
341  int32_t create_dashboard(const TSessionId& session,
342  const std::string& dashboard_name,
343  const std::string& dashboard_state,
344  const std::string& image_hash,
345  const std::string& dashboard_metadata) override;
346  void replace_dashboard(const TSessionId& session,
347  const int32_t dashboard_id,
348  const std::string& dashboard_name,
349  const std::string& dashboard_owner,
350  const std::string& dashboard_state,
351  const std::string& image_hash,
352  const std::string& dashboard_metadata) override;
353  void delete_dashboard(const TSessionId& session, const int32_t dashboard_id) override;
354  void share_dashboards(const TSessionId& session,
355  const std::vector<int32_t>& dashboard_ids,
356  const std::vector<std::string>& groups,
357  const TDashboardPermissions& permissions) override;
358  void delete_dashboards(const TSessionId& session,
359  const std::vector<int32_t>& dashboard_ids) override;
360  void share_dashboard(const TSessionId& session,
361  const int32_t dashboard_id,
362  const std::vector<std::string>& groups,
363  const std::vector<std::string>& objects,
364  const TDashboardPermissions& permissions,
365  const bool grant_role) override;
366  void unshare_dashboards(const TSessionId& session,
367  const std::vector<int32_t>& dashboard_ids,
368  const std::vector<std::string>& groups,
369  const TDashboardPermissions& permissions) override;
370  void unshare_dashboard(const TSessionId& session,
371  const int32_t dashboard_id,
372  const std::vector<std::string>& groups,
373  const std::vector<std::string>& objects,
374  const TDashboardPermissions& permissions) override;
375  void get_dashboard_grantees(std::vector<TDashboardGrantees>& _return,
376  const TSessionId& session,
377  int32_t dashboard_id) override;
378 
379  void get_link_view(TFrontendView& _return,
380  const TSessionId& session,
381  const std::string& link) override;
382  void create_link(std::string& _return,
383  const TSessionId& session,
384  const std::string& view_state,
385  const std::string& view_metadata) override;
386  // import
387  void load_table_binary(const TSessionId& session,
388  const std::string& table_name,
389  const std::vector<TRow>& rows,
390  const std::vector<std::string>& column_names) override;
391 
392  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
393  prepare_columnar_loader(
394  const Catalog_Namespace::SessionInfo& session_info,
395  const std::string& table_name,
396  size_t num_cols,
397  std::unique_ptr<import_export::Loader>* loader,
398  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
399  const std::vector<std::string>& column_names);
400 
401  void load_table_binary_columnar(const TSessionId& session,
402  const std::string& table_name,
403  const std::vector<TColumn>& cols,
404  const std::vector<std::string>& column_names) override;
405  void load_table_binary_columnar_polys(const TSessionId& session,
406  const std::string& table_name,
407  const std::vector<TColumn>& cols,
408  const std::vector<std::string>& column_names,
409  const bool assign_render_groups) override;
410  void load_table_binary_arrow(const TSessionId& session,
411  const std::string& table_name,
412  const std::string& arrow_stream,
413  const bool use_column_names) override;
414 
415  void load_table(const TSessionId& session,
416  const std::string& table_name,
417  const std::vector<TStringRow>& rows,
418  const std::vector<std::string>& column_names) override;
419  void detect_column_types(TDetectResult& _return,
420  const TSessionId& session,
421  const std::string& file_name,
422  const TCopyParams& copy_params) override;
423  void create_table(const TSessionId& session,
424  const std::string& table_name,
425  const TRowDescriptor& row_desc,
426  const TFileType::type file_type,
427  const TCreateParams& create_params) override;
428  void import_table(const TSessionId& session,
429  const std::string& table_name,
430  const std::string& file_name,
431  const TCopyParams& copy_params) override;
432  void import_geo_table(const TSessionId& session,
433  const std::string& table_name,
434  const std::string& file_name,
435  const TCopyParams& copy_params,
436  const TRowDescriptor& row_desc,
437  const TCreateParams& create_params) override;
438  void import_table_status(TImportStatus& _return,
439  const TSessionId& session,
440  const std::string& import_id) override;
441  void get_first_geo_file_in_archive(std::string& _return,
442  const TSessionId& session,
443  const std::string& archive_path,
444  const TCopyParams& copy_params) override;
445  void get_all_files_in_archive(std::vector<std::string>& _return,
446  const TSessionId& session,
447  const std::string& archive_path,
448  const TCopyParams& copy_params) override;
449  void get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
450  const TSessionId& session,
451  const std::string& file_name,
452  const TCopyParams& copy_params) override;
453  // distributed
454  int64_t query_get_outer_fragment_count(const TSessionId& session,
455  const std::string& select_query) override;
456 
457  void check_table_consistency(TTableMeta& _return,
458  const TSessionId& session,
459  const int32_t table_id) override;
460  void start_query(TPendingQuery& _return,
461  const TSessionId& leaf_session,
462  const TSessionId& parent_session,
463  const std::string& query_ra,
464  const std::string& start_time_str,
465  const bool just_explain,
466  const std::vector<int64_t>& outer_fragment_indices) override;
467  void execute_query_step(TStepResult& _return,
468  const TPendingQuery& pending_query,
469  const TSubqueryId subquery_id,
470  const std::string& start_time_str) override;
471  void broadcast_serialized_rows(const TSerializedRows& serialized_rows,
472  const TRowDescriptor& row_desc,
473  const TQueryId query_id,
474  const TSubqueryId subquery_id,
475  const bool is_final_subquery_result) override;
476 
477  void start_render_query(TPendingRenderQuery& _return,
478  const TSessionId& session,
479  const int64_t widget_id,
480  const int16_t node_idx,
481  const std::string& vega_json) override;
482  void execute_next_render_step(TRenderStepResult& _return,
483  const TPendingRenderQuery& pending_render,
484  const TRenderAggDataMap& merged_data) override;
485 
486  void insert_data(const TSessionId& session, const TInsertData& insert_data) override;
487  void checkpoint(const TSessionId& session, const int32_t table_id) override;
488  // DB Object Privileges
489  void get_roles(std::vector<std::string>& _return, const TSessionId& session) override;
490  bool has_role(const TSessionId& sessionId,
491  const std::string& granteeName,
492  const std::string& roleName) override;
493  bool has_object_privilege(const TSessionId& sessionId,
494  const std::string& granteeName,
495  const std::string& objectName,
496  const TDBObjectType::type object_type,
497  const TDBObjectPermissions& permissions) override;
498  void get_db_objects_for_grantee(std::vector<TDBObject>& _return,
499  const TSessionId& session,
500  const std::string& roleName) override;
501  void get_db_object_privs(std::vector<TDBObject>& _return,
502  const TSessionId& session,
503  const std::string& objectName,
504  const TDBObjectType::type type) override;
505  void get_all_roles_for_user(std::vector<std::string>& _return,
506  const TSessionId& session,
507  const std::string& granteeName) override;
508  std::vector<std::string> get_valid_groups(const TSessionId& session,
509  int32_t dashboard_id,
510  std::vector<std::string> groups);
511  // licensing
512  void set_license_key(TLicenseInfo& _return,
513  const TSessionId& session,
514  const std::string& key,
515  const std::string& nonce) override;
516  void get_license_claims(TLicenseInfo& _return,
517  const TSessionId& session,
518  const std::string& nonce) override;
519  // user-defined functions
520  /*
521  Returns a mapping of device (CPU, GPU) parameters (name, LLVM IR
522  triplet, features, etc)
523  */
524  void get_device_parameters(std::map<std::string, std::string>& _return,
525  const TSessionId& session) override;
526 
527  /*
528  Register Runtime Extension Functions (UDFs, UDTFs) with given
529  signatures. The extension functions implementations are given in a
530  mapping of a device and the corresponding LLVM/NVVM IR string.
531  */
532 
533  void register_runtime_extension_functions(
534  const TSessionId& session,
535  const std::vector<TUserDefinedFunction>& udfs,
536  const std::vector<TUserDefinedTableFunction>& udtfs,
537  const std::map<std::string, std::string>& device_ir_map) override;
538 
539  // end of sync block for HAHandler and mapd.thrift
540 
541  void shutdown();
542  void emergency_shutdown();
543 
544  TSessionId getInvalidSessionId() const;
545 
546  void internal_connect(TSessionId& session,
547  const std::string& username,
548  const std::string& dbname);
549 
550  bool isAggregator() const;
551 
552  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
553 
555  std::vector<LeafHostInfo> db_leaves_;
556  std::vector<LeafHostInfo> string_leaves_;
557  const std::string base_data_path_;
558  boost::filesystem::path import_path_;
560  std::default_random_engine random_gen_;
561  std::uniform_int_distribution<int64_t> session_id_dist_;
562  const bool jit_debug_;
563  const bool intel_jit_profile_;
565  const bool read_only_;
566  const bool allow_loop_joins_;
569  std::mutex render_mutex_;
570  int64_t start_time_;
573  std::unique_ptr<RenderHandler> render_handler_;
574  std::unique_ptr<MapDAggHandler> agg_handler_;
575  std::unique_ptr<MapDLeafHandler> leaf_handler_;
576  std::shared_ptr<Calcite> calcite_;
577  const bool legacy_syntax_;
578 
579  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
580 
 // Perfect-forwards every argument to query_states_.create() and returns the resulting
 // shared QueryState. `query_states_` is not declared in the visible part of this file.
581  template <typename... ARGS>
582  std::shared_ptr<query_state::QueryState> create_query_state(ARGS&&... args) {
583  return query_states_.create(std::forward<ARGS>(args)...);
584  }
585 
586  // Exactly one immutable SessionInfo copy should be taken by a typical request.
587  Catalog_Namespace::SessionInfo get_session_copy(const TSessionId& session);
588  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_copy_ptr(
589  const TSessionId& session);
590 
591  void get_tables_meta_impl(std::vector<TTableMeta>& _return,
592  QueryStateProxy query_state_proxy,
593  const Catalog_Namespace::SessionInfo& session_info,
594  const bool with_table_locks = true);
595 
596  private:
597  std::atomic<bool> initialized_{false};
598  std::shared_ptr<Catalog_Namespace::SessionInfo> create_new_session(
599  TSessionId& session,
600  const std::string& dbname,
601  const Catalog_Namespace::UserMetadata& user_meta,
602  std::shared_ptr<Catalog_Namespace::Catalog> cat);
603  void connect_impl(TSessionId& session,
604  const std::string& passwd,
605  const std::string& dbname,
606  const Catalog_Namespace::UserMetadata& user_meta,
607  std::shared_ptr<Catalog_Namespace::Catalog> cat,
608  query_state::StdLog& stdlog);
609  void disconnect_impl(const SessionMap::iterator& session_it,
610  mapd_unique_lock<mapd_shared_mutex>& write_lock);
611  void check_table_load_privileges(const TSessionId& session,
612  const std::string& table_name);
613  void check_table_load_privileges(const Catalog_Namespace::SessionInfo& session_info,
614  const std::string& table_name);
615  void get_tables_impl(std::vector<std::string>& table_names,
617  const GetTablesType get_tables_type);
618  void get_table_details_impl(TTableDetails& _return,
619  query_state::StdLog& stdlog,
620  const std::string& table_name,
621  const bool get_system,
622  const bool get_physical);
623  void check_read_only(const std::string& str);
624  void check_session_exp_unsafe(const SessionMap::iterator& session_it);
625  void validateGroups(const std::vector<std::string>& groups);
626  void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo& session_info,
627  const std::vector<int32_t>& dashboard_ids);
628  void shareOrUnshareDashboards(const TSessionId& session,
629  const std::vector<int32_t>& dashboard_ids,
630  const std::vector<std::string>& groups,
631  const TDashboardPermissions& permissions,
632  const bool do_share);
633 
634  // Use get_session_copy() or get_session_copy_ptr() instead of get_const_session_ptr()
635  // unless you know what you are doing. If you need to save a SessionInfo beyond the
636  // present Thrift call, then the safe way to do this is by saving the return value of
637  // get_const_session_ptr() as a std::weak_ptr.
638  // Returns empty std::shared_ptr if session.empty().
639  std::shared_ptr<const Catalog_Namespace::SessionInfo> get_const_session_ptr(
640  const TSessionId& session);
641  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_ptr(
642  const TSessionId& session_id);
643  template <typename SESSION_MAP_LOCK>
644  SessionMap::iterator get_session_it_unsafe(const TSessionId& session,
645  SESSION_MAP_LOCK& lock);
646  template <typename SESSION_MAP_LOCK>
647  void expire_idle_sessions_unsafe(SESSION_MAP_LOCK& lock);
648  static void value_to_thrift_column(const TargetValue& tv,
649  const SQLTypeInfo& ti,
650  TColumn& column);
651  static TDatum value_to_thrift(const TargetValue& tv, const SQLTypeInfo& ti);
652  static std::string apply_copy_to_shim(const std::string& query_str);
653 
654  std::pair<TPlanResult, lockmgr::LockedTableDescriptors> parse_to_ra(
656  const std::string& query_str,
657  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
658  const bool acquire_locks,
659  const SystemParameters& system_parameters,
660  bool check_privileges = true);
661 
662  void sql_execute_local(
663  TQueryResult& _return,
664  const QueryStateProxy& query_state_proxy,
665  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
666  const std::string& query_str,
667  const bool column_format,
668  const std::string& nonce,
669  const int32_t first_n,
670  const int32_t at_most_n,
671  const bool use_calcite);
672 
673  int64_t process_geo_copy_from(const TSessionId& session_id);
674 
675  static void convertData(TQueryResult& _return,
677  const QueryStateProxy& query_state_proxy,
678  const std::string& query_str,
679  const bool column_format,
680  const int32_t first_n,
681  const int32_t at_most_n);
682 
683  void sql_execute_impl(ExecutionResult& _return,
685  const bool column_format,
686  const ExecutorDeviceType executor_device_type,
687  const int32_t first_n,
688  const int32_t at_most_n,
689  const bool use_calcite);
690 
691  bool user_can_access_table(const Catalog_Namespace::SessionInfo&,
692  const TableDescriptor* td,
693  const AccessPrivileges acess_priv);
694 
695  void execute_distributed_copy_statement(
697  const Catalog_Namespace::SessionInfo& session_info);
698 
699  TQueryResult validate_rel_alg(const std::string& query_ra, QueryStateProxy);
700 
701  std::vector<PushedDownFilterInfo> execute_rel_alg(
702  ExecutionResult& _return,
704  const std::string& query_ra,
705  const bool column_format,
706  const ExecutorDeviceType executor_device_type,
707  const int32_t first_n,
708  const int32_t at_most_n,
709  const bool just_validate,
710  const bool find_push_down_candidates,
711  const ExplainInfo& explain_info,
712  const std::optional<size_t> executor_index = std::nullopt) const;
713 
714  void execute_rel_alg_with_filter_push_down(
715  ExecutionResult& _return,
717  std::string& query_ra,
718  const bool column_format,
719  const ExecutorDeviceType executor_device_type,
720  const int32_t first_n,
721  const int32_t at_most_n,
722  const bool just_explain,
723  const bool just_calcite_explain,
724  const std::vector<PushedDownFilterInfo>& filter_push_down_requests);
725 
726  void execute_rel_alg_df(TDataFrame& _return,
727  const std::string& query_ra,
728  QueryStateProxy query_state_proxy,
729  const Catalog_Namespace::SessionInfo& session_info,
730  const ExecutorDeviceType device_type,
731  const size_t device_id,
732  const int32_t first_n,
733  const TArrowTransport::type transport_method) const;
734 
735  void executeDdl(TQueryResult& _return,
736  const std::string& query_ra,
737  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
738 
739  void executeDdl(ExecutionResult& _return,
740  const std::string& query_ra,
741  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
742 
743  TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog* cat,
744  const ColumnDescriptor* cd);
745  TRowDescriptor fixup_row_descriptor(const TRowDescriptor& row_desc,
747  void set_execution_mode_nolock(Catalog_Namespace::SessionInfo* session_ptr,
748  const TExecuteMode::type mode);
749  char unescape_char(std::string str);
750  import_export::CopyParams thrift_to_copyparams(const TCopyParams& cp);
751  TCopyParams copyparams_to_thrift(const import_export::CopyParams& cp);
752  void check_geospatial_files(const boost::filesystem::path file_path,
753  const import_export::CopyParams& copy_params);
754  void render_rel_alg(TRenderResult& _return,
755  const std::string& query_ra,
756  const std::string& query_str,
757  const Catalog_Namespace::SessionInfo& session_info,
758  const std::string& render_type,
759  const bool is_projection_query);
760 
761  TColumnType create_geo_column(const TDatumType::type type,
762  const std::string& name,
763  const bool is_array);
764 
765  static void convertExplain(TQueryResult& _return,
766  const ResultSet& results,
767  const bool column_format);
768  static void convertResult(TQueryResult& _return,
769  const ResultSet& results,
770  const bool column_format);
771 
772  static void convertRows(TQueryResult& _return,
773  QueryStateProxy query_state_proxy,
774  const std::vector<TargetMetaInfo>& targets,
775  const ResultSet& results,
776  const bool column_format,
777  const int32_t first_n,
778  const int32_t at_most_n);
779 
780  // Uses an ExecutionResult to populate a TQueryResult.
781  // Calls convertRows, but only after some setup using session_info.
782  void convertResultSet(ExecutionResult& result,
783  const Catalog_Namespace::SessionInfo& session_info,
784  const std::string& query_state_str,
785  TQueryResult& _return);
786 
787  static void createSimpleResult(TQueryResult& _return,
788  const ResultSet& results,
789  const bool column_format,
790  const std::string label);
791 
792  std::vector<TargetMetaInfo> getTargetMetaInfo(
793  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
794 
795  std::vector<std::string> getTargetNames(
796  const std::vector<TargetMetaInfo>& targets) const;
797 
798  std::vector<std::string> getTargetNames(
799  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
800 
801  void get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
802  std::vector<std::string>& visible_tables,
803  query_state::StdLog& stdlog,
804  const std::string& sql,
805  const int cursor);
806  void get_token_based_completions(std::vector<TCompletionHint>& hints,
807  query_state::StdLog& stdlog,
808  std::vector<std::string>& visible_tables,
809  const std::string& sql,
810  const int cursor);
811 
812  std::unordered_map<std::string, std::unordered_set<std::string>>
813  fill_column_names_by_table(std::vector<std::string>& table_names,
814  query_state::StdLog& stdlog);
815 
816  ConnectionInfo getConnectionInfo() const;
817 
// Builds a TDashboard for the dashboard descriptor `dash` on behalf of the
// session `session_ptr`. `populate_state` defaults to true.
// NOTE(review): this listing omits source line 820 (the embedded numbers jump
// from 819 to 821), so one parameter between `session_ptr` and `dash` is not
// shown here; consult the real header for the full signature.
818  TDashboard get_dashboard_impl(
819  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
821  const DashboardDescriptor* dash,
822  const bool populate_state = true);
823 
// Per-object-type permission predicates. All five share the signature
// bool(const AccessPrivileges&, const TDBObjectPermissions&) and are the
// functions registered by object-type name in permissionFuncMap_.
// Note that has_table_permission names its second parameter `permission`
// while the others use `permissions`.
824  static bool has_database_permission(const AccessPrivileges& privs,
825  const TDBObjectPermissions& permissions);
826  static bool has_table_permission(const AccessPrivileges& privs,
827  const TDBObjectPermissions& permission);
828  static bool has_dashboard_permission(const AccessPrivileges& privs,
829  const TDBObjectPermissions& permissions);
830  static bool has_view_permission(const AccessPrivileges& privs,
831  const TDBObjectPermissions& permissions);
832  static bool has_server_permission(const AccessPrivileges& privs,
833  const TDBObjectPermissions& permissions);
834  // For the provided upper case column names `uc_column_names`, return
835  // the tables from `table_names` which contain at least one of them.
836  // Used to rank the TABLE auto-completion hints by the columns
837  // specified in the projection.
// NOTE(review): `table_names` is a non-const reference even though the comment
// above describes it as an input -- check whether the definition modifies it.
838  std::unordered_set<std::string> get_uc_compatible_table_names_by_column(
839  const std::unordered_set<std::string>& uc_column_names,
840  std::vector<std::string>& table_names,
841  query_state::StdLog& stdlog);
842 
// Returns an owning pointer to a lock container over a `const TableDescriptor*`
// for the table named `table_name`.
// `loader` and `import_buffers` are raw pointers to caller-owned objects.
// NOTE(review): they are presumably out-parameters filled in by this call, and
// the returned container presumably keeps the table locked while they are in
// use -- confirm against the definition.
// `load_type` is taken by value.
843  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
844  prepare_loader_generic(
845  const Catalog_Namespace::SessionInfo& session_info,
846  const std::string& table_name,
847  size_t num_cols,
848  std::unique_ptr<import_export::Loader>* loader,
849  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
850  const std::vector<std::string>& column_names,
851  std::string load_type);
852 
855 
// Handler configuration members.
// NOTE(review): this listing omits source lines 862, 865-868, 870 and 876 (see
// the gaps in the embedded numbers), so further members declared in this range
// are not shown here.
856  bool super_user_rights_; // default is "false"; setting to "true"
857  // ignores passwd checks in "connect(..)"
858  // method
// NOTE(review): the time unit of the two durations below is not visible here.
859  const int idle_session_duration_; // max duration of idle session
860  const int max_session_duration_; // max duration of session
861 
863 
864  const bool enable_rendering_;
869  const size_t reserved_gpu_mem_;
871  const size_t render_mem_bytes_;
872  const size_t num_reader_threads_;
// The members below are references, not copies: the objects they bind to are
// owned elsewhere and must outlive this handler.
873 #ifdef ENABLE_GEOS
874  const std::string& libgeos_so_filename_;
875 #endif
877  const std::string& udf_filename_;
878  const std::string& clang_path_;
879  const std::vector<std::string>& clang_options_;
880 
// Tail of the GeoCopyFromState struct.
// NOTE(review): this listing omits source line 881 (the struct header) and
// lines 883-885 (the remaining members), so only the first member and the
// closing brace are visible here.
882  std::string geo_copy_from_table;
886  };
887 
// GeoCopyFromState per session, keyed by session id (operator(), add() and
// remove() all index it by `session_id`).
// NOTE(review): this listing omits source lines 888 and 890; the accessors lock
// a `geo_copy_from_mutex` whose declaration is therefore not shown here.
889  std::unordered_map<std::string, GeoCopyFromState> was_geo_copy_from;
891 
// Looks up the state recorded for `session_id`.
// Holds geo_copy_from_mutex for the whole lookup. Returns std::nullopt when no
// entry exists; otherwise returns a copy of the stored GeoCopyFromState (the
// return type is by value), so the result is unaffected by later add() or
// remove() calls.
892  std::optional<GeoCopyFromState> operator()(const std::string& session_id) {
893  std::lock_guard<std::mutex> map_lock(geo_copy_from_mutex);
894  auto itr = was_geo_copy_from.find(session_id);
895  if (itr == was_geo_copy_from.end()) {
896  return std::nullopt;
897  }
898  return itr->second;
899  }
900 
// Records a copy of `state` for `session_id` while holding geo_copy_from_mutex.
// insert() never overwrites an existing key; the CHECK on `ret.second` turns a
// duplicate `session_id` into a CHECK failure instead of a silent no-op.
901  void add(const std::string& session_id, const GeoCopyFromState& state) {
902  std::lock_guard<std::mutex> map_lock(geo_copy_from_mutex);
903  const auto ret = was_geo_copy_from.insert(std::make_pair(session_id, state));
904  CHECK(ret.second);
905  }
906 
// Erases the entry for `session_id` while holding geo_copy_from_mutex.
// Erasing by key is a no-op when no such entry exists, so calling this for an
// unknown session is harmless.
907  void remove(const std::string& session_id) {
908  std::lock_guard<std::mutex> map_lock(geo_copy_from_mutex);
909  was_geo_copy_from.erase(session_id);
910  }
911  };
913 
914  // Only for IPC device memory deallocation
// Both members are `mutable`, so const member functions may lock the mutex and
// modify the map.
// NOTE(review): going by its name, the mutex presumably guards
// ipc_handle_to_dev_ptr_ -- confirm at the use sites.
915  mutable std::mutex handle_to_dev_ptr_mutex_;
916  mutable std::unordered_map<std::string, std::string> ipc_handle_to_dev_ptr_;
917 
// Friend declarations: the free function run_warmup_queries and the classes
// RenderHandler::Impl, MapDAggHandler and MapDLeafHandler are granted access to
// this class's private and protected members.
918  friend void run_warmup_queries(mapd::shared_ptr<DBHandler> handler,
919  std::string base_path,
920  std::string query_file_path);
921 
922  friend class RenderHandler::Impl;
923  friend class MapDAggHandler;
924  friend class MapDLeafHandler;
925 
// Maps an object-type name to the static predicate that checks permissions for
// that type. Keys and mapped values are both const.
// This is a non-static member with a default initializer, so each handler
// instance carries its own copy of the table.
// The `"..."s` keys rely on the std::string literal operator being in scope;
// the using-directive that provides it is not part of this excerpt.
926  std::map<const std::string, const PermissionFuncPtr> permissionFuncMap_ = {
927  {"database"s, has_database_permission},
928  {"dashboard"s, has_dashboard_permission},
929  {"table"s, has_table_permission},
930  {"view"s, has_view_permission},
931  {"server"s, has_server_permission}};
932 
// Takes a parsed DDL statement.
// NOTE(review): presumably inspects the statement type and forwards to
// invalidate_sessions<>() for the statements that require it -- confirm
// against the definition.
933  void check_and_invalidate_sessions(Parser::DDLStmt* ddl);
934 
// Erases from `sessions_` every session that `is_match` accepts for `name`.
// Both visible comparisons use boost::iequals, so they are case-insensitive:
// one compares `name` with the current database name of the session's catalog,
// the other with the session's current user name.
// NOTE(review): this listing omits source lines 939 and 942 (the embedded
// numbers jump 938->940 and 941->943). Those lines presumably hold the
// STMT_TYPE-dependent conditions that choose between the two comparisons; read
// them in the real header before editing the lambda.
// NOTE(review): no lock on `sessions_` is taken in this function -- presumably
// the caller already holds the session mutex; confirm.
// `stmt` is not referenced in any of the visible lines, and `name` is only read
// although it is declared as a non-const reference.
935  template <typename STMT_TYPE>
936  void invalidate_sessions(std::string& name, STMT_TYPE* stmt) {
937  using namespace Parser;
938  auto is_match = [&](auto session_it) {
940  return boost::iequals(name,
941  session_it->second->getCatalog().getCurrentDB().dbName);
943  return boost::iequals(name, session_it->second->get_currentUser().userName);
944  }
// Falls through to "no match" when neither condition applies.
945  return false;
946  };
947  auto check_and_remove_sessions = [&]() {
// No increment in the loop header: erase() already yields the iterator to the
// next element, and ++it is only used when nothing was erased.
948  for (auto it = sessions_.begin(); it != sessions_.end();) {
949  if (is_match(it)) {
950  it = sessions_.erase(it);
951  } else {
952  ++it;
953  }
954  }
955  };
956  check_and_remove_sessions();
957  }
958 
// Helpers for in-memory Calcite sessions.
// createInMemoryCalciteSession returns a std::string by const value.
// NOTE(review): presumably the id of the new session, matching the
// `session_id` that removeInMemoryCalciteSession takes -- confirm.
// isInMemoryCalciteSession takes `user_meta` by const value, so the
// UserMetadata is copied on every call.
959  std::string const createInMemoryCalciteSession(
960  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr);
961  bool isInMemoryCalciteSession(const Catalog_Namespace::UserMetadata user_meta);
962  void removeInMemoryCalciteSession(const std::string& session_id);
963 
// Returns an ExecutionResult for the caller's session `session_ptr`, which is
// passed as a shared_ptr by value.
// NOTE(review): presumably a listing of user sessions; which sessions are
// included is not visible here.
964  ExecutionResult getUserSessions(
965  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
966 
967  // getQueries returns the set of queries queued in the DB
968  // that belong to the same database as the caller's session.
969 
970  ExecutionResult getQueries(
971  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
972 
973  // Passes the interrupt request to the DB executor.
// `session_info` is the requesting session.
// NOTE(review): `target_session` presumably identifies the session whose query
// should be interrupted; confirm the expected format against the definition.
974  void interruptQuery(const Catalog_Namespace::SessionInfo& session_info,
975  const std::string& target_session);
976 
977  // render group assignment
978 
// Mode argument accepted by load_table_binary_columnar_internal.
// NOTE(review): the effect of each enumerator is defined where the mode is
// consumed, which is not part of this header excerpt.
979  enum class AssignRenderGroupsMode { kNone, kAssign, kCleanUp };
980 
// Internal columnar binary load into the table `table_name` for `session`.
// `cols` carries the column data and `column_names` the column names.
// NOTE(review): how `column_names` pairs up with `cols` (and whether it may be
// empty) is not visible here -- confirm against the definition.
// `assign_render_groups_mode` is one of the AssignRenderGroupsMode values.
981  void load_table_binary_columnar_internal(
982  const TSessionId& session,
983  const std::string& table_name,
984  const std::vector<TColumn>& cols,
985  const std::vector<std::string>& column_names,
986  const AssignRenderGroupsMode assign_render_groups_mode);
987 
// Nested map aliases for render-group analyzers: the innermost map owns one
// import_export::RenderGroupAnalyzer per string key, and the two outer maps are
// keyed by std::string and by TSessionId respectively.
// NOTE(review): this listing omits source lines 991 and 993, i.e. the
// `using <alias> =` lines that introduce the two outer aliases; without them
// the two declarations below are not valid on their own. Lines 995-996 are
// also omitted.
988  using RenderGroupAssignmentColumnMap =
989  std::unordered_map<std::string,
990  std::unique_ptr<import_export::RenderGroupAnalyzer>>;
992  std::unordered_map<std::string, RenderGroupAssignmentColumnMap>;
994  std::unordered_map<TSessionId, RenderGroupAssignmentTableMap>;
997 };
Classes used to wrap parser calls for calcite redirection.
AssignRenderGroupsMode
Definition: DBHandler.h:979
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:556
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:879
boost::filesystem::path import_path_
Definition: DBHandler.h:558
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:579
bool process(mapd::shared_ptr<::apache::thrift::protocol::TProtocol > in, mapd::shared_ptr<::apache::thrift::protocol::TProtocol > out, void *connectionContext)
Definition: DBHandler.h:113
std::unordered_map< std::string, RenderGroupAssignmentColumnMap > RenderGroupAssignmentTableMap
Definition: DBHandler.h:992
const bool runtime_udf_registration_enabled_
Definition: DBHandler.h:862
ClientProtocol
const std::string & udf_filename_
Definition: DBHandler.h:877
std::unordered_map< std::string, GeoCopyFromState > was_geo_copy_from
Definition: DBHandler.h:889
std::string cat(Ts &&...args)
const int render_oom_retry_threshold_
Definition: DBHandler.h:867
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:915
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:582
static thread_local std::string client_address
Definition: DBHandler.h:148
ExecutorDeviceType
std::mutex render_group_assignment_mutex_
Definition: DBHandler.h:996
bool(*)(const AccessPrivileges &, const TDBObjectPermissions &) PermissionFuncPtr
Definition: DBHandler.h:105
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:149
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:554
string name
Definition: setup.in.py:62
std::unordered_map< TSessionId, RenderGroupAssignmentTableMap > RenderGroupAnalyzerSessionMap
Definition: DBHandler.h:994
const std::string base_data_path_
Definition: DBHandler.h:557
const bool jit_debug_
Definition: DBHandler.h:562
const bool check_origin_
Definition: DBHandler.h:152
const size_t render_mem_bytes_
Definition: DBHandler.h:871
std::string geo_copy_from_table
Definition: DBHandler.h:882
const bool renderer_use_vulkan_driver_
Definition: DBHandler.h:865
std::map< TSessionId, std::shared_ptr< Catalog_Namespace::SessionInfo >> SessionMap
Definition: DBHandler.h:104
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:570
void run_warmup_queries(mapd::shared_ptr< DBHandler > handler, std::string base_path, std::string query_file_path)
Definition: MapDServer.cpp:203
mapd_shared_mutex sessions_mutex_
Definition: DBHandler.h:568
This file contains the class specification and related data structures for Catalog.
std::mutex render_mutex_
Definition: DBHandler.h:569
static size_t max_bytes_for_thrift()
Definition: DBHandler.h:193
query_state::QueryStates query_states_
Definition: DBHandler.h:853
const size_t reserved_gpu_mem_
Definition: DBHandler.h:869
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:870
GetTablesType
Definition: Catalog.h:61
std::shared_timed_mutex mapd_shared_mutex
const int max_session_duration_
Definition: DBHandler.h:860
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:559
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:555
std::string geo_copy_from_file_name
Definition: DBHandler.h:883
const std::string & clang_path_
Definition: DBHandler.h:878
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:573
SystemParameters & system_parameters_
Definition: DBHandler.h:572
const size_t num_reader_threads_
Definition: DBHandler.h:872
Specifies the in-memory content of a row in the column metadata table.
TrackingProcessor(mapd::shared_ptr< OmniSciIf > handler, const bool check_origin)
Definition: DBHandler.h:110
const DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:876
std::unique_ptr< MapDLeafHandler > leaf_handler_
Definition: DBHandler.h:575
const bool enable_auto_clear_render_mem_
Definition: DBHandler.h:866
std::map< std::string, std::string > get_device_parameters(bool cpu_only)
const bool allow_loop_joins_
Definition: DBHandler.h:566
const bool enable_rendering_
Definition: DBHandler.h:864
GeoCopyFromSessions geo_copy_from_sessions
Definition: DBHandler.h:912
const bool intel_jit_profile_
Definition: DBHandler.h:563
bool super_user_rights_
Definition: DBHandler.h:856
void add(const std::string &session_id, const GeoCopyFromState &state)
Definition: DBHandler.h:901
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:576
void shutdown()
Definition: Logger.cpp:314
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:552
const bool read_only_
Definition: DBHandler.h:565
const bool legacy_syntax_
Definition: DBHandler.h:577
auto get_users(std::unique_ptr< SqliteConnector > &sqliteConnector, const int32_t dbId=-1)
#define CHECK(condition)
Definition: Logger.h:197
const int idle_session_duration_
Definition: DBHandler.h:859
std::unique_ptr< MapDAggHandler > agg_handler_
Definition: DBHandler.h:574
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:916
RenderGroupAnalyzerSessionMap render_group_assignment_map_
Definition: DBHandler.h:995
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:868
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
bool allow_multifrag_
Definition: DBHandler.h:564
mapd_unique_lock< mapd_shared_mutex > write_lock
const AuthMetadata & authMetadata_
Definition: DBHandler.h:571
std::optional< GeoCopyFromState > operator()(const std::string &session_id)
Definition: DBHandler.h:892
std::string geo_copy_from_partitions
Definition: DBHandler.h:885
void invalidate_sessions(std::string &name, STMT_TYPE *stmt)
Definition: DBHandler.h:936
bool cpu_mode_only_
Definition: DBHandler.h:567
std::default_random_engine random_gen_
Definition: DBHandler.h:560
SessionMap sessions_
Definition: DBHandler.h:854
import_export::CopyParams geo_copy_from_copy_params
Definition: DBHandler.h:884
std::uniform_int_distribution< int64_t > session_id_dist_
Definition: DBHandler.h:561
std::string apply_copy_to_shim(const std::string &query_str)