OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QueryRunner.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERY_RUNNER_H
18 #define QUERY_RUNNER_H
19 
20 #include <fstream>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 
25 #include "Catalog/SessionInfo.h"
26 #include "Catalog/SysCatalog.h"
28 #include "LeafAggregator.h"
34 #include "QueryEngine/QueryHint.h"
36 
// Forward declarations: these types are only named in signatures below
// (by pointer, reference or smart pointer), so their full definitions are
// not required by this header.

// Global-scope types.
class Calcite;
class ExecutionResult;
class ResultSet;

// Namespaced types.
namespace Catalog_Namespace {
class Catalog;
struct UserMetadata;
}  // namespace Catalog_Namespace

namespace Parser {
class CopyTableStmt;
}  // namespace Parser

namespace import_export {
class Loader;
}  // namespace import_export
56 
57 namespace QueryRunner {
58 
59 class QueryRunner {
60  public:
61  static QueryRunner* init(const char* db_path,
62  const std::string& udf_filename = "",
63  const size_t max_gpu_mem = 0, // use all available mem
64  const int reserved_gpu_mem = 256 << 20);
65 
66  static QueryRunner* init(const char* db_path,
67  const std::vector<LeafHostInfo>& string_servers,
68  const std::vector<LeafHostInfo>& leaf_servers) {
69  return QueryRunner::init(db_path,
70  std::string{OMNISCI_ROOT_USER},
71  "HyperInteractive",
72  std::string{OMNISCI_DEFAULT_DB},
73  string_servers,
74  leaf_servers);
75  }
76 
77  static QueryRunner* init(const char* db_path,
78  const std::string& user,
79  const std::string& pass,
80  const std::string& db_name,
81  const std::vector<LeafHostInfo>& string_servers,
82  const std::vector<LeafHostInfo>& leaf_servers,
83  const std::string& udf_filename = "",
84  bool uses_gpus = true,
85  const size_t max_gpu_mem = 0, // use all available mem
86  const int reserved_gpu_mem = 256 << 20,
87  const bool create_user = false,
88  const bool create_db = false);
89 
90  static QueryRunner* init(std::unique_ptr<Catalog_Namespace::SessionInfo>& session) {
91  qr_instance_.reset(new QueryRunner(std::move(session)));
92  return qr_instance_.get();
93  }
94 
95  static QueryRunner* get() {
96  if (!qr_instance_) {
97  throw std::runtime_error("QueryRunner must be initialized before calling get().");
98  }
99  return qr_instance_.get();
100  }
101 
102  static void reset();
103 
104  std::shared_ptr<Catalog_Namespace::SessionInfo> getSession() const {
105  return session_info_;
106  }
107 
108  void addSessionId(const std::string& session_id,
110  session_info_ =
111  std::make_unique<Catalog_Namespace::SessionInfo>(session_info_->get_catalog_ptr(),
112  session_info_->get_currentUser(),
113  device_type,
114  session_id);
115  }
116 
117  void clearSessionId() { session_info_ = nullptr; }
118 
119  std::shared_ptr<Catalog_Namespace::Catalog> getCatalog() const;
120  std::shared_ptr<Calcite> getCalcite() const;
121  std::shared_ptr<Executor> getExecutor() const;
123 
124  bool gpusPresent() const;
125  virtual void clearGpuMemory() const;
126  virtual void clearCpuMemory() const;
127 
128  virtual void runDDLStatement(const std::string&);
129 
130  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
132  ExecutionOptions eo);
133  virtual std::shared_ptr<ExecutionResult> runSelectQuery(const std::string& query_str,
135  ExecutionOptions eo);
136  static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins = true,
137  bool just_explain = false);
138 
139  // TODO: Refactor away functions such as runSQL() and runSelectQuery() with arbitrary
140  // parameters that grow over time. Instead, pass CompilationOptions and
141  // ExecutionOptions which can be extended without changing the function signatures.
142  // Why?
143  // * Functions with a large number of parameters are hard to maintain and error-prone.
144  // * "Default arguments are banned on virtual functions"
145  // https://google.github.io/styleguide/cppguide.html#Default_Arguments
146  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
147  const ExecutorDeviceType device_type,
148  const bool hoist_literals = true,
149  const bool allow_loop_joins = true);
150  virtual std::shared_ptr<ExecutionResult> runSelectQuery(
151  const std::string& query_str,
152  const ExecutorDeviceType device_type,
153  const bool hoist_literals,
154  const bool allow_loop_joins,
155  const bool just_explain = false);
156  virtual std::shared_ptr<ResultSet> runSQLWithAllowingInterrupt(
157  const std::string& query_str,
158  const std::string& session_id,
159  const ExecutorDeviceType device_type,
160  const double running_query_check_freq = 0.9,
161  const unsigned pending_query_check_freq = 1000);
162 
163  virtual std::vector<std::shared_ptr<ResultSet>> runMultipleStatements(
164  const std::string&,
165  const ExecutorDeviceType);
166  virtual RegisteredQueryHint getParsedQueryHint(const std::string&);
167 
168  virtual void runImport(Parser::CopyTableStmt* import_stmt);
169  virtual std::unique_ptr<import_export::Loader> getLoader(
170  const TableDescriptor* td) const;
171 
172  const int32_t* getCachedJoinHashTable(size_t idx);
173  const int8_t* getCachedBaselineHashTable(size_t idx);
174  size_t getEntryCntCachedBaselineHashTable(size_t idx);
178 
179  void resizeDispatchQueue(const size_t num_executors);
180 
181  QueryRunner(std::unique_ptr<Catalog_Namespace::SessionInfo> session);
182 
183  virtual ~QueryRunner();
184 
186 
187  template <typename... Ts>
188  static std::shared_ptr<query_state::QueryState> create_query_state(Ts&&... args) {
189  return query_states_.create(std::forward<Ts>(args)...);
190  }
191 
192  protected:
193  QueryRunner(const char* db_path,
194  const std::string& user,
195  const std::string& pass,
196  const std::string& db_name,
197  const std::vector<LeafHostInfo>& string_servers,
198  const std::vector<LeafHostInfo>& leaf_servers,
199  const std::string& udf_filename,
200  bool uses_gpus,
201  const size_t max_gpu_mem,
202  const int reserved_gpu_mem,
203  const bool create_user,
204  const bool create_db);
205 
206  static std::unique_ptr<QueryRunner> qr_instance_;
207 
208  std::shared_ptr<Catalog_Namespace::SessionInfo> session_info_;
209  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
210  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
211 };
212 
213 class ImportDriver : public QueryRunner {
214  public:
215  ImportDriver(std::shared_ptr<Catalog_Namespace::Catalog> cat,
218  const std::string session_id = "");
219 
220  void importGeoTable(const std::string& file_path,
221  const std::string& table_name,
222  const bool compression,
223  const bool create_table,
224  const bool explode_collections);
225 };
226 
227 } // namespace QueryRunner
228 
229 #endif // QUERY_RUNNER_H
static query_state::QueryStates query_states_
Definition: QueryRunner.h:185
std::shared_ptr< Catalog_Namespace::SessionInfo > getSession() const
Definition: QueryRunner.h:104
std::string cat(Ts &&...args)
void addSessionId(const std::string &session_id, ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
Definition: QueryRunner.h:108
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: QueryRunner.h:209
virtual void clearGpuMemory() const
ExecutorDeviceType
ImportDriver(std::shared_ptr< Catalog_Namespace::Catalog > cat, const Catalog_Namespace::UserMetadata &user, const ExecutorDeviceType dt=ExecutorDeviceType::GPU, const std::string session_id="")
size_t getNumberOfCachedOverlapsHashTables()
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Catalog_Namespace::Catalog *catalog, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:60
virtual std::vector< std::shared_ptr< ResultSet > > runMultipleStatements(const std::string &, const ExecutorDeviceType)
CircleBuffer::value_type create(ARGS &&...args)
Definition: QueryState.h:189
const int8_t * getCachedBaselineHashTable(size_t idx)
static QueryRunner * init(const char *db_path, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers)
Definition: QueryRunner.h:66
static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins=true, bool just_explain=false)
virtual std::shared_ptr< ResultSet > runSQL(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
This file contains the class specification and related data structures for SysCatalog.
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
const std::string OMNISCI_DEFAULT_DB
Definition: SysCatalog.h:59
virtual std::unique_ptr< import_export::Loader > getLoader(const TableDescriptor *td) const
QueryRunner(std::unique_ptr< Catalog_Namespace::SessionInfo > session)
virtual void runImport(Parser::CopyTableStmt *import_stmt)
size_t getEntryCntCachedBaselineHashTable(size_t idx)
virtual void runDDLStatement(const std::string &)
static std::unique_ptr< QueryRunner > qr_instance_
Definition: QueryRunner.h:206
size_t getNumberOfCachedBaselineJoinHashTables()
virtual RegisteredQueryHint getParsedQueryHint(const std::string &)
void importGeoTable(const std::string &file_path, const std::string &table_name, const bool compression, const bool create_table, const bool explode_collections)
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:60
std::shared_ptr< Executor > getExecutor() const
virtual void clearCpuMemory() const
std::shared_ptr< Catalog_Namespace::SessionInfo > session_info_
Definition: QueryRunner.h:208
size_t getNumberOfCachedJoinHashTables()
virtual std::shared_ptr< ExecutionResult > runSelectQuery(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.cpp:80
static std::shared_ptr< query_state::QueryState > create_query_state(Ts &&...args)
Definition: QueryRunner.h:188
std::shared_ptr< Calcite > getCalcite() const
void resizeDispatchQueue(const size_t num_executors)
virtual std::shared_ptr< ResultSet > runSQLWithAllowingInterrupt(const std::string &query_str, const std::string &session_id, const ExecutorDeviceType device_type, const double running_query_check_freq=0.9, const unsigned pending_query_check_freq=1000)
static QueryRunner * init(std::unique_ptr< Catalog_Namespace::SessionInfo > &session)
Definition: QueryRunner.h:90
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
const int32_t * getCachedJoinHashTable(size_t idx)
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: QueryRunner.h:210
Catalog_Namespace::UserMetadata & getUserMetadata() const