OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRunner.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERY_RUNNER_H
18 #define QUERY_RUNNER_H
19 
20 #include <fstream>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 
25 #include "Catalog/SessionInfo.h"
26 #include "Catalog/SysCatalog.h"
28 #include "LeafAggregator.h"
35 #include "QueryEngine/QueryHint.h"
37 #include "QueryEngine/RelAlgDag.h"
40 #include "Shared/SysDefinitions.h"
42 
43 namespace Catalog_Namespace {
44 class Catalog;
45 struct UserMetadata;
46 } // namespace Catalog_Namespace
47 
48 class ResultSet;
49 class ExecutionResult;
50 
51 namespace Parser {
52 class Stmt;
53 class CopyTableStmt;
54 } // namespace Parser
55 
57 
58 namespace import_export {
59 class Loader;
60 }
61 
62 class Calcite;
63 
64 namespace QueryRunner {
65 
67  std::shared_ptr<const RelAlgNode> root_node;
68  std::vector<unsigned> left_deep_trees_id;
69  std::unordered_map<unsigned, JoinQualsPerNestingLevel> left_deep_trees_info;
70  std::shared_ptr<RelAlgTranslator> rel_alg_translator;
71 };
72 
73 // Keep the original values of the data-recycler-related flags
74 // and restore them when the QR instance is destructed.
75 // Our test environment checks various bare-metal components of the system,
76 // including computing various relational operations and expressions,
77 // building hash tables, and so on.
78 // Thus, unless we explicitly test those cases, we must disable all of them
79 // in the test framework by default.
80 // Since we enable the data recycler and hash table recycler by default,
81 // we keep them as is, but disable the resultset recycler and its related
82 // features to keep our testing environment as is.
84  public:
94  }
95 
97  // restore the flag values
102  }
103 
104  private:
109 };
110 
114  ALL // CLEAN + DIRTY
115 };
116 
// Counters describing the contents of a buffer pool, plus a helper that
// dumps them to standard output.
// All counters default to zero so a default-constructed instance never
// exposes indeterminate values; aggregate initialization still works.
struct BufferPoolStats {
  size_t num_buffers{0};
  size_t num_bytes{0};
  size_t num_tables{0};
  size_t num_columns{0};
  size_t num_fragments{0};  // read by print(); its declaration was missing
  size_t num_chunks{0};

  // Writes a framed, human-readable report of every counter to std::cout.
  // Each line ends with std::endl, so the stream is flushed after every line.
  void print() const {
    std::cout << std::endl
              << std::endl
              << "------------ Buffer Pool Stats ------------" << std::endl;
    std::cout << "Num buffers: " << num_buffers << std::endl;
    std::cout << "Num bytes: " << num_bytes << std::endl;
    std::cout << "Num tables: " << num_tables << std::endl;
    std::cout << "Num columns: " << num_columns << std::endl;
    std::cout << "Num fragments: " << num_fragments << std::endl;
    std::cout << "Num chunks: " << num_chunks << std::endl;
    std::cout << "--------------------------------------------" << std::endl << std::endl;
  }
};
138 
139 class QueryRunner {
140  public:
141  static QueryRunner* init(const char* db_path,
142  const std::string& udf_filename = "",
143  const size_t max_gpu_mem = 0, // use all available mem
144  const int reserved_gpu_mem = 256 << 20);
145 
146  static QueryRunner* init(const File_Namespace::DiskCacheConfig* disk_cache_config,
147  const char* db_path,
148  const std::vector<LeafHostInfo>& string_servers = {},
149  const std::vector<LeafHostInfo>& leaf_servers = {});
150 
151  static QueryRunner* init(const char* db_path,
152  const std::vector<LeafHostInfo>& string_servers,
153  const std::vector<LeafHostInfo>& leaf_servers) {
154  return QueryRunner::init(db_path,
156  "HyperInteractive",
158  string_servers,
159  leaf_servers);
160  }
161 
162  static QueryRunner* init(const char* db_path,
163  const std::string& user,
164  const std::string& pass,
165  const std::string& db_name,
166  const std::vector<LeafHostInfo>& string_servers,
167  const std::vector<LeafHostInfo>& leaf_servers,
168  const std::string& udf_filename = "",
169  bool uses_gpus = true,
170  const size_t max_gpu_mem = 0, // use all available mem
171  const int reserved_gpu_mem = 256 << 20,
172  const bool create_user = false,
173  const bool create_db = false,
174  const File_Namespace::DiskCacheConfig* config = nullptr);
175 
176  static QueryRunner* init(std::unique_ptr<Catalog_Namespace::SessionInfo>& session) {
177  qr_instance_.reset(new QueryRunner(std::move(session)));
178  return qr_instance_.get();
179  }
180 
181  static QueryRunner* get() {
182  if (!qr_instance_) {
183  throw std::runtime_error("QueryRunner must be initialized before calling get().");
184  }
185  return qr_instance_.get();
186  }
187 
188  static void reset();
189 
190  std::shared_ptr<Catalog_Namespace::SessionInfo> getSession() const {
191  return session_info_;
192  }
193 
194  void addSessionId(const std::string& session_id,
196  auto user_info = session_info_->get_currentUser();
197  session_info_ = std::make_unique<Catalog_Namespace::SessionInfo>(
198  session_info_->get_catalog_ptr(), user_info, device_type, session_id);
199  }
200 
201  void clearSessionId() { session_info_ = nullptr; }
202 
203  std::shared_ptr<Catalog_Namespace::Catalog> getCatalog() const;
204  std::shared_ptr<Calcite> getCalcite() const;
205  std::shared_ptr<Executor> getExecutor() const;
207 
208  bool gpusPresent() const;
209  virtual void clearGpuMemory() const;
210  virtual void clearCpuMemory() const;
211  std::vector<MemoryInfo> getMemoryInfo(
212  const Data_Namespace::MemoryLevel memory_level) const;
214  const bool current_db_only) const;
215 
216  virtual std::unique_ptr<Parser::Stmt> createStatement(const std::string&);
217  virtual void runDDLStatement(const std::string&);
218  virtual void validateDDLStatement(const std::string&);
219 
220  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
222  ExecutionOptions eo);
223  virtual std::shared_ptr<ExecutionResult> runSelectQuery(const std::string& query_str,
225  ExecutionOptions eo);
226  static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins = true,
227  bool just_explain = false);
228 
229  // TODO: Refactor away functions such as runSQL() and runSelectQuery() with arbitrary
230  // parameters that grow over time. Instead, pass CompilationOptions and
231  // ExecutionOptions which can be extended without changing the function signatures.
232  // Why?
233  // * Functions with a large number of parameters are hard to maintain and error-prone.
234  // * "Default arguments are banned on virtual functions"
235  // https://google.github.io/styleguide/cppguide.html#Default_Arguments
236  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
237  const ExecutorDeviceType device_type,
238  const bool hoist_literals = true,
239  const bool allow_loop_joins = true);
240  virtual std::shared_ptr<ExecutionResult> runSelectQuery(
241  const std::string& query_str,
242  const ExecutorDeviceType device_type,
243  const bool hoist_literals,
244  const bool allow_loop_joins,
245  const bool just_explain = false);
246  virtual std::shared_ptr<ResultSet> runSQLWithAllowingInterrupt(
247  const std::string& query_str,
248  const std::string& session_id,
249  const ExecutorDeviceType device_type,
250  const double running_query_check_freq = 0.9,
251  const unsigned pending_query_check_freq = 1000);
252 
253  virtual std::vector<std::shared_ptr<ResultSet>> runMultipleStatements(
254  const std::string&,
255  const ExecutorDeviceType);
256  virtual void runImport(Parser::CopyTableStmt* import_stmt);
257  virtual std::unique_ptr<import_export::Loader> getLoader(
258  const TableDescriptor* td) const;
259 
260  RegisteredQueryHint getParsedQueryHint(const std::string&);
262  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>>
263  getParsedQueryHints(const std::string& query_str);
264  std::shared_ptr<const RelAlgNode> getRootNodeFromParsedQuery(
265  const std::string& query_str);
266  std::optional<RegisteredQueryHint> getParsedGlobalQueryHints(
267  const std::string& query_str);
268  RaExecutionSequence getRaExecutionSequence(const std::string& query_str);
269  virtual std::shared_ptr<ResultSet> getCalcitePlan(const std::string& query_str,
270  bool enable_watchdog,
271  bool is_explain_as_json_str,
272  bool is_explain_detailed) const;
273 
274  std::tuple<QueryPlanHash,
275  std::shared_ptr<HashTable>,
276  std::optional<HashtableCacheMetaInfo>>
277  getCachedHashtableWithoutCacheKey(std::set<size_t>& visited,
278  CacheItemType hash_table_type,
279  DeviceIdentifier device_identifier);
280  std::shared_ptr<CacheItemMetric> getCacheItemMetric(QueryPlanHash cache_key,
281  CacheItemType hash_table_type,
282  DeviceIdentifier device_identifier);
283  size_t getNumberOfCachedItem(CacheItemStatus item_status,
284  CacheItemType hash_table_type,
285  bool with_bbox_intersect_tuning_param = false) const;
286 
287  void resizeDispatchQueue(const size_t num_executors);
288 
290 
291  std::shared_ptr<RelAlgTranslator> getRelAlgTranslator(const std::string&, Executor*);
292 
293  ExtractedQueryPlanDag extractQueryPlanDag(const std::string&);
294 
295  std::unique_ptr<RelAlgDag> getRelAlgDag(const std::string&);
296 
297  QueryRunner(std::unique_ptr<Catalog_Namespace::SessionInfo> session);
298 
299  virtual ~QueryRunner() = default;
300 
302 
303  template <typename... Ts>
304  static std::shared_ptr<query_state::QueryState> create_query_state(Ts&&... args) {
305  return query_states_.create(std::forward<Ts>(args)...);
306  }
307 
308  void setExplainType(const ExecutorExplainType explain_type) {
309  explain_type_ = explain_type;
310  }
311 
312  protected:
313  QueryRunner(const char* db_path,
314  const std::string& user,
315  const std::string& pass,
316  const std::string& db_name,
317  const std::vector<LeafHostInfo>& string_servers,
318  const std::vector<LeafHostInfo>& leaf_servers,
319  const std::string& udf_filename,
320  bool uses_gpus,
321  const size_t max_gpu_mem,
322  const int reserved_gpu_mem,
323  const bool create_user,
324  const bool create_db,
325  const File_Namespace::DiskCacheConfig* disk_cache_config = nullptr);
326  static std::unique_ptr<QueryRunner> qr_instance_;
327 
329 
331  std::shared_ptr<Catalog_Namespace::SessionInfo> session_info_;
332  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
333  std::shared_ptr<QueryEngine> query_engine_;
334 };
335 
336 class ImportDriver : public QueryRunner {
337  public:
338  ImportDriver(std::shared_ptr<Catalog_Namespace::Catalog> cat,
341  const std::string session_id = "");
342 
343  void importGeoTable(const std::string& file_path,
344  const std::string& table_name,
345  const bool compression,
346  const bool create_table,
347  const bool explode_collections);
348 };
349 
350 } // namespace QueryRunner
351 
352 #endif // QUERY_RUNNER_H
void setExplainType(const ExecutorExplainType explain_type)
Definition: QueryRunner.h:308
static query_state::QueryStates query_states_
Definition: QueryRunner.h:301
size_t DeviceIdentifier
Definition: DataRecycler.h:129
std::shared_ptr< Catalog_Namespace::SessionInfo > getSession() const
Definition: QueryRunner.h:190
bool g_use_query_resultset_cache
Definition: Execute.cpp:156
std::string cat(Ts &&...args)
void addSessionId(const std::string &session_id, ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
Definition: QueryRunner.h:194
std::optional< RegisteredQueryHint > getParsedGlobalQueryHints(const std::string &query_str)
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: QueryRunner.h:332
virtual void clearGpuMemory() const
ImportDriver(std::shared_ptr< Catalog_Namespace::Catalog > cat, const Catalog_Namespace::UserMetadata &user, const ExecutorDeviceType dt=ExecutorDeviceType::GPU, const std::string session_id="")
bool g_allow_query_step_skipping
Definition: Execute.cpp:159
ExecutorExplainType explain_type_
Definition: QueryRunner.h:328
virtual std::unique_ptr< Parser::Stmt > createStatement(const std::string &)
virtual std::vector< std::shared_ptr< ResultSet > > runMultipleStatements(const std::string &, const ExecutorDeviceType)
CircleBuffer::value_type create(ARGS &&...args)
Definition: QueryState.h:194
static QueryRunner * init(const char *db_path, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers)
Definition: QueryRunner.h:151
Catalog_Namespace::DBMetadata db_metadata_
Definition: QueryRunner.h:330
static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins=true, bool just_explain=false)
ExecutorDeviceType
virtual std::shared_ptr< ResultSet > runSQL(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:157
std::shared_ptr< CacheItemMetric > getCacheItemMetric(QueryPlanHash cache_key, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
BufferPoolStats getBufferPoolStats(const Data_Namespace::MemoryLevel memory_level, const bool current_db_only) const
std::shared_ptr< QueryEngine > query_engine_
Definition: QueryRunner.h:333
This file contains the class specification and related data structures for SysCatalog.
std::shared_ptr< const RelAlgNode > getRootNodeFromParsedQuery(const std::string &query_str)
CacheItemType
Definition: DataRecycler.h:38
const std::string kDefaultDbName
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
A container for relational algebra descriptors defining the execution order for a relational algebra ...
virtual std::unique_ptr< import_export::Loader > getLoader(const TableDescriptor *td) const
QueryRunner(std::unique_ptr< Catalog_Namespace::SessionInfo > session)
virtual void runImport(Parser::CopyTableStmt *import_stmt)
virtual void runDDLStatement(const std::string &)
static std::unique_ptr< QueryRunner > qr_instance_
Definition: QueryRunner.h:326
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:158
std::unique_ptr< RelAlgDag > getRelAlgDag(const std::string &)
ExtractedQueryPlanDag extractQueryPlanDag(const std::string &)
RegisteredQueryHint getParsedQueryHint(const std::string &)
virtual void validateDDLStatement(const std::string &)
void importGeoTable(const std::string &file_path, const std::string &table_name, const bool compression, const bool create_table, const bool explode_collections)
std::shared_ptr< const RelAlgNode > root_node
Definition: QueryRunner.h:67
std::shared_ptr< RelAlgTranslator > getRelAlgTranslator(const std::string &, Executor *)
const std::string kRootUsername
std::vector< unsigned > left_deep_trees_id
Definition: QueryRunner.h:68
std::shared_ptr< Executor > getExecutor() const
virtual void clearCpuMemory() const
virtual std::shared_ptr< ResultSet > getCalcitePlan(const std::string &query_str, bool enable_watchdog, bool is_explain_as_json_str, bool is_explain_detailed) const
std::shared_ptr< Catalog_Namespace::SessionInfo > session_info_
Definition: QueryRunner.h:331
size_t QueryPlanHash
virtual std::shared_ptr< ExecutionResult > runSelectQuery(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.cpp:87
static std::shared_ptr< query_state::QueryState > create_query_state(Ts &&...args)
Definition: QueryRunner.h:304
std::shared_ptr< Calcite > getCalcite() const
size_t getNumberOfCachedItem(CacheItemStatus item_status, CacheItemType hash_table_type, bool with_bbox_intersect_tuning_param=false) const
QueryPlanDagInfo getQueryInfoForDataRecyclerTest(const std::string &)
void resizeDispatchQueue(const size_t num_executors)
virtual std::shared_ptr< ResultSet > runSQLWithAllowingInterrupt(const std::string &query_str, const std::string &session_id, const ExecutorDeviceType device_type, const double running_query_check_freq=0.9, const unsigned pending_query_check_freq=1000)
static QueryRunner * init(std::unique_ptr< Catalog_Namespace::SessionInfo > &session)
Definition: QueryRunner.h:176
std::shared_ptr< RelAlgTranslator > rel_alg_translator
Definition: QueryRunner.h:70
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > getCachedHashtableWithoutCacheKey(std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
std::optional< std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > > getParsedQueryHints(const std::string &query_str)
RaExecutionSequence getRaExecutionSequence(const std::string &query_str)
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
ExecutorExplainType
std::vector< MemoryInfo > getMemoryInfo(const Data_Namespace::MemoryLevel memory_level) const
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:64
virtual ~QueryRunner()=default
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_trees_info
Definition: QueryRunner.h:69
Catalog_Namespace::UserMetadata & getUserMetadata() const