OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRunner.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERY_RUNNER_H
18 #define QUERY_RUNNER_H
19 
20 #include <fstream>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 
25 #include "Catalog/SessionInfo.h"
26 #include "Catalog/SysCatalog.h"
28 #include "LeafAggregator.h"
34 #include "QueryEngine/QueryHint.h"
40 
41 namespace Catalog_Namespace {
42 class Catalog;
43 struct UserMetadata;
44 } // namespace Catalog_Namespace
45 
46 class ResultSet;
47 class ExecutionResult;
48 
49 namespace Parser {
50 class DDLStmt;
51 class CopyTableStmt;
52 } // namespace Parser
53 
55 
56 namespace import_export {
57 class Loader;
58 }
59 
60 class Calcite;
61 
62 namespace QueryRunner {
63 
65  std::shared_ptr<const RelAlgNode> root_node;
66  std::vector<unsigned> left_deep_trees_id;
67  std::unordered_map<unsigned, JoinQualsPerNestingLevel> left_deep_trees_info;
68  std::shared_ptr<RelAlgTranslator> rel_alg_translator;
69 };
70 
72  size_t num_buffers;
73  size_t num_bytes;
74  size_t num_tables;
75  size_t num_columns;
76  size_t num_fragments;
77  size_t num_chunks;
78 
// Writes a human-readable report of the buffer pool counters to std::cout.
// Output layout: two empty lines, a "Buffer Pool Stats" banner, one labelled
// line for each of num_buffers, num_bytes, num_tables, num_columns,
// num_fragments and num_chunks (in that order), a closing rule, and a final
// empty line.
// Every line is terminated with std::endl, so std::cout is flushed after each
// line rather than once at the end.
// The method is const and only reads the counters.
79  void print() const {
80  std::cout << std::endl
81  << std::endl
82  << "------------ Buffer Pool Stats ------------" << std::endl;
83  std::cout << "Num buffers: " << num_buffers << std::endl;
84  std::cout << "Num bytes: " << num_bytes << std::endl;
85  std::cout << "Num tables: " << num_tables << std::endl;
86  std::cout << "Num columns: " << num_columns << std::endl;
87  std::cout << "Num fragments: " << num_fragments << std::endl;
88  std::cout << "Num chunks: " << num_chunks << std::endl;
89  std::cout << "--------------------------------------------" << std::endl << std::endl;
90  }
91 };
92 
93 class QueryRunner {
94  public:
95  static QueryRunner* init(const char* db_path,
96  const std::string& udf_filename = "",
97  const size_t max_gpu_mem = 0, // use all available mem
98  const int reserved_gpu_mem = 256 << 20);
99 
100  static QueryRunner* init(const File_Namespace::DiskCacheConfig* disk_cache_config,
101  const char* db_path,
102  const std::vector<LeafHostInfo>& string_servers = {},
103  const std::vector<LeafHostInfo>& leaf_servers = {});
104 
// Convenience overload: forwards to the init() overload that takes explicit
// user / password / database arguments, supplying
//   user    = std::string{OMNISCI_ROOT_USER}
//   pass    = the literal "HyperInteractive"
//   db_name = std::string{OMNISCI_DEFAULT_DB}
// and passing db_path, string_servers and leaf_servers through unchanged.
// Only six arguments are given, so every later parameter of that overload
// (udf_filename, uses_gpus, max_gpu_mem, reserved_gpu_mem, create_user,
// create_db, config) takes its declared default.
// Returns whatever the forwarded init() call returns.
105  static QueryRunner* init(const char* db_path,
106  const std::vector<LeafHostInfo>& string_servers,
107  const std::vector<LeafHostInfo>& leaf_servers) {
108  return QueryRunner::init(db_path,
109  std::string{OMNISCI_ROOT_USER},
110  "HyperInteractive",
111  std::string{OMNISCI_DEFAULT_DB},
112  string_servers,
113  leaf_servers);
114  }
115 
116  static QueryRunner* init(const char* db_path,
117  const std::string& user,
118  const std::string& pass,
119  const std::string& db_name,
120  const std::vector<LeafHostInfo>& string_servers,
121  const std::vector<LeafHostInfo>& leaf_servers,
122  const std::string& udf_filename = "",
123  bool uses_gpus = true,
124  const size_t max_gpu_mem = 0, // use all available mem
125  const int reserved_gpu_mem = 256 << 20,
126  const bool create_user = false,
127  const bool create_db = false,
128  const File_Namespace::DiskCacheConfig* config = nullptr);
129 
// Replaces the static instance held in qr_instance_ with a new QueryRunner
// constructed from `session`, and returns a raw pointer to that instance.
// `session` is taken by non-const lvalue reference and then moved from, so the
// caller's unique_ptr no longer owns the SessionInfo after this call.
// qr_instance_.reset() destroys any previously held QueryRunner, so pointers
// obtained from an earlier init()/get() must not be used afterwards.
// The returned pointer is non-owning; qr_instance_ keeps ownership.
130  static QueryRunner* init(std::unique_ptr<Catalog_Namespace::SessionInfo>& session) {
131  qr_instance_.reset(new QueryRunner(std::move(session)));
132  return qr_instance_.get();
133  }
134 
// Returns a non-owning pointer to the instance currently held in qr_instance_.
// Throws std::runtime_error when qr_instance_ is empty, so a successful call
// never returns nullptr.
135  static QueryRunner* get() {
136  if (!qr_instance_) {
137  throw std::runtime_error("QueryRunner must be initialized before calling get().");
138  }
139  return qr_instance_.get();
140  }
141 
142  static void reset();
143 
// Returns a copy of the session_info_ shared_ptr, so the caller shares
// ownership of the SessionInfo with this runner.
// The result is null when session_info_ is null, e.g. after clearSessionId().
144  std::shared_ptr<Catalog_Namespace::SessionInfo> getSession() const {
145  return session_info_;
146  }
147 
// Replaces session_info_ with a freshly constructed SessionInfo built from the
// current session's catalog pointer and current user, plus the supplied
// device_type and session_id.
// NOTE(review): listing line 149, which holds the second parameter, is absent
// from this extract. The cross-reference index for this page gives the full
// signature as
//   addSessionId(const std::string& session_id,
//                ExecutorDeviceType device_type = ExecutorDeviceType::GPU)
// -- confirm against the real header.
// session_info_ is dereferenced unconditionally, so it must be non-null on
// entry; clearSessionId() sets it to nullptr.
// The current user is read into user_info before the assignment, and the
// catalog pointer is read while building the new object, so both come from the
// old session.
// The std::make_unique result is assigned to the shared_ptr member
// session_info_.
148  void addSessionId(const std::string& session_id,
150  auto user_info = session_info_->get_currentUser();
151  session_info_ = std::make_unique<Catalog_Namespace::SessionInfo>(
152  session_info_->get_catalog_ptr(), user_info, device_type, session_id);
153  }
154 
// Sets session_info_ to nullptr, dropping this runner's reference to the
// session. Afterwards getSession() returns a null pointer, and addSessionId()
// must not be called because it dereferences session_info_.
155  void clearSessionId() { session_info_ = nullptr; }
156 
157  std::shared_ptr<Catalog_Namespace::Catalog> getCatalog() const;
158  std::shared_ptr<Calcite> getCalcite() const;
159  std::shared_ptr<Executor> getExecutor() const;
161 
162  bool gpusPresent() const;
163  virtual void clearGpuMemory() const;
164  virtual void clearCpuMemory() const;
165  std::vector<MemoryInfo> getMemoryInfo(
166  const Data_Namespace::MemoryLevel memory_level) const;
168  const bool current_db_only) const;
169 
170  virtual std::unique_ptr<Parser::DDLStmt> createDDLStatement(const std::string&);
171  virtual void runDDLStatement(const std::string&);
172  virtual void validateDDLStatement(const std::string&);
173 
174  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
176  ExecutionOptions eo);
177  virtual std::shared_ptr<ExecutionResult> runSelectQuery(const std::string& query_str,
179  ExecutionOptions eo);
180  static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins = true,
181  bool just_explain = false);
182 
183  // TODO: Refactor away functions such as runSQL() and runSelectQuery() with arbitrary
184  // parameters that grow over time. Instead, pass CompilationOptions and
185  // ExecutionOptions which can be extended without changing the function signatures.
186  // Why?
187  // * Functions with a large number of parameters are hard to maintain and error-prone.
188  // * "Default arguments are banned on virtual functions"
189  // https://google.github.io/styleguide/cppguide.html#Default_Arguments
190  virtual std::shared_ptr<ResultSet> runSQL(const std::string& query_str,
191  const ExecutorDeviceType device_type,
192  const bool hoist_literals = true,
193  const bool allow_loop_joins = true);
194  virtual std::shared_ptr<ExecutionResult> runSelectQuery(
195  const std::string& query_str,
196  const ExecutorDeviceType device_type,
197  const bool hoist_literals,
198  const bool allow_loop_joins,
199  const bool just_explain = false);
200  virtual std::shared_ptr<ResultSet> runSQLWithAllowingInterrupt(
201  const std::string& query_str,
202  const std::string& session_id,
203  const ExecutorDeviceType device_type,
204  const double running_query_check_freq = 0.9,
205  const unsigned pending_query_check_freq = 1000);
206 
207  virtual std::vector<std::shared_ptr<ResultSet>> runMultipleStatements(
208  const std::string&,
209  const ExecutorDeviceType);
210  virtual void runImport(Parser::CopyTableStmt* import_stmt);
211  virtual std::unique_ptr<import_export::Loader> getLoader(
212  const TableDescriptor* td) const;
213 
214  RegisteredQueryHint getParsedQueryHint(const std::string&);
216  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>>
217  getParsedQueryHints(const std::string& query_str);
218  std::optional<RegisteredQueryHint> getParsedGlobalQueryHints(
219  const std::string& query_str);
220  const int32_t* getCachedPerfectHashTable(QueryPlan plan_dag);
221  const int8_t* getCachedBaselineHashTable(QueryPlan plan_dag);
223  std::tuple<QueryPlanHash,
224  std::shared_ptr<HashTable>,
225  std::optional<HashtableCacheMetaInfo>>
226  getCachedHashtableWithoutCacheKey(std::set<size_t>& visited,
227  CacheItemType hash_table_type,
228  DeviceIdentifier device_identifier);
229  std::shared_ptr<CacheItemMetric> getCacheItemMetric(QueryPlanHash cache_key,
230  CacheItemType hash_table_type,
231  DeviceIdentifier device_identifier);
237 
238  void resizeDispatchQueue(const size_t num_executors);
239 
241 
242  std::shared_ptr<RelAlgTranslator> getRelAlgTranslator(const std::string&, Executor*);
243 
244  ExtractedPlanDag extractQueryPlanDag(const std::string&);
245 
246  QueryRunner(std::unique_ptr<Catalog_Namespace::SessionInfo> session);
247 
248  virtual ~QueryRunner() = default;
249 
251 
// Forwards all arguments, via std::forward, to query_states_.create() and
// returns the resulting std::shared_ptr<query_state::QueryState>.
// query_states_ is a static member; its declaration (listing line 250) is
// absent from this extract and appears only in the page's cross-reference
// index as `static query_state::QueryStates query_states_`.
252  template <typename... Ts>
253  static std::shared_ptr<query_state::QueryState> create_query_state(Ts&&... args) {
254  return query_states_.create(std::forward<Ts>(args)...);
255  }
256 
// Stores `explain_type` in the explain_type_ member. Nothing else happens in
// this setter; how the value is consumed is not visible in this header.
257  void setExplainType(const ExecutorExplainType explain_type) {
258  explain_type_ = explain_type;
259  }
260 
261  protected:
262  QueryRunner(const char* db_path,
263  const std::string& user,
264  const std::string& pass,
265  const std::string& db_name,
266  const std::vector<LeafHostInfo>& string_servers,
267  const std::vector<LeafHostInfo>& leaf_servers,
268  const std::string& udf_filename,
269  bool uses_gpus,
270  const size_t max_gpu_mem,
271  const int reserved_gpu_mem,
272  const bool create_user,
273  const bool create_db,
274  const File_Namespace::DiskCacheConfig* disk_cache_config = nullptr);
275  static std::unique_ptr<QueryRunner> qr_instance_;
276 
278 
280  std::shared_ptr<Catalog_Namespace::SessionInfo> session_info_;
281  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
282  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
283 };
284 
// QueryRunner subclass (public inheritance) that adds a constructor taking a
// catalog and an importGeoTable() entry point. Both members are only declared
// here; their definitions are not part of this header listing.
285 class ImportDriver : public QueryRunner {
286  public:
// NOTE(review): listing lines 288-289 (the middle constructor parameters) are
// absent from this extract. The cross-reference index for this page gives the
// full signature as
//   ImportDriver(std::shared_ptr<Catalog_Namespace::Catalog> cat,
//                const Catalog_Namespace::UserMetadata& user,
//                const ExecutorDeviceType dt = ExecutorDeviceType::GPU,
//                const std::string session_id = "")
// -- confirm against the real header.
287  ImportDriver(std::shared_ptr<Catalog_Namespace::Catalog> cat,
290  const std::string session_id = "");
291 
// Takes a source file path, a target table name and three boolean options
// (compression, create_table, explode_collections). The exact meaning of each
// flag is defined by the implementation, which is not shown here.
292  void importGeoTable(const std::string& file_path,
293  const std::string& table_name,
294  const bool compression,
295  const bool create_table,
296  const bool explode_collections);
297 };
298 
299 } // namespace QueryRunner
300 
301 #endif // QUERY_RUNNER_H
void setExplainType(const ExecutorExplainType explain_type)
Definition: QueryRunner.h:257
static query_state::QueryStates query_states_
Definition: QueryRunner.h:250
size_t DeviceIdentifier
Definition: DataRecycler.h:111
std::shared_ptr< Catalog_Namespace::SessionInfo > getSession() const
Definition: QueryRunner.h:144
std::string cat(Ts &&...args)
void addSessionId(const std::string &session_id, ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
Definition: QueryRunner.h:148
std::optional< RegisteredQueryHint > getParsedGlobalQueryHints(const std::string &query_str)
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: QueryRunner.h:281
virtual void clearGpuMemory() const
ExecutorDeviceType
virtual std::unique_ptr< Parser::DDLStmt > createDDLStatement(const std::string &)
ImportDriver(std::shared_ptr< Catalog_Namespace::Catalog > cat, const Catalog_Namespace::UserMetadata &user, const ExecutorDeviceType dt=ExecutorDeviceType::GPU, const std::string session_id="")
size_t getNumberOfCachedOverlapsHashTables()
const int32_t * getCachedPerfectHashTable(QueryPlan plan_dag)
ExecutorExplainType explain_type_
Definition: QueryRunner.h:277
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Catalog_Namespace::Catalog *catalog, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:58
virtual std::vector< std::shared_ptr< ResultSet > > runMultipleStatements(const std::string &, const ExecutorDeviceType)
CircleBuffer::value_type create(ARGS &&...args)
Definition: QueryState.h:195
size_t getNumberOfCachedOverlapsHashTablesAndTuningParams()
static QueryRunner * init(const char *db_path, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers)
Definition: QueryRunner.h:105
Catalog_Namespace::DBMetadata db_metadata_
Definition: QueryRunner.h:279
static ExecutionOptions defaultExecutionOptionsForRunSQL(bool allow_loop_joins=true, bool just_explain=false)
virtual std::shared_ptr< ResultSet > runSQL(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
std::shared_ptr< CacheItemMetric > getCacheItemMetric(QueryPlanHash cache_key, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
BufferPoolStats getBufferPoolStats(const Data_Namespace::MemoryLevel memory_level, const bool current_db_only) const
This file contains the class specification and related data structures for SysCatalog.
CacheItemType
Definition: DataRecycler.h:36
void init(LogOptions const &log_opts)
Definition: Logger.cpp:305
const std::string OMNISCI_DEFAULT_DB
Definition: SysCatalog.h:59
virtual std::unique_ptr< import_export::Loader > getLoader(const TableDescriptor *td) const
QueryRunner(std::unique_ptr< Catalog_Namespace::SessionInfo > session)
virtual void runImport(Parser::CopyTableStmt *import_stmt)
virtual void runDDLStatement(const std::string &)
static std::unique_ptr< QueryRunner > qr_instance_
Definition: QueryRunner.h:275
size_t getNumberOfCachedBaselineJoinHashTables()
size_t getNumberOfCachedPerfectHashTables()
RegisteredQueryHint getParsedQueryHint(const std::string &)
virtual void validateDDLStatement(const std::string &)
ExtractedPlanDag extractQueryPlanDag(const std::string &)
void importGeoTable(const std::string &file_path, const std::string &table_name, const bool compression, const bool create_table, const bool explode_collections)
std::shared_ptr< const RelAlgNode > root_node
Definition: QueryRunner.h:65
std::shared_ptr< RelAlgTranslator > getRelAlgTranslator(const std::string &, Executor *)
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:60
std::vector< unsigned > left_deep_trees_id
Definition: QueryRunner.h:66
std::shared_ptr< Executor > getExecutor() const
virtual void clearCpuMemory() const
size_t getEntryCntCachedBaselineHashTable(QueryPlan plan_dag)
std::shared_ptr< Catalog_Namespace::SessionInfo > session_info_
Definition: QueryRunner.h:280
size_t QueryPlanHash
virtual std::shared_ptr< ExecutionResult > runSelectQuery(const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.cpp:83
static std::shared_ptr< query_state::QueryState > create_query_state(Ts &&...args)
Definition: QueryRunner.h:253
std::shared_ptr< Calcite > getCalcite() const
QueryPlanDagInfo getQueryInfoForDataRecyclerTest(const std::string &)
void resizeDispatchQueue(const size_t num_executors)
size_t getNumberOfCachedOverlapsHashTableTuringParams()
virtual std::shared_ptr< ResultSet > runSQLWithAllowingInterrupt(const std::string &query_str, const std::string &session_id, const ExecutorDeviceType device_type, const double running_query_check_freq=0.9, const unsigned pending_query_check_freq=1000)
static QueryRunner * init(std::unique_ptr< Catalog_Namespace::SessionInfo > &session)
Definition: QueryRunner.h:130
std::shared_ptr< RelAlgTranslator > rel_alg_translator
Definition: QueryRunner.h:68
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > getCachedHashtableWithoutCacheKey(std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
std::optional< std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > > getParsedQueryHints(const std::string &query_str)
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: QueryRunner.h:282
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
ExecutorExplainType
std::string QueryPlan
std::vector< MemoryInfo > getMemoryInfo(const Data_Namespace::MemoryLevel memory_level) const
const int8_t * getCachedBaselineHashTable(QueryPlan plan_dag)
virtual ~QueryRunner()=default
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_trees_info
Definition: QueryRunner.h:67
Catalog_Namespace::UserMetadata & getUserMetadata() const