OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDagSerializer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <boost/archive/text_iarchive.hpp>
20 #include <boost/archive/text_oarchive.hpp>
21 #include <boost/serialization/access.hpp>
22 #include <boost/serialization/shared_ptr.hpp>
23 #include <boost/serialization/unique_ptr.hpp>
24 #include <boost/serialization/unordered_map.hpp>
25 #include <boost/serialization/variant.hpp>
26 #include <boost/serialization/vector.hpp>
27 
28 #include "QueryEngine/RelAlgDag.h"
36 #include "Shared/scope.h"
37 
// Comma-separated list of the concrete RelAlgNode-derived classes. It is expanded as
// the template argument pack of registerClassesWithArchive when a RelAlgDag is
// archived, so every class named here gets registered with the archive.
//
47 // NOTE: RelTranslatedJoin is not in this list as it is a RelAlgNode only created
48 // during query execution and therefore not relevant here as RelAlgDag serialization
49 // should only be performed before query execution to avoid having to serialize any query
50 // state
51 #define REL_ALG_NODE_DERIVED_CLASSES \
52  RelScan, RelProject, RelAggregate, RelJoin, RelFilter, RelLeftDeepInnerJoin, \
53  RelCompound, RelSort, RelModify, RelTableFunction, RelLogicalValues, \
54  RelLogicalUnion
55 
// Comma-separated list of the Rex-family classes that get registered with the archive
// when a RelAlgDag is archived (expanded as the template argument pack of
// registerClassesWithArchive). The Rex and RexScalar bases are not listed here; they
// are only archived as base sub-objects of the classes below.
56 #define REX_DERIVED_CLASSES \
57  RexAbstractInput, RexLiteral, RexOperator, RexSubQuery, RexInput, RexCase, \
58  RexFunctionOperator, RexWindowFunctionOperator, RexRef, RexAgg
59 
60 namespace {
61 
65 template <class T, class... Ts>
67  : std::bool_constant<(std::is_same_v<T, typename std::remove_cv_t<Ts>> || ...)> {};
68 
73 template <class T>
76 
77 template <class T>
79 
83 template <class T>
85 
// Shorthand for is_rex_class<T>::value. Used as the enable_if condition that selects
// the Rex overload of RelAlgDagSerializer::serialize.
86 template <class T>
87 inline constexpr bool is_rex_class_v = is_rex_class<T>::value;
88 
92 template <class T>
94  : std::bool_constant<is_rel_alg_node_class_v<T> || is_rex_class_v<T> ||
95  std::is_same_v<T, RelAlgDag>> {};
96 
97 template <class T>
98 inline constexpr bool all_serializable_rel_alg_classes_v =
100 
101 } // namespace
102 
// Archives the members of a Rex-family object. This overload takes part in overload
// resolution only when is_rex_class_v<RexClass> is true. The member list is selected
// at compile time by an `if constexpr` chain keyed on the exact type of RexClass.
// Every derived class first archives its direct base sub-object through
// boost::serialization::base_object and then its own members.
//
// The same statement sequence serves both directions: the archive type passed in
// decides whether `ar & member` writes or reads. The sequence therefore defines the
// field order inside the archive and must not be reordered.
//
// ar      - archive that every member is passed through.
// obj     - object whose members are archived.
// version - class version supplied by the caller; not read by this function.
119  template <class Archive,
120  class RexClass,
121  typename std::enable_if_t<is_rex_class_v<RexClass>>* = nullptr>
122  static void serialize(Archive& ar, RexClass& obj, const unsigned int version) {
123  if constexpr (std::is_same_v<Rex, RexClass>) {
124  (ar & obj.hash_);
125  } else if constexpr (std::is_same_v<RexScalar, RexClass>) {
// RexScalar archives nothing beyond its Rex base.
126  (ar & boost::serialization::base_object<Rex>(obj));
127  } else if constexpr (std::is_same_v<RexAbstractInput, RexClass>) {
128  (ar & boost::serialization::base_object<RexScalar>(obj));
129  (ar & obj.in_index_);
130  } else if constexpr (std::is_same_v<RexLiteral, RexClass>) {
131  (ar & boost::serialization::base_object<RexScalar>(obj));
132  (ar & obj.literal_);
133  (ar & obj.type_);
134  (ar & obj.target_type_);
135  (ar & obj.scale_);
136  (ar & obj.precision_);
137  (ar & obj.target_scale_);
138  (ar & obj.target_precision_);
139  } else if constexpr (std::is_same_v<RexOperator, RexClass>) {
140  (ar & boost::serialization::base_object<RexScalar>(obj));
141  (ar & obj.op_);
142  (ar & obj.operands_);
143  (ar & obj.type_);
144  } else if constexpr (std::is_same_v<RexSubQuery, RexClass>) {
145  (ar & boost::serialization::base_object<RexScalar>(obj));
146  (ar & obj.type_);
147 
148  // The execution result should not be set before serialization. If it is, that
149  // means RelAlgExecutor got its hands on it before serialization. This is not
150  // advised. Serialization should happen before any RelAlgExecutor processing.
151  CHECK(obj.result_);
152  CHECK(*obj.result_ == nullptr);
153 
154  // BUT we still need to serialize the RexSubQuery::result_. It is a shared_ptr of a
155  // shared_ptr. The outer shared ptr should always be defined, pointing to the
156  // interior shared_ptr that should be null. The way it is designed, this 2-tiered
157  // shared ptr acts as a link between RexSubQuery instances that were deep copied
158  // from a parent. A result should not exist, but the link should, so we need to
159  // serialize result_ (or find a better linking mechanism)
160  (ar & obj.result_);
161 
162  (ar & obj.ra_);
163  } else if constexpr (std::is_same_v<RexInput, RexClass>) {
164  (ar & boost::serialization::base_object<RexAbstractInput>(obj));
165  (ar & obj.node_);
166  } else if constexpr (std::is_same_v<RexCase, RexClass>) {
167  (ar & boost::serialization::base_object<RexScalar>(obj));
168  (ar & obj.expr_pair_list_);
169  (ar & obj.else_expr_);
170  } else if constexpr (std::is_same_v<RexFunctionOperator, RexClass>) {
171  (ar & boost::serialization::base_object<RexOperator>(obj));
172  (ar & obj.name_);
173  } else if constexpr (std::is_same_v<RexWindowFunctionOperator, RexClass>) {
174  (ar & boost::serialization::base_object<RexFunctionOperator>(obj));
175  (ar & obj.kind_);
176  (ar & obj.partition_keys_);
177  (ar & obj.order_keys_);
178  (ar & obj.collation_);
179  (ar & obj.frame_start_bound_);
180  (ar & obj.frame_end_bound_);
181  (ar & obj.is_rows_);
182  } else if constexpr (std::is_same_v<RexRef, RexClass>) {
183  (ar & boost::serialization::base_object<RexScalar>(obj));
184  (ar & obj.index_);
185  } else if constexpr (std::is_same_v<RexAgg, RexClass>) {
// RexAgg derives directly from Rex, not from RexScalar.
186  (ar & boost::serialization::base_object<Rex>(obj));
187  (ar & obj.agg_);
188  (ar & obj.distinct_);
189  (ar & obj.type_);
190  (ar & obj.operands_);
191  } else {
// `!sizeof(RexClass)` is always false but depends on the template parameter, so the
// assertion fires only when this branch is actually instantiated, i.e. for a Rex
// class that passed the enable_if but has no branch above.
192  static_assert(!sizeof(RexClass), "Unhandled Rex class during serialization.");
193  }
194  }
195 
// Registers every type in the RelAlgNodeClasses pack with the archive by calling
// ar.register_type<T>() once per type (comma fold expression).
// NOTE(review): the function's signature line is not visible in this listing; the
// name and the `Archive& ar` parameter are taken from the call sites in the RelAlgDag
// serialize overload -- confirm against the original header.
205  template <class Archive, class... RelAlgNodeClasses>
207  (ar.template register_type<RelAlgNodeClasses>(), ...);
208  }
209 
// Archives the members of a RelAlgNode-family object. This overload takes part in
// overload resolution only when is_rel_alg_node_class_v<RelAlgClass> is true. The
// member list is selected at compile time by an `if constexpr` chain keyed on the
// exact type of RelAlgClass. Derived classes archive their base sub-object(s) through
// boost::serialization::base_object before their own members; RelProject and
// RelCompound archive two bases (RelAlgNode and ModifyManipulationTarget).
//
// The same statement sequence serves both saving and loading (the archive type decides
// the direction), so it defines the field order inside the archive and must not be
// reordered.
//
// ar      - archive that every member is passed through.
// obj     - object whose members are archived.
// version - class version supplied by the caller; not read by this function.
218  template <class Archive,
219  class RelAlgClass,
220  typename std::enable_if_t<is_rel_alg_node_class_v<RelAlgClass>>* = nullptr>
221  static void serialize(Archive& ar, RelAlgClass& obj, const unsigned int version) {
222  if constexpr (std::is_same_v<RelAlgNode, RelAlgClass>) {
223  (ar & obj.inputs_);
224  (ar & obj.id_);
225  (ar & obj.hash_);
226  (ar & obj.is_nop_);
227 
228  // NOTE: not serializing the id_in_plan_tree_, context_data_, targets_metainfo_,
229  // dag_node_id_, query_plan_dag_, & query_plan_dag_hash_ members. They are only
230  // needed for RelAlgExecutor pathways and not needed at the time serialization
231  // is needed.
232  } else if constexpr (std::is_same_v<RelScan, RelAlgClass>) {
233  (ar & boost::serialization::base_object<RelAlgNode>(obj));
234 
235  // NOTE: we're not serializing anything in regard to the member RelScan::td_. The
236  // table descriptor is instead a construction-dependent argument and will be
237  // serialized as part of the save/load construction data. See
238  // boost::serialization::save_construct_data override below.
239  (ar & obj.field_names_);
240  (ar & obj.hint_applied_);
241  (ar & obj.hints_);
242  } else if constexpr (std::is_same_v<ModifyManipulationTarget, RelAlgClass>) {
243  (ar & obj.is_update_via_select_);
244  (ar & obj.is_delete_via_select_);
245  (ar & obj.varlen_update_required_);
246  (ar & obj.target_columns_);
247  (ar & obj.force_rowwise_output_);
248 
249  // NOTE: we're not serializing table_descriptor_. The table descriptor is
250  // instead a constructor-dependent argument and will be saved/loaded as part of
251  // custom constructor data. See: boost::serialization::load_construct_data below
252  // for more details.
253  } else if constexpr (std::is_same_v<RelProject, RelAlgClass>) {
254  (ar & boost::serialization::base_object<RelAlgNode>(obj));
255  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
256  (ar & obj.scalar_exprs_);
257  (ar & obj.fields_);
258  (ar & obj.hint_applied_);
259  (ar & obj.hints_);
260  (ar & obj.has_pushed_down_window_expr_);
261  } else if constexpr (std::is_same_v<RelAggregate, RelAlgClass>) {
262  (ar & boost::serialization::base_object<RelAlgNode>(obj));
263  (ar & obj.groupby_count_);
264  (ar & obj.agg_exprs_);
265  (ar & obj.fields_);
266  (ar & obj.hint_applied_);
267  (ar & obj.hints_);
268  } else if constexpr (std::is_same_v<RelJoin, RelAlgClass>) {
269  (ar & boost::serialization::base_object<RelAlgNode>(obj));
270  (ar & obj.condition_);
271  (ar & obj.join_type_);
272  (ar & obj.hint_applied_);
273  (ar & obj.hints_);
274  } else if constexpr (std::is_same_v<RelFilter, RelAlgClass>) {
275  (ar & boost::serialization::base_object<RelAlgNode>(obj));
276  (ar & obj.filter_);
277  } else if constexpr (std::is_same_v<RelLeftDeepInnerJoin, RelAlgClass>) {
278  (ar & boost::serialization::base_object<RelAlgNode>(obj));
279  (ar & obj.condition_);
280  (ar & obj.outer_conditions_per_level_);
281  (ar & obj.original_filter_);
282  (ar & obj.original_joins_);
283  } else if constexpr (std::is_same_v<RelCompound, RelAlgClass>) {
284  (ar & boost::serialization::base_object<RelAlgNode>(obj));
285  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
286 
287  (ar & obj.filter_expr_);
288  (ar & obj.groupby_count_);
289  (ar & obj.agg_exprs_);
290  (ar & obj.fields_);
291  (ar & obj.is_agg_);
292  (ar & obj.scalar_sources_);
293  (ar & obj.target_exprs_);
294  (ar & obj.hint_applied_);
295  (ar & obj.hints_);
296  } else if constexpr (std::is_same_v<RelSort, RelAlgClass>) {
297  (ar & boost::serialization::base_object<RelAlgNode>(obj));
298  (ar & obj.collation_);
299  (ar & obj.limit_);
300  (ar & obj.offset_);
301  (ar & obj.empty_result_);
302  (ar & obj.limit_delivered_);
303  } else if constexpr (std::is_same_v<RelModify, RelAlgClass>) {
304  (ar & boost::serialization::base_object<RelAlgNode>(obj));
305  // NOTE: not serializing anything in regard to RelModify::catalog_ or
306  // table_descriptor_ members. They will be used as constructor-dependent arguments
307  // instead and will be saved/loaded with custom constructor data. See
308  // boost::serialization::save_construct_data / load_construct_data below.
309  (ar & obj.flattened_);
310  (ar & obj.operation_);
311  (ar & obj.target_column_list_);
312  } else if constexpr (std::is_same_v<RelTableFunction, RelAlgClass>) {
313  (ar & boost::serialization::base_object<RelAlgNode>(obj));
314  (ar & obj.function_name_);
315  (ar & obj.fields_);
316  (ar & obj.col_inputs_);
317  (ar & obj.table_func_inputs_);
318  (ar & obj.target_exprs_);
319  } else if constexpr (std::is_same_v<RelLogicalValues, RelAlgClass>) {
320  (ar & boost::serialization::base_object<RelAlgNode>(obj));
321  (ar & obj.tuple_type_);
322  (ar & obj.values_);
323  } else if constexpr (std::is_same_v<RelLogicalUnion, RelAlgClass>) {
324  (ar & boost::serialization::base_object<RelAlgNode>(obj));
325  (ar & obj.is_all_);
326  } else {
// `!sizeof(RelAlgClass)` is always false but depends on the template parameter, so
// the assertion fires only when this branch is instantiated, i.e. for a class that
// passed the enable_if but has no branch above.
327  static_assert(!sizeof(RelAlgClass),
328  "Unhandled RelAlgNode class during serialization");
329  }
330  }
331 
// Archives a whole RelAlgDag. First registers every class listed in
// REL_ALG_NODE_DERIVED_CLASSES and REX_DERIVED_CLASSES with the archive, then passes
// the DAG's build_state_, nodes_, subqueries_, query_hint_ and global_hints_ members
// through the archive, in that order.
//
// ar          - archive to write to or read from.
// rel_alg_dag - DAG whose members are archived.
// version     - class version supplied by the caller; not read by this function.
335  template <class Archive>
336  static void serialize(Archive& ar, RelAlgDag& rel_alg_dag, const unsigned int version) {
337  // Need to register all RelAlgNode- and Rex-derived classes for
338  // serialization. This is to ensure derived classes referenced via polymorphic
339  // pointer get properly designated for serialization.
340  registerClassesWithArchive<Archive, REL_ALG_NODE_DERIVED_CLASSES>(ar);
341  registerClassesWithArchive<Archive, REX_DERIVED_CLASSES>(ar);
342 
343  // NOTE: we are not archiving RelTranslatedJoin as it is a RelAlgNode only created
344  // during query execution and therefore not relevant here as the serialization
345  // archive for the RelAlgDag should only be saved/loaded before query execution to
346  // avoid having to serialize any query state
347 
348  // now archive relevant RelAlgDag members
349  (ar & rel_alg_dag.build_state_);
350  (ar & rel_alg_dag.nodes_);
351  (ar & rel_alg_dag.subqueries_);
352  (ar & rel_alg_dag.query_hint_);
353  (ar & rel_alg_dag.global_hints_);
354  }
355 };
356 
357 namespace boost {
358 namespace serialization {
359 
// boost::serialization free-function hook for loading from a text_iarchive.
// Enabled only for types satisfying all_serializable_rel_alg_classes_v; forwards
// unchanged to the matching RelAlgDagSerializer::serialize overload.
369 template <
370  class RelAlgType,
371  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
372 void serialize(boost::archive::text_iarchive& ar,
373  RelAlgType& obj,
374  const unsigned int version) {
375  RelAlgDagSerializer::serialize(ar, obj, version);
376 }
377 
// boost::serialization free-function hook for saving to a text_oarchive.
// Enabled only for types satisfying all_serializable_rel_alg_classes_v; forwards
// unchanged to the matching RelAlgDagSerializer::serialize overload.
378 template <
379  class RelAlgType,
380  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
381 void serialize(boost::archive::text_oarchive& ar,
382  RelAlgType& obj,
383  const unsigned int version) {
384  RelAlgDagSerializer::serialize(ar, obj, version);
385 }
386 
// Serialization hook for boost::blank. The body is intentionally empty: nothing is
// written to or read from the archive, and none of the parameters are used.
390 template <class Archive>
391 void serialize(Archive& ar, boost::blank& blank, const unsigned int version) {
392  // no-op. does nothing with an empty class
393 }
394 
395 /*******************************************************************************
396  * The following serializes constructor arguments for TableDescriptor-dependent
397  * classes, which are RelScan, RelProject, RelCompound, & RelModify.
398  *******************************************************************************/
399 
403 template <class T>
405  : std::bool_constant<std::is_same_v<RelScan, typename std::remove_cv_t<T>> ||
406  std::is_same_v<RelProject, typename std::remove_cv_t<T>> ||
407  std::is_same_v<RelCompound, typename std::remove_cv_t<T>> ||
408  std::is_same_v<RelModify, typename std::remove_cv_t<T>>> {};
409 
410 template <class T>
412 
413 template <class T>
415  : std::bool_constant<std::is_same_v<RelProject, typename std::remove_cv_t<T>> ||
416  std::is_same_v<RelCompound, typename std::remove_cv_t<T>>> {};
417 
418 template <class T>
419 inline constexpr bool is_modify_target_rel_alg_node_v =
421 
// Writes the constructor arguments of a catalog-dependent RelAlgNode to a
// text_oarchive. Exactly two strings are written, in this order:
//   1. the catalog name, or an empty string when no catalog is available;
//   2. the table name, or an empty string when the node has no table descriptor.
// Enabled only when is_catalog_rel_alg_node_v<RelAlgNodeType> is true.
//
// ar      - output archive.
// node    - node whose catalog and table descriptor are recorded.
// version - class version supplied by the caller; not read by this function.
433 template <class RelAlgNodeType,
434  typename std::enable_if_t<is_catalog_rel_alg_node_v<RelAlgNodeType>>* = nullptr>
435 inline void save_construct_data(boost::archive::text_oarchive& ar,
436  const RelAlgNodeType* node,
437  const unsigned int version) {
438  const Catalog_Namespace::Catalog* catalog{nullptr};
// Modify-target node types hand back the catalog through getModifiedTableCatalog(),
// whose result is assigned as a pointer and null-checked below. The other node types
// expose getCatalog(), whose result has its address taken here.
439  if constexpr (is_modify_target_rel_alg_node_v<RelAlgNodeType>) {
440  catalog = node->getModifiedTableCatalog();
441  } else {
442  catalog = &node->getCatalog();
443  }
444 
445  if (catalog) {
446  ar << catalog->name();
447  } else {
448  ar << std::string();
449  }
450 
451  auto* td = node->getTableDescriptor();
452  if (td) {
// An empty name is reserved as the "no table descriptor" marker, so a real table
// must never have one.
453  CHECK(!td->tableName.empty());
454  ar << td->tableName;
455  } else {
456  // we need to serialize an empty string as deserialization will expect to see a
457  // string. The empty string will indicate a null table descriptor. There are many
458  // circumstances in which a catalog-dependent RelAlgNode might have a null
459  // TableDescriptor. Generally speaking, RelScan and RelModify nodes require a valid
460  // table descriptor. RelCompound and RelProject do not.
461  ar << std::string();
462  }
463 }
464 
// Generic helper that constructs a RelAlgNodeType in place: placement-new into the
// storage pointed to by `node`, calling the constructor as RelAlgNodeType(td, cat).
// No memory is allocated here.
// NOTE(review): the parameter line declaring `cat` is not visible in this listing --
// confirm its exact type against the original header.
468 template <class RelAlgNodeType>
469 inline void construct_catalog_rel_alg_node(RelAlgNodeType* node,
471  const TableDescriptor* td) {
472  ::new (node) RelAlgNodeType(td, cat);
473 }
474 
480  const TableDescriptor* td) {
481  ::new (node) RelModify(cat, td);
482 }
483 
489  const TableDescriptor* td) {
490  ::new (node) RelScan(td, cat);
491 }
492 
// Reads back the constructor arguments of a catalog-dependent RelAlgNode from a
// text_iarchive, in the order save_construct_data wrote them: first the database
// (catalog) name, then the table name. An empty string for either means "none".
// Enabled only when is_catalog_rel_alg_node<RelAlgNodeType>::value is true.
//
// ar      - input archive.
// node    - pointer to the node being loaded; presumably raw storage that the object
//           is constructed into -- TODO confirm, the constructing calls are not
//           visible in this listing.
// version - class version supplied by the caller; not read in the visible lines.
//
// CHECK failures occur when a non-empty database name yields no catalog, when a table
// name is present without a catalog, when the table's metadata cannot be found, or --
// for non-modify-target node types -- when no catalog was resolved at all.
503 template <
504  class RelAlgNodeType,
505  typename std::enable_if_t<is_catalog_rel_alg_node<RelAlgNodeType>::value>* = nullptr>
506 inline void load_construct_data(boost::archive::text_iarchive& ar,
507  RelAlgNodeType* node,
508  const unsigned int version) {
509  std::string db_name;
510  ar >> db_name;
511  const Catalog_Namespace::Catalog* cat{nullptr};
512  const TableDescriptor* td{nullptr};
513  if (!db_name.empty()) {
// NOTE(review): the statement that resolves `cat` from db_name is not visible in
// this listing.
515  CHECK(cat) << "Catalog not found for database: " << db_name;
516  }
517 
518  std::string table_name;
519  ar >> table_name;
520  if (!table_name.empty()) {
// A table can only be looked up through a catalog.
521  CHECK(cat);
522  td = cat->getMetadataForTable(table_name, false);
523  CHECK(td) << "Table metadata not found for table: " << table_name
524  << " in catalog: " << cat->name();
525  }
526 
// NOTE(review): the statements that construct the node in each branch are not
// visible in this listing. Only the non-modify-target branch insists on a non-null
// catalog.
527  if constexpr (is_modify_target_rel_alg_node_v<RelAlgNodeType>) {
529  } else {
530  CHECK(cat);
532  }
533 }
534 
535 } // namespace serialization
536 } // namespace boost
constexpr bool is_catalog_rel_alg_node_v
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > query_hint_
Definition: RelAlgDag.h:2993
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
static void serialize(Archive &ar, RexClass &obj, const unsigned int version)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:2986
std::string name() const
Definition: Catalog.h:321
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
Definition: RelAlgDag.h:2987
string version
Definition: setup.in.py:73
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
void serialize(Archive &ar, RegisteredQueryHint &query_hint, const unsigned int version)
void construct_catalog_rel_alg_node(RelAlgNodeType *node, const Catalog_Namespace::Catalog *cat, const TableDescriptor *td)
static void registerClassesWithArchive(Archive &ar)
constexpr bool is_modify_target_rel_alg_node_v
#define CHECK(condition)
Definition: Logger.h:291
static void serialize(Archive &ar, RelAlgClass &obj, const unsigned int version)
RegisteredQueryHint global_hints_
Definition: RelAlgDag.h:2994
static void serialize(Archive &ar, RelAlgDag &rel_alg_dag, const unsigned int version)
void save_construct_data(Archive &ar, const ExplainedQueryHint *query_hint, const unsigned int version)
void load_construct_data(Archive &ar, ExplainedQueryHint *query_hint, const unsigned int version)
BuildState build_state_
Definition: RelAlgDag.h:2984