OmniSciDB  b28c0d5765
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDagSerializer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <boost/archive/text_iarchive.hpp>
20 #include <boost/archive/text_oarchive.hpp>
21 #include <boost/serialization/access.hpp>
22 #include <boost/serialization/shared_ptr.hpp>
23 #include <boost/serialization/unique_ptr.hpp>
24 #include <boost/serialization/unordered_map.hpp>
25 #include <boost/serialization/variant.hpp>
26 #include <boost/serialization/vector.hpp>
27 
28 #include "QueryEngine/RelAlgDag.h"
36 #include "Shared/scope.h"
37 
47 // NOTE: RelTranslatedJoin is not in this list as it is a RelAlgNode only created
48 // during query execution and therefore not relevant here as RelAlgDag serialization
49 // should only be performed before query execution to avoid having to serialize any query
50 // state
// Comma-separated list of the concrete RelAlgNode-derived classes handled by this
// serializer. It is expanded as a template argument pack when registering classes
// with an archive (see the RelAlgDag serialize overload in this file).
51 #define REL_ALG_NODE_DERIVED_CLASSES \
52  RelScan, RelProject, RelAggregate, RelJoin, RelFilter, RelLeftDeepInnerJoin, \
53  RelCompound, RelSort, RelModify, RelTableFunction, RelLogicalValues, \
54  RelLogicalUnion
55 
// Comma-separated list of the Rex-derived classes handled by this serializer. It is
// expanded as a template argument pack when registering classes with an archive
// (see the RelAlgDag serialize overload in this file).
56 #define REX_DERIVED_CLASSES \
57  RexAbstractInput, RexLiteral, RexOperator, RexSubQuery, RexInput, RexCase, \
58  RexFunctionOperator, RexWindowFunctionOperator, RexRef, RexAgg
59 
60 namespace {
61 
65 template <class T, class... Ts>
67  : std::bool_constant<(std::is_same_v<T, typename std::remove_cv_t<Ts>> || ...)> {};
68 
73 template <class T>
76 
77 template <class T>
79 
83 template <class T>
85 
// Convenience variable template: the boolean value of is_rex_class<T>. Used below as
// the enable_if condition that selects the Rex overload of serialize().
86 template <class T>
87 inline constexpr bool is_rex_class_v = is_rex_class<T>::value;
88 
92 template <class T>
94  : std::bool_constant<is_rel_alg_node_class_v<T> || is_rex_class_v<T> ||
95  std::is_same_v<T, RelAlgDag>> {};
96 
97 template <class T>
98 inline constexpr bool all_serializable_rel_alg_classes_v =
100 
101 } // namespace
102 
110  // forward-declaring a deserialize context and thread-local storage for it in order
111  // to access a Catalog_Namespace::Catalog instance that will be used to populate
112  // RelAlgDag components that are dependent on catalog items, such as table/column
113  // descriptors.
115  static thread_local std::unique_ptr<RelAlgDagDeserializeContext>
117 
124 
// Returns a const reference to a Catalog_Namespace::Catalog.
// NOTE(review): presumably this is the catalog held by the thread-local deserialize
// context declared above; the definition is not visible in this header - confirm.
129  static const Catalog_Namespace::Catalog& getCatalog();
130 
140  template <class Archive,
141  class RexClass,
142  typename std::enable_if_t<is_rex_class_v<RexClass>>* = nullptr>
143  static void serialize(Archive& ar, RexClass& obj, const unsigned int version) {
144  if constexpr (std::is_same_v<Rex, RexClass>) {
145  (ar & obj.hash_);
146  } else if constexpr (std::is_same_v<RexScalar, RexClass>) {
147  (ar & boost::serialization::base_object<Rex>(obj));
148  } else if constexpr (std::is_same_v<RexAbstractInput, RexClass>) {
149  (ar & boost::serialization::base_object<RexScalar>(obj));
150  (ar & obj.in_index_);
151  } else if constexpr (std::is_same_v<RexLiteral, RexClass>) {
152  (ar & boost::serialization::base_object<RexScalar>(obj));
153  (ar & obj.literal_);
154  (ar & obj.type_);
155  (ar & obj.target_type_);
156  (ar & obj.scale_);
157  (ar & obj.precision_);
158  (ar & obj.target_scale_);
159  (ar & obj.target_precision_);
160  } else if constexpr (std::is_same_v<RexOperator, RexClass>) {
161  (ar & boost::serialization::base_object<RexScalar>(obj));
162  (ar & obj.op_);
163  (ar & obj.operands_);
164  (ar & obj.type_);
165  } else if constexpr (std::is_same_v<RexSubQuery, RexClass>) {
166  (ar & boost::serialization::base_object<RexScalar>(obj));
167  (ar & obj.type_);
168 
169  // Execution result should not be set before serialization. If it is means
170  // RelAlgExecutor got its hands on it first before serialization. This is not
171  // advised. Serialization should happen before any RelAlgExecutor processing.
172  CHECK(obj.result_);
173  CHECK(*obj.result_ == nullptr);
174 
175  // BUT we still need to serialize the RexSubQuery::result_. It is a shared_ptr of a
176  // shared_ptr. The outer shared ptr should always be defined, pointing to the
177  // interior shared_ptr that should be null. The way it is designed, this 2-tiered
178  // shared ptr acts as a link between RexSubQuery instances that were deep copied
179  // from a parent. A result should not exist, but the link should, so we need to
180  // serialize result_ (or find a better linking mechanism)
181  (ar & obj.result_);
182 
183  (ar & obj.ra_);
184  } else if constexpr (std::is_same_v<RexInput, RexClass>) {
185  (ar & boost::serialization::base_object<RexAbstractInput>(obj));
186  (ar & obj.node_);
187  } else if constexpr (std::is_same_v<RexCase, RexClass>) {
188  (ar & boost::serialization::base_object<RexScalar>(obj));
189  (ar & obj.expr_pair_list_);
190  (ar & obj.else_expr_);
191  } else if constexpr (std::is_same_v<RexFunctionOperator, RexClass>) {
192  (ar & boost::serialization::base_object<RexOperator>(obj));
193  (ar & obj.name_);
194  } else if constexpr (std::is_same_v<RexWindowFunctionOperator, RexClass>) {
195  (ar & boost::serialization::base_object<RexFunctionOperator>(obj));
196  (ar & obj.kind_);
197  (ar & obj.partition_keys_);
198  (ar & obj.order_keys_);
199  (ar & obj.collation_);
200  (ar & obj.frame_start_bound_);
201  (ar & obj.frame_end_bound_);
202  (ar & obj.is_rows_);
203  } else if constexpr (std::is_same_v<RexRef, RexClass>) {
204  (ar & boost::serialization::base_object<RexScalar>(obj));
205  (ar & obj.index_);
206  } else if constexpr (std::is_same_v<RexAgg, RexClass>) {
207  (ar & boost::serialization::base_object<Rex>(obj));
208  (ar & obj.agg_);
209  (ar & obj.distinct_);
210  (ar & obj.type_);
211  (ar & obj.operands_);
212  } else {
213  static_assert(!sizeof(RexClass), "Unhandled Rex class during serialization.");
214  }
215  }
216 
226  template <class Archive, class... RelAlgNodeClasses>
228  (ar.template register_type<RelAlgNodeClasses>(), ...);
229  }
230 
239  template <class Archive,
240  class RelAlgClass,
241  typename std::enable_if_t<is_rel_alg_node_class_v<RelAlgClass>>* = nullptr>
242  static void serialize(Archive& ar, RelAlgClass& obj, const unsigned int version) {
243  if constexpr (std::is_same_v<RelAlgNode, RelAlgClass>) {
244  (ar & obj.inputs_);
245  (ar & obj.id_);
246  (ar & obj.hash_);
247  (ar & obj.is_nop_);
248 
249  // NOTE: not serializing the id_in_plan_tree_, context_data_, targets_metainfo_,
250  // dag_node_id_, query_plan_dag_, & query_plan_dag_hash_ members. They are only
251  // needed for RelAlgExecutor pathways and not needed at the time serialization
252  // is needed.
253  } else if constexpr (std::is_same_v<RelScan, RelAlgClass>) {
254  (ar & boost::serialization::base_object<RelAlgNode>(obj));
255 
256  // NOTE: we're not serializing anything in regard to the member RelScan::td_. The
257  // table descriptor is instead a construction-dependent argument and will be
258  // serialized as part of the save/load contruction data. See
259  // boost::serialization::save_construct_data override below.
260  (ar & obj.field_names_);
261  (ar & obj.hint_applied_);
262  (ar & obj.hints_);
263  } else if constexpr (std::is_same_v<ModifyManipulationTarget, RelAlgClass>) {
264  (ar & obj.is_update_via_select_);
265  (ar & obj.is_delete_via_select_);
266  (ar & obj.varlen_update_required_);
267  (ar & obj.target_columns_);
268  (ar & obj.force_rowwise_output_);
269 
270  // NOTE: we're not serializing table_descriptor_. The table descriptor is
271  // instead a constructor-dependent argument and will be saved/loaded as part of
272  // custom contructor data. See: boost::serializer::load_construct_data below for
273  // more details.
274  } else if constexpr (std::is_same_v<RelProject, RelAlgClass>) {
275  (ar & boost::serialization::base_object<RelAlgNode>(obj));
276  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
277  (ar & obj.scalar_exprs_);
278  (ar & obj.fields_);
279  (ar & obj.hint_applied_);
280  (ar & obj.hints_);
281  (ar & obj.has_pushed_down_window_expr_);
282  } else if constexpr (std::is_same_v<RelAggregate, RelAlgClass>) {
283  (ar & boost::serialization::base_object<RelAlgNode>(obj));
284  (ar & obj.groupby_count_);
285  (ar & obj.agg_exprs_);
286  (ar & obj.fields_);
287  (ar & obj.hint_applied_);
288  (ar & obj.hints_);
289  } else if constexpr (std::is_same_v<RelJoin, RelAlgClass>) {
290  (ar & boost::serialization::base_object<RelAlgNode>(obj));
291  (ar & obj.condition_);
292  (ar & obj.join_type_);
293  (ar & obj.hint_applied_);
294  (ar & obj.hints_);
295  } else if constexpr (std::is_same_v<RelFilter, RelAlgClass>) {
296  (ar & boost::serialization::base_object<RelAlgNode>(obj));
297  (ar & obj.filter_);
298  } else if constexpr (std::is_same_v<RelLeftDeepInnerJoin, RelAlgClass>) {
299  (ar & boost::serialization::base_object<RelAlgNode>(obj));
300  (ar & obj.condition_);
301  (ar & obj.outer_conditions_per_level_);
302  (ar & obj.original_filter_);
303  (ar & obj.original_joins_);
304  } else if constexpr (std::is_same_v<RelCompound, RelAlgClass>) {
305  (ar & boost::serialization::base_object<RelAlgNode>(obj));
306  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
307 
308  (ar & obj.filter_expr_);
309  (ar & obj.groupby_count_);
310  (ar & obj.agg_exprs_);
311  (ar & obj.fields_);
312  (ar & obj.is_agg_);
313  (ar & obj.scalar_sources_);
314  (ar & obj.target_exprs_);
315  (ar & obj.hint_applied_);
316  (ar & obj.hints_);
317  } else if constexpr (std::is_same_v<RelSort, RelAlgClass>) {
318  (ar & boost::serialization::base_object<RelAlgNode>(obj));
319  (ar & obj.collation_);
320  (ar & obj.limit_);
321  (ar & obj.offset_);
322  (ar & obj.empty_result_);
323  (ar & obj.limit_delivered_);
324  } else if constexpr (std::is_same_v<RelModify, RelAlgClass>) {
325  (ar & boost::serialization::base_object<RelAlgNode>(obj));
326  // NOTE: not serializing anything in regard to RelModify::catalog_ or
327  // table_descriptor_ members. They will be used as constructor-dependent arguments
328  // instead and will be saved/loaded with custom constuctor data. See:
329  // RelAlgSerializer for more.
330  (ar & obj.flattened_);
331  (ar & obj.operation_);
332  (ar & obj.target_column_list_);
333  } else if constexpr (std::is_same_v<RelTableFunction, RelAlgClass>) {
334  (ar & boost::serialization::base_object<RelAlgNode>(obj));
335  (ar & obj.function_name_);
336  (ar & obj.fields_);
337  (ar & obj.col_inputs_);
338  (ar & obj.table_func_inputs_);
339  (ar & obj.target_exprs_);
340  } else if constexpr (std::is_same_v<RelLogicalValues, RelAlgClass>) {
341  (ar & boost::serialization::base_object<RelAlgNode>(obj));
342  (ar & obj.tuple_type_);
343  (ar & obj.values_);
344  } else if constexpr (std::is_same_v<RelLogicalUnion, RelAlgClass>) {
345  (ar & boost::serialization::base_object<RelAlgNode>(obj));
346  (ar & obj.is_all_);
347  } else {
348  static_assert(!sizeof(RelAlgClass),
349  "Unhandled RelAlgNode class during serialization");
350  }
351  }
352 
356  template <class Archive>
357  static void serialize(Archive& ar, RelAlgDag& rel_alg_dag, const unsigned int version) {
358  // Need to register all RelAlgNode and RexRexScalar-derived classes for
359  // serialization. This is to ensure derived classes referenced via polymorphic
360  // pointer get properly designated for serialization.
361  registerClassesWithArchive<Archive, REL_ALG_NODE_DERIVED_CLASSES>(ar);
362  registerClassesWithArchive<Archive, REX_DERIVED_CLASSES>(ar);
363 
364  // NOTE: we are not archiving RelTranslatedJoin as it is a RelAlgNode only created
365  // during query execution and therefore not relevant here as the serialization
366  // archive for the RelAlgDag should only be saved/loaded before query execution to
367  // avoid having to serialize any query state
368 
369  // now archive relevant RelAlgDag members
370  (ar & rel_alg_dag.build_state_);
371  (ar & rel_alg_dag.nodes_);
372  (ar & rel_alg_dag.subqueries_);
373  (ar & rel_alg_dag.query_hint_);
374  (ar & rel_alg_dag.global_hints_);
375  }
376 };
377 
378 namespace boost {
379 namespace serialization {
380 
390 template <
391  class RelAlgType,
392  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
393 void serialize(boost::archive::text_iarchive& ar,
394  RelAlgType& obj,
395  const unsigned int version) {
396  RelAlgDagSerializer::serialize(ar, obj, version);
397 }
398 
399 template <
400  class RelAlgType,
401  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
402 void serialize(boost::archive::text_oarchive& ar,
403  RelAlgType& obj,
404  const unsigned int version) {
405  RelAlgDagSerializer::serialize(ar, obj, version);
406 }
407 
411 template <class Archive>
412 void serialize(Archive& ar, boost::blank& blank, const unsigned int version) {
413  // no-op. does nothing with an empty class
414 }
415 
416 /*******************************************************************************
417  * The following serializes constructor arguments for TableDescriptor-dependent
418  * classes, which are RelScan, RelProject, RelCompound, & RelModify.
419  *******************************************************************************/
420 
424 template <class T>
426  : std::bool_constant<std::is_same_v<RelScan, typename std::remove_cv_t<T>> ||
427  std::is_same_v<RelProject, typename std::remove_cv_t<T>> ||
428  std::is_same_v<RelCompound, typename std::remove_cv_t<T>> ||
429  std::is_same_v<RelModify, typename std::remove_cv_t<T>>> {};
430 
431 template <class T>
433 
445 template <class RelAlgNodeType,
446  typename std::enable_if_t<is_catalog_rel_alg_node_v<RelAlgNodeType>>* = nullptr>
447 inline void save_construct_data(boost::archive::text_oarchive& ar,
448  const RelAlgNodeType* node,
449  const unsigned int version) {
450  auto* td = node->getTableDescriptor();
451  if (td) {
452  CHECK(!td->tableName.empty());
453  ar << td->tableName;
454  } else {
455  // we need to serialize an empty string as deserialization will expect to see a
456  // string. The empty string will indicate a null table descriptor. There are many
457  // circumstances in which a catalog-dependent RelAlgNode might have a null
458  // TableDescriptor. Generally speaking, RelScan and RelModify nodes require a valid
459  // table descriptor. RelCompound and RelProject do not.
460  ar << std::string();
461  }
462 }
463 
467 template <class RelAlgNodeType>
468 inline void construct_catalog_rel_alg_node(RelAlgNodeType* node,
470  const TableDescriptor* td) {
471  ::new (node) RelAlgNodeType(td);
472 }
473 
479  const TableDescriptor* td) {
480  ::new (node) RelModify(cat, td);
481 }
482 
493 template <
494  class RelAlgNodeType,
495  typename std::enable_if_t<is_catalog_rel_alg_node<RelAlgNodeType>::value>* = nullptr>
496 inline void load_construct_data(boost::archive::text_iarchive& ar,
497  RelAlgNodeType* node,
498  const unsigned int version) {
499  std::string table_name;
500  ar >> table_name;
502  const TableDescriptor* td{nullptr};
503  if (!table_name.empty()) {
504  td = cat.getMetadataForTable(table_name, false);
505  }
507 }
508 
509 } // namespace serialization
510 } // namespace boost
constexpr bool is_catalog_rel_alg_node_v
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > query_hint_
Definition: RelAlgDag.h:2963
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
static void serialize(Archive &ar, RexClass &obj, const unsigned int version)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:2956
static const Catalog_Namespace::Catalog & getCatalog()
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
Definition: RelAlgDag.h:2957
string version
Definition: setup.in.py:73
void serialize(Archive &ar, RegisteredQueryHint &query_hint, const unsigned int version)
static void registerClassesWithArchive(Archive &ar)
void construct_catalog_rel_alg_node(RelAlgNodeType *node, const Catalog_Namespace::Catalog &cat, const TableDescriptor *td)
#define CHECK(condition)
Definition: Logger.h:222
static void serialize(Archive &ar, RelAlgClass &obj, const unsigned int version)
RegisteredQueryHint global_hints_
Definition: RelAlgDag.h:2964
static void serialize(Archive &ar, RelAlgDag &rel_alg_dag, const unsigned int version)
static thread_local std::unique_ptr< RelAlgDagDeserializeContext > rel_alg_dag_deserialize_context
static ScopeGuard createContextScopeGuard(const Catalog_Namespace::Catalog &cat)
void save_construct_data(Archive &ar, const ExplainedQueryHint *query_hint, const unsigned int version)
void load_construct_data(Archive &ar, ExplainedQueryHint *query_hint, const unsigned int version)
BuildState build_state_
Definition: RelAlgDag.h:2954