OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AbstractFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #pragma once
23 
24 #include "Fragmenter/Fragmenter.h"
25 
26 #include <boost/variant.hpp>
27 #include <string>
28 #include <vector>
29 
32 #include "Shared/UpdelRoll.h"
33 #include "Shared/sqltypes.h"
35 
36 // Should the ColumnInfo and FragmentInfo structs be in
37 // AbstractFragmenter?
38 
39 class Executor;
40 
41 namespace Chunk_NS {
42 class Chunk;
43 };
44 
45 namespace Data_Namespace {
46 class AbstractBuffer;
47 class AbstractDataMgr;
48 } // namespace Data_Namespace
49 
50 namespace import_export {
51 class TypedImportBuffer;
52 }
53 
54 namespace Catalog_Namespace {
55 class Catalog;
56 }
57 struct TableDescriptor;
58 struct ColumnDescriptor;
59 
60 namespace Fragmenter_Namespace {
61 
66  public:
67  virtual size_t const getRowCount() const = 0;
68  virtual size_t const getEntryCount() const = 0;
69  virtual StringDictionaryProxy* getLiteralDictionary() const = 0;
70  virtual std::vector<TargetValue> getEntryAt(const size_t index) const = 0;
71  virtual std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const = 0;
72 };
73 
75  bool has_null{false};
76  double max_double{std::numeric_limits<double>::lowest()};
77  double min_double{std::numeric_limits<double>::max()};
78  int64_t max_int64t{std::numeric_limits<int64_t>::min()};
79  int64_t min_int64t{std::numeric_limits<int64_t>::max()};
80 };
81 
82 /*
83  * @type ChunkUpdateStats
84  * @brief struct containing stats from a column chunk update.
85  * `new_values_stats` represents aggregate stats for the new
86  * values that were put into the chunk. `old_values_stats`
87  * represents aggregate stats for chunk values that were
88  * replaced.
89  */
93  int64_t updated_rows_count{0};
94  int64_t fragment_rows_count{0};
95  std::shared_ptr<Chunk_NS::Chunk> chunk;
96 };
97 
98 /*
99  * @type AbstractFragmenter
100  * @brief abstract base class for all table partitioners
101  *
102  * The virtual methods of this class provide an interface
103  * for getting the id and type of a
104  * partitioner, inserting data into a partitioner, and
105  * getting the partitions (fragments) managed by a
106  * partitioner that must be queried given a predicate
107  */
108 
110  public:
111  virtual ~AbstractFragmenter() {}
112 
120  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0) =
121  // 0
122 
// Returns a TableInfo for the fragments to be queried. The generated docs
// describe this as the fragments where at least one tuple could satisfy an
// optional predicate; NOTE(review): this signature takes no predicate, so
// confirm whether any filtering actually happens here.
126  virtual TableInfo getFragmentsForQuery() = 0;
127 
// Given data wrapped in an InsertData struct, inserts it into the correct
// partitions with locks. NOTE(review): presumably also checkpoints, in
// contrast to insertDataNoCheckpoint -- confirm against the implementation.
133  virtual void insertData(InsertData& insert_data_struct) = 0;
134 
// Given data wrapped in an InsertData struct, inserts it into the correct
// partitions without taking locks. NOTE(review): the name indicates no
// checkpoint is performed either -- confirm against the implementation.
140  virtual void insertDataNoCheckpoint(InsertData& insert_data_struct) = 0;
141 
// Truncates the table to fewer than maxRows rows by dropping fragments.
146  virtual void dropFragmentsToSize(const size_t maxRows) = 0;
147 
// Updates chunk stats for column `cd`. `stats_map` is keyed by fragment id.
// `memory_level` is optional; NOTE(review): behavior when it is empty is not
// visible from this header -- confirm in the implementation.
151  virtual void updateChunkStats(
152  const ColumnDescriptor* cd,
153  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map,
154  std::optional<Data_Namespace::MemoryLevel> memory_level) = 0;
155 
// Retrieves the fragment info object for the fragment with the given id, for
// editing (hence the non-const FragmentInfo* from a const method).
159  virtual FragmentInfo* getFragmentInfo(const int fragment_id) const = 0;
160 
// Gets the id of the partitioner.
164  virtual int getFragmenterId() = 0;
165 
// Gets the string type of the partitioner.
170  virtual std::string getFragmenterType() = 0;
171 
172  virtual size_t getNumRows() = 0;
173  virtual void setNumRows(const size_t numTuples) = 0;
174 
175  virtual std::optional<ChunkUpdateStats> updateColumn(
176  const Catalog_Namespace::Catalog* catalog,
177  const TableDescriptor* td,
178  const ColumnDescriptor* cd,
179  const int fragment_id,
180  const std::vector<uint64_t>& frag_offsets,
181  const std::vector<ScalarTargetValue>& rhs_values,
182  const SQLTypeInfo& rhs_type,
183  const Data_Namespace::MemoryLevel memory_level,
184  UpdelRoll& updel_roll) = 0;
185 
186  virtual void updateColumns(const Catalog_Namespace::Catalog* catalog,
187  const TableDescriptor* td,
188  const int fragmentId,
189  const std::vector<TargetMetaInfo> sourceMetaInfo,
190  const std::vector<const ColumnDescriptor*> columnDescriptors,
191  const RowDataProvider& sourceDataProvider,
192  const size_t indexOffFragmentOffsetColumn,
193  const Data_Namespace::MemoryLevel memoryLevel,
194  UpdelRoll& updelRoll,
195  Executor* executor) = 0;
196 
197  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
198  const TableDescriptor* td,
199  const ColumnDescriptor* cd,
200  const int fragment_id,
201  const std::vector<uint64_t>& frag_offsets,
202  const ScalarTargetValue& rhs_value,
203  const SQLTypeInfo& rhs_type,
204  const Data_Namespace::MemoryLevel memory_level,
205  UpdelRoll& updel_roll) = 0;
206 
207  virtual void updateColumnMetadata(const ColumnDescriptor* cd,
208  FragmentInfo& fragment,
209  std::shared_ptr<Chunk_NS::Chunk> chunk,
210  const UpdateValuesStats& update_values_stats,
211  const SQLTypeInfo& rhs_type,
212  UpdelRoll& updel_roll) = 0;
213 
214  virtual void updateMetadata(const Catalog_Namespace::Catalog* catalog,
215  const MetaDataKey& key,
216  UpdelRoll& updel_roll) = 0;
217 
218  virtual void compactRows(const Catalog_Namespace::Catalog* catalog,
219  const TableDescriptor* td,
220  const int fragmentId,
221  const std::vector<uint64_t>& fragOffsets,
222  const Data_Namespace::MemoryLevel memoryLevel,
223  UpdelRoll& updelRoll) = 0;
224 
225  virtual const std::vector<uint64_t> getVacuumOffsets(
226  const std::shared_ptr<Chunk_NS::Chunk>& chunk) = 0;
227 
228  virtual void dropColumns(const std::vector<int>& columnIds) = 0;
229 
// Iterates through chunk metadata to return whether any rows have been
// deleted. `delete_column_id` presumably identifies the delete-marker
// column -- confirm against callers.
231  virtual bool hasDeletedRows(const int delete_column_id) = 0;
232 
// Updates the metadata for a column chunk, identified by column descriptor
// `cd` and `fragment_id`, using the supplied `metadata`.
240  virtual void updateColumnChunkMetadata(
241  const ColumnDescriptor* cd,
242  const int fragment_id,
243  const std::shared_ptr<ChunkMetadata> metadata) = 0;
244 };
245 
246 } // namespace Fragmenter_Namespace
std::shared_ptr< Chunk_NS::Chunk > chunk
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
virtual std::string getFragmenterType()=0
Gets the string type of the partitioner.
virtual std::optional< ChunkUpdateStats > updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)=0
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:40
virtual std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const =0
virtual bool hasDeletedRows(const int delete_column_id)=0
Iterates through chunk metadata to return whether any rows have been deleted.
Constants for Builtin SQL Types supported by OmniSci.
virtual void insertData(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
virtual void dropColumns(const std::vector< int > &columnIds)=0
virtual void dropFragmentsToSize(const size_t maxRows)=0
Will truncate table to less than maxRows by dropping fragments.
virtual void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< uint64_t > &fragOffsets, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll)=0
virtual const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk)=0
virtual TableInfo getFragmentsForQuery()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
virtual FragmentInfo * getFragmentInfo(const int fragment_id) const =0
Retrieve the fragment info object for an individual fragment for editing.
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:77
virtual size_t const getEntryCount() const =0
An AbstractBuffer is a unit of data management for a data manager.
virtual int getFragmenterId()=0
Gets the id of the partitioner.
specifies the content in-memory of a row in the column metadata table
virtual void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata)=0
Updates the metadata for a column chunk.
virtual void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)=0
virtual void setNumRows(const size_t numTuples)=0
virtual void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)=0
virtual StringDictionaryProxy * getLiteralDictionary() const =0
virtual void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)=0
virtual void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level)=0
Update chunk stats.
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:69
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:142
virtual std::vector< TargetValue > getEntryAt(const size_t index) const =0
virtual void insertDataNoCheckpoint(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions No locks and check...
virtual size_t const getRowCount() const =0
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156