OmniSciDB  eb3a3d0a03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AbstractFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #pragma once
23 
24 #include "Fragmenter/Fragmenter.h"
25 
26 #include <boost/variant.hpp>
27 #include <string>
28 #include <vector>
29 
32 #include "Shared/UpdelRoll.h"
33 #include "Shared/sqltypes.h"
35 
36 // Should the ColumnInfo and FragmentInfo structs be in
37 // AbstractFragmenter?
38 
39 class Executor;
40 
41 namespace Chunk_NS {
42 class Chunk;
43 };
44 
45 namespace Data_Namespace {
46 class AbstractBuffer;
47 class AbstractDataMgr;
48 } // namespace Data_Namespace
49 
50 namespace import_export {
51 class TypedImportBuffer;
52 }
53 
54 namespace Catalog_Namespace {
55 class Catalog;
56 }
57 struct TableDescriptor;
58 struct ColumnDescriptor;
59 
60 namespace Fragmenter_Namespace {
61 
66  public:
67  virtual size_t const getRowCount() const = 0;
68  virtual size_t const getEntryCount() const = 0;
69  virtual StringDictionaryProxy* getLiteralDictionary() const = 0;
70  virtual std::vector<TargetValue> getEntryAt(const size_t index) const = 0;
71  virtual std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const = 0;
72 };
73 
75  bool has_null{false};
76  double max_double{std::numeric_limits<double>::lowest()};
77  double min_double{std::numeric_limits<double>::max()};
78  int64_t max_int64t{std::numeric_limits<int64_t>::min()};
79  int64_t min_int64t{std::numeric_limits<int64_t>::max()};
80 };
81 
82 /*
83  * @type ChunkUpdateStats
84  * @brief struct containing stats from a column chunk update.
85  * `new_values_stats` represents aggregate stats for the new
86  * values that were put into the chunk. `old_values_stats`
87  * represents aggregate stats for chunk values that were
88  * replaced.
89  */
93  int64_t updated_rows_count{0};
94  int64_t fragment_rows_count{0};
95  std::shared_ptr<Chunk_NS::Chunk> chunk;
96 };
97 
98 /*
99  * @type AbstractFragmenter
100  * @brief abstract base class for all table partitioners
101  *
102  * The virtual methods of this class provide an interface
103  * for getting the id and type of a
104  * partitioner, inserting data into a partitioner, and
105  * getting the partitions (fragments) managed by a
106  * partitioner that must be queried given a predicate
107  */
108 
110  public:
111  virtual ~AbstractFragmenter() {}
112 
120  // Disabled declaration (kept for reference):
121  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0) = 0;
122 
127  virtual size_t getNumFragments() = 0;
128 
132  virtual TableInfo getFragmentsForQuery() = 0;
133 
139  virtual void insertData(InsertData& insert_data_struct) = 0;
140 
146  virtual void insertDataNoCheckpoint(InsertData& insert_data_struct) = 0;
147 
152  virtual void dropFragmentsToSize(const size_t maxRows) = 0;
153 
157  virtual void updateChunkStats(
158  const ColumnDescriptor* cd,
159  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map,
160  std::optional<Data_Namespace::MemoryLevel> memory_level) = 0;
161 
165  virtual FragmentInfo* getFragmentInfo(const int fragment_id) const = 0;
166 
170  virtual int getFragmenterId() = 0;
171 
176  virtual std::string getFragmenterType() = 0;
177 
178  virtual size_t getNumRows() = 0;
179  virtual void setNumRows(const size_t numTuples) = 0;
180 
181  virtual std::optional<ChunkUpdateStats> updateColumn(
182  const Catalog_Namespace::Catalog* catalog,
183  const TableDescriptor* td,
184  const ColumnDescriptor* cd,
185  const int fragment_id,
186  const std::vector<uint64_t>& frag_offsets,
187  const std::vector<ScalarTargetValue>& rhs_values,
188  const SQLTypeInfo& rhs_type,
189  const Data_Namespace::MemoryLevel memory_level,
190  UpdelRoll& updel_roll) = 0;
191 
192  virtual void updateColumns(const Catalog_Namespace::Catalog* catalog,
193  const TableDescriptor* td,
194  const int fragmentId,
195  const std::vector<TargetMetaInfo> sourceMetaInfo,
196  const std::vector<const ColumnDescriptor*> columnDescriptors,
197  const RowDataProvider& sourceDataProvider,
198  const size_t indexOffFragmentOffsetColumn,
199  const Data_Namespace::MemoryLevel memoryLevel,
200  UpdelRoll& updelRoll,
201  Executor* executor) = 0;
202 
203  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
204  const TableDescriptor* td,
205  const ColumnDescriptor* cd,
206  const int fragment_id,
207  const std::vector<uint64_t>& frag_offsets,
208  const ScalarTargetValue& rhs_value,
209  const SQLTypeInfo& rhs_type,
210  const Data_Namespace::MemoryLevel memory_level,
211  UpdelRoll& updel_roll) = 0;
212 
213  virtual void updateColumnMetadata(const ColumnDescriptor* cd,
214  FragmentInfo& fragment,
215  std::shared_ptr<Chunk_NS::Chunk> chunk,
216  const UpdateValuesStats& update_values_stats,
217  const SQLTypeInfo& rhs_type,
218  UpdelRoll& updel_roll) = 0;
219 
220  virtual void updateMetadata(const Catalog_Namespace::Catalog* catalog,
221  const MetaDataKey& key,
222  UpdelRoll& updel_roll) = 0;
223 
224  virtual void compactRows(const Catalog_Namespace::Catalog* catalog,
225  const TableDescriptor* td,
226  const int fragmentId,
227  const std::vector<uint64_t>& fragOffsets,
228  const Data_Namespace::MemoryLevel memoryLevel,
229  UpdelRoll& updelRoll) = 0;
230 
231  virtual const std::vector<uint64_t> getVacuumOffsets(
232  const std::shared_ptr<Chunk_NS::Chunk>& chunk) = 0;
233 
234  virtual void dropColumns(const std::vector<int>& columnIds) = 0;
235 
237  virtual bool hasDeletedRows(const int delete_column_id) = 0;
238 
246  virtual void updateColumnChunkMetadata(
247  const ColumnDescriptor* cd,
248  const int fragment_id,
249  const std::shared_ptr<ChunkMetadata> metadata) = 0;
250 
256  virtual void resetSizesFromFragments() = 0;
257 };
258 
259 } // namespace Fragmenter_Namespace
std::shared_ptr< Chunk_NS::Chunk > chunk
virtual size_t getNumFragments()=0
Should get the partitions (fragments) where at least one tuple could satisfy the (optional) provided p...
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
virtual std::string getFragmenterType()=0
Gets the string type of the partitioner.
virtual std::optional< ChunkUpdateStats > updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)=0
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:40
virtual std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const =0
virtual bool hasDeletedRows(const int delete_column_id)=0
Iterates through chunk metadata to return whether any rows have been deleted.
Constants for Builtin SQL Types supported by OmniSci.
virtual void insertData(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
virtual void dropColumns(const std::vector< int > &columnIds)=0
virtual void dropFragmentsToSize(const size_t maxRows)=0
Will truncate table to less than maxRows by dropping fragments.
virtual void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< uint64_t > &fragOffsets, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll)=0
virtual const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk)=0
virtual TableInfo getFragmentsForQuery()=0
Get all fragments for the current table.
virtual FragmentInfo * getFragmentInfo(const int fragment_id) const =0
Retrieve the fragment info object for an individual fragment for editing.
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:77
virtual size_t const getEntryCount() const =0
An AbstractBuffer is a unit of data management for a data manager.
virtual int getFragmenterId()=0
Gets the id of the partitioner.
specifies the content in-memory of a row in the column metadata table
virtual void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata)=0
Updates the metadata for a column chunk.
virtual void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)=0
virtual void setNumRows(const size_t numTuples)=0
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:150
virtual void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)=0
virtual StringDictionaryProxy * getLiteralDictionary() const =0
virtual void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)=0
virtual void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level)=0
Update chunk stats.
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:68
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
virtual std::vector< TargetValue > getEntryAt(const size_t index) const =0
virtual void insertDataNoCheckpoint(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks and check...
virtual size_t const getRowCount() const =0
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156