OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AbstractFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #pragma once
23 
24 #include "Fragmenter/Fragmenter.h"
25 
26 #include <boost/variant.hpp>
27 #include <string>
28 #include <vector>
29 
32 #include "Shared/UpdelRoll.h"
33 #include "Shared/sqltypes.h"
35 
36 // Should the ColumnInfo and FragmentInfo structs be in
37 // AbstractFragmenter?
38 
39 class Executor;
40 
41 namespace Chunk_NS {
42 class Chunk;
43 };
44 
45 namespace Data_Namespace {
46 class AbstractBuffer;
47 class AbstractDataMgr;
48 } // namespace Data_Namespace
49 
50 namespace import_export {
51 class TypedImportBuffer;
52 }
53 
54 namespace Catalog_Namespace {
55 class Catalog;
56 }
57 struct TableDescriptor;
58 struct ColumnDescriptor;
59 
60 namespace Fragmenter_Namespace {
61 
66  public:
67  virtual size_t const getRowCount() const = 0;
68  virtual size_t const getEntryCount() const = 0;
69  virtual StringDictionaryProxy* getLiteralDictionary() const = 0;
70  virtual std::vector<TargetValue> getEntryAt(const size_t index) const = 0;
71  virtual std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const = 0;
72 };
73 
75  bool has_null{false};
76  double max_double{std::numeric_limits<double>::lowest()};
77  double min_double{std::numeric_limits<double>::max()};
78  int64_t max_int64t{std::numeric_limits<int64_t>::min()};
79  int64_t min_int64t{std::numeric_limits<int64_t>::max()};
80 };
81 
82 /*
83  * @type ChunkUpdateStats
84  * @brief struct containing stats from a column chunk update.
85  * `new_values_stats` represents aggregate stats for the new
86  * values that were put into the chunk. `old_values_stats`
87  * represents aggregate stats for chunk values that were
88  * replaced.
89  */
93  int64_t updated_rows_count{0};
94  int64_t fragment_rows_count{0};
95  std::shared_ptr<Chunk_NS::Chunk> chunk;
96 };
97 
98 /*
99  * @type AbstractFragmenter
100  * @brief abstract base class for all table partitioners
101  *
102  * The virtual methods of this class provide an interface
103  * for getting the id and type of a
104  * partitioner, inserting data into a partitioner, and
105  * getting the partitions (fragments) managed by a
106  * partitioner that must be queried given a predicate
107  */
108 
110  public:
111  virtual ~AbstractFragmenter() {}
112 
120  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0) =
121  // 0
122 
127  virtual size_t getNumFragments() = 0;
128 
132  virtual TableInfo getFragmentsForQuery() = 0;
133 
139  virtual void insertData(InsertData& insert_data_struct) = 0;
140 
146  virtual void insertChunks(const InsertChunks& insert_chunk) = 0;
147 
153  virtual void insertDataNoCheckpoint(InsertData& insert_data_struct) = 0;
154 
161  virtual void insertChunksNoCheckpoint(const InsertChunks& insert_chunk) = 0;
162 
167  virtual void dropFragmentsToSize(const size_t maxRows) = 0;
168 
172  virtual void updateChunkStats(
173  const ColumnDescriptor* cd,
174  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map,
175  std::optional<Data_Namespace::MemoryLevel> memory_level) = 0;
176 
180  virtual FragmentInfo* getFragmentInfo(const int fragment_id) const = 0;
181 
185  virtual int getFragmenterId() = 0;
186 
191  virtual std::string getFragmenterType() = 0;
192 
193  virtual size_t getNumRows() = 0;
194  virtual void setNumRows(const size_t numTuples) = 0;
195 
196  virtual std::optional<ChunkUpdateStats> updateColumn(
197  const Catalog_Namespace::Catalog* catalog,
198  const TableDescriptor* td,
199  const ColumnDescriptor* cd,
200  const int fragment_id,
201  const std::vector<uint64_t>& frag_offsets,
202  const std::vector<ScalarTargetValue>& rhs_values,
203  const SQLTypeInfo& rhs_type,
204  const Data_Namespace::MemoryLevel memory_level,
205  UpdelRoll& updel_roll) = 0;
206 
207  virtual void updateColumns(const Catalog_Namespace::Catalog* catalog,
208  const TableDescriptor* td,
209  const int fragmentId,
210  const std::vector<TargetMetaInfo> sourceMetaInfo,
211  const std::vector<const ColumnDescriptor*> columnDescriptors,
212  const RowDataProvider& sourceDataProvider,
213  const size_t indexOffFragmentOffsetColumn,
214  const Data_Namespace::MemoryLevel memoryLevel,
215  UpdelRoll& updelRoll,
216  Executor* executor) = 0;
217 
218  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
219  const TableDescriptor* td,
220  const ColumnDescriptor* cd,
221  const int fragment_id,
222  const std::vector<uint64_t>& frag_offsets,
223  const ScalarTargetValue& rhs_value,
224  const SQLTypeInfo& rhs_type,
225  const Data_Namespace::MemoryLevel memory_level,
226  UpdelRoll& updel_roll) = 0;
227 
228  virtual void updateColumnMetadata(const ColumnDescriptor* cd,
229  FragmentInfo& fragment,
230  std::shared_ptr<Chunk_NS::Chunk> chunk,
231  const UpdateValuesStats& update_values_stats,
232  const SQLTypeInfo& rhs_type,
233  UpdelRoll& updel_roll) = 0;
234 
235  virtual void updateMetadata(const Catalog_Namespace::Catalog* catalog,
236  const MetaDataKey& key,
237  UpdelRoll& updel_roll) = 0;
238 
239  virtual void compactRows(const Catalog_Namespace::Catalog* catalog,
240  const TableDescriptor* td,
241  const int fragmentId,
242  const std::vector<uint64_t>& fragOffsets,
243  const Data_Namespace::MemoryLevel memoryLevel,
244  UpdelRoll& updelRoll) = 0;
245 
246  virtual const std::vector<uint64_t> getVacuumOffsets(
247  const std::shared_ptr<Chunk_NS::Chunk>& chunk) = 0;
248 
249  virtual void dropColumns(const std::vector<int>& columnIds) = 0;
250 
252  virtual bool hasDeletedRows(const int delete_column_id) = 0;
253 
261  virtual void updateColumnChunkMetadata(
262  const ColumnDescriptor* cd,
263  const int fragment_id,
264  const std::shared_ptr<ChunkMetadata> metadata) = 0;
265 
271  virtual void resetSizesFromFragments() = 0;
272 };
273 
274 } // namespace Fragmenter_Namespace
std::shared_ptr< Chunk_NS::Chunk > chunk
virtual size_t getNumFragments()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:114
virtual std::string getFragmenterType()=0
Gets the string type of the partitioner.
virtual std::optional< ChunkUpdateStats > updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)=0
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:40
virtual std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const =0
virtual bool hasDeletedRows(const int delete_column_id)=0
Iterates through chunk metadata to return whether any rows have been deleted.
Constants for Builtin SQL Types supported by OmniSci.
virtual void insertData(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
virtual void dropColumns(const std::vector< int > &columnIds)=0
virtual void dropFragmentsToSize(const size_t maxRows)=0
Will truncate table to less than maxRows by dropping fragments.
virtual void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< uint64_t > &fragOffsets, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll)=0
virtual const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk)=0
virtual TableInfo getFragmentsForQuery()=0
Get all fragments for the current table.
virtual FragmentInfo * getFragmentInfo(const int fragment_id) const =0
Retrieve the fragment info object for an individual fragment for editing.
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:84
virtual size_t const getEntryCount() const =0
An AbstractBuffer is a unit of data management for a data manager.
virtual int getFragmenterId()=0
Gets the id of the partitioner.
specifies the content in-memory of a row in the column metadata table
virtual void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata)=0
Updates the metadata for a column chunk.
virtual void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)=0
virtual void setNumRows(const size_t numTuples)=0
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:173
virtual void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)=0
virtual void insertChunksNoCheckpoint(const InsertChunks &insert_chunk)=0
Insert chunks into minimal number of fragments; no locks or checkpoints taken.
virtual StringDictionaryProxy * getLiteralDictionary() const =0
virtual void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)=0
virtual void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level)=0
Update chunk stats.
virtual void insertChunks(const InsertChunks &insert_chunk)=0
Insert chunks into minimal number of fragments.
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:74
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:66
virtual std::vector< TargetValue > getEntryAt(const size_t index) const =0
virtual void insertDataNoCheckpoint(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks and check...
virtual size_t const getRowCount() const =0
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156