OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
AbstractFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #pragma once
23 
24 #include "Fragmenter/Fragmenter.h"
25 
26 #include <boost/variant.hpp>
27 #include <string>
28 #include <vector>
29 
32 #include "Shared/UpdelRoll.h"
33 #include "Shared/sqltypes.h"
35 
36 // Should the ColumnInfo and FragmentInfo structs be in
37 // AbstractFragmenter?
38 
39 class Executor;
40 
41 namespace Chunk_NS {
42 class Chunk;
43 };
44 
45 namespace Data_Namespace {
46 class AbstractBuffer;
47 class AbstractDataMgr;
48 }; // namespace Data_Namespace
49 
50 namespace import_export {
51 class TypedImportBuffer;
52 };
53 
54 namespace Catalog_Namespace {
55 class Catalog;
56 }
57 struct TableDescriptor;
58 struct ColumnDescriptor;
59 
60 namespace Fragmenter_Namespace {
61 
66  public:
 // Pure virtual, const-qualified accessor that reports a row count as a size_t.
 // The const on the by-value return type has no effect for callers.
 // NOTE(review): presumably the number of source rows this provider can
 // supply -- confirm against the implementing classes.
 67  virtual size_t const getRowCount() const = 0;
 // Pure virtual, const-qualified accessor that reports an entry count as a size_t.
 // NOTE(review): how an "entry" differs from a "row" (see getRowCount) is not
 // visible in this header -- confirm against the implementing classes.
 68  virtual size_t const getEntryCount() const = 0;
 // Pure virtual accessor returning a raw StringDictionaryProxy pointer.
 // NOTE(review): ownership of the pointee and whether the result may be null
 // are not stated here -- confirm before storing or dereferencing it.
 69  virtual StringDictionaryProxy* getLiteralDictionary() const = 0;
 // Pure virtual: returns, by value, the TargetValues of the entry at `index`.
 // NOTE(review): presumably `index` must be below getEntryCount(); the valid
 // range is not stated in this header.
 70  virtual std::vector<TargetValue> getEntryAt(const size_t index) const = 0;
 // Pure virtual: same signature shape as getEntryAt() -- returns, by value, the
 // TargetValues of the entry at `index`.
 // NOTE(review): which translation is applied is not visible here (presumably
 // string-dictionary translation) -- confirm against the implementing classes.
 71  virtual std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const = 0;
72 };
73 
74 /*
75  * @type AbstractFragmenter
76  * @brief abstract base class for all table partitioners
77  *
78  * The virtual methods of this class provide an interface
79  * for getting the id and type of a
80  * partitioner, inserting data into a partitioner, and
81  * getting the partitions (fragments) managed by a
82  * partitioner that must be queried given a predicate
83  */
84 
86  public:
// Virtual destructor: lets a derived fragmenter be destroyed correctly through
// a pointer to this interface. There is nothing to release at this level, so
// the compiler-generated body is used.
87  virtual ~AbstractFragmenter() = default;
88 
96  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0) =
97  // 0
98 
// Returns, by value, a TableInfo describing the fragments a query should
// consider. The generated documentation describes these as the fragments where
// at least one tuple could satisfy an optional predicate; this signature takes
// no predicate (the commented-out variant above did).
102  virtual TableInfo getFragmentsForQuery() = 0;
103 
// Inserts the data wrapped in insertDataStruct into the correct partitions,
// taking locks as part of the operation.
// NOTE(review): the generated brief is cut off after "with locks and che..." --
// presumably "checkpoints", in contrast to insertDataNoCheckpoint(); confirm.
109  virtual void insertData(InsertData& insertDataStruct) = 0;
110 
// Inserts the data wrapped in insertDataStruct into the correct partitions
// without taking locks.
// NOTE(review): the name and the truncated brief ("No locks and check...")
// suggest no checkpoint is performed either, leaving both to the caller --
// confirm against the implementation.
116  virtual void insertDataNoCheckpoint(InsertData& insertDataStruct) = 0;
117 
// Truncates the table to fewer than maxRows rows by dropping fragments.
122  virtual void dropFragmentsToSize(const size_t maxRows) = 0;
123 
// Updates chunk stats.
//   cd        - column descriptor; presumably the column whose chunk stats are
//               updated -- confirm against the implementation.
//   stats_map - ChunkStats keyed by fragment id, passed by non-const reference.
// NOTE(review): whether implementations modify stats_map is not visible here.
127  virtual void updateChunkStats(
128  const ColumnDescriptor* cd,
129  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map) = 0;
130 
// Retrieves the FragmentInfo object of the fragment identified by fragment_id,
// for editing: the method is const but hands back a pointer to non-const
// FragmentInfo.
// NOTE(review): the result for an unknown fragment_id (null vs. exception) is
// not stated here -- confirm before dereferencing.
134  virtual FragmentInfo* getFragmentInfo(const int fragment_id) const = 0;
135 
// Gets the id of the partitioner.
// Not const-qualified, so it cannot be called through a const reference or
// pointer to the fragmenter.
139  virtual int getFragmenterId() = 0;
140 
// Gets the string type of the partitioner, returned by value.
// Not const-qualified, so it cannot be called through a const reference or
// pointer to the fragmenter.
145  virtual std::string getFragmenterType() = 0;
146 
// Pure virtual accessor returning a row count as a size_t.
// NOTE(review): presumably the total number of rows managed by this
// fragmenter; no description is visible in this header -- confirm.
147  virtual size_t getNumRows() = 0;
// Pure virtual setter paired with getNumRows(); numTuples is the new count.
// NOTE(review): presumably this only overrides the recorded count and does not
// add or remove data -- confirm against the implementation.
148  virtual void setNumRows(const size_t numTuples) = 0;
149 
// Pure virtual column update taking a vector of right-hand-side values (see
// the sibling overload that takes a single ScalarTargetValue).
// NOTE(review): no description is visible in this header; the parameter notes
// below are inferred from names and types only -- confirm before relying on them.
//   catalog      - catalog the table belongs to
//   td, cd       - descriptors of the table and column being updated
//   fragment_id  - id of the fragment holding the rows
//   frag_offsets - presumably the row offsets within that fragment to update
//   rhs_values   - new values; presumably either one per offset or a single
//                  value applied to all offsets
//   rhs_type     - SQL type of the values in rhs_values
//   memory_level - memory level to operate on
//   updel_roll   - passed by non-const reference, so implementations may
//                  record state in it
150  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
151  const TableDescriptor* td,
152  const ColumnDescriptor* cd,
153  const int fragment_id,
154  const std::vector<uint64_t>& frag_offsets,
155  const std::vector<ScalarTargetValue>& rhs_values,
156  const SQLTypeInfo& rhs_type,
157  const Data_Namespace::MemoryLevel memory_level,
158  UpdelRoll& updel_roll) = 0;
159 
// Pure virtual multi-column update whose new values come from a
// RowDataProvider rather than from explicit value vectors.
// sourceMetaInfo and columnDescriptors are taken by const value, so each call
// copies both vectors; switching them to const references would require
// changing every overrider in step.
// NOTE(review): no description is visible in this header; the parameter notes
// below are inferred from names and types only -- confirm before relying on them.
//   catalog, td        - catalog and descriptor of the table being updated
//   fragmentId         - id of the fragment holding the rows
//   sourceMetaInfo     - metadata of the source columns
//   columnDescriptors  - descriptors of the columns to update
//   sourceDataProvider - supplies the source rows
//   indexOffFragmentOffsetColumn - presumably the index *of* the source column
//                        that carries fragment offsets ("Off" looks like a typo)
//   memoryLevel        - memory level to operate on
//   updelRoll          - passed by non-const reference, so implementations may
//                        record state in it
//   executor           - executor used while performing the update
160  virtual void updateColumns(const Catalog_Namespace::Catalog* catalog,
161  const TableDescriptor* td,
162  const int fragmentId,
163  const std::vector<TargetMetaInfo> sourceMetaInfo,
164  const std::vector<const ColumnDescriptor*> columnDescriptors,
165  const RowDataProvider& sourceDataProvider,
166  const size_t indexOffFragmentOffsetColumn,
167  const Data_Namespace::MemoryLevel memoryLevel,
168  UpdelRoll& updelRoll,
169  Executor* executor) = 0;
170 
// Pure virtual column update taking a single right-hand-side value; otherwise
// the same parameter list as the overload that takes a vector of values.
// NOTE(review): no description is visible in this header; the parameter notes
// below are inferred from names and types only -- confirm before relying on them.
//   catalog      - catalog the table belongs to
//   td, cd       - descriptors of the table and column being updated
//   fragment_id  - id of the fragment holding the rows
//   frag_offsets - presumably the row offsets within that fragment to update
//   rhs_value    - the new value; presumably applied to every listed offset
//   rhs_type     - SQL type of rhs_value
//   memory_level - memory level to operate on
//   updel_roll   - passed by non-const reference, so implementations may
//                  record state in it
171  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
172  const TableDescriptor* td,
173  const ColumnDescriptor* cd,
174  const int fragment_id,
175  const std::vector<uint64_t>& frag_offsets,
176  const ScalarTargetValue& rhs_value,
177  const SQLTypeInfo& rhs_type,
178  const Data_Namespace::MemoryLevel memory_level,
179  UpdelRoll& updel_roll) = 0;
180 
// Pure virtual hook for updating the metadata of column `cd` in `fragment`.
// `chunk` is a shared_ptr taken by value, so each call copies the pointer.
// NOTE(review): no description is visible in this header; the parameter notes
// below are inferred from names and types only -- confirm before relying on them.
//   null       - presumably whether a null value is present in the new data
//   dmax, dmin - presumably max/min of the new values as doubles
//   lmax, lmin - presumably max/min of the new values as 64-bit integers
//   rhs_type   - SQL type of the values that were written
//   updel_roll - passed by non-const reference, so implementations may record
//                state in it
181  virtual void updateColumnMetadata(const ColumnDescriptor* cd,
182  FragmentInfo& fragment,
183  std::shared_ptr<Chunk_NS::Chunk> chunk,
184  const bool null,
185  const double dmax,
186  const double dmin,
187  const int64_t lmax,
188  const int64_t lmin,
189  const SQLTypeInfo& rhs_type,
190  UpdelRoll& updel_roll) = 0;
191 
// Pure virtual metadata update for the entry identified by `key`.
// MetaDataKey is a std::pair of (const TableDescriptor*, FragmentInfo*), i.e.
// a table plus one of its fragments.
// NOTE(review): which metadata is written, and from what state in updel_roll,
// is not visible in this header -- confirm against the implementation.
192  virtual void updateMetadata(const Catalog_Namespace::Catalog* catalog,
193  const MetaDataKey& key,
194  UpdelRoll& updel_roll) = 0;
195 
// Pure virtual row compaction for fragment `fragmentId` of table `td`.
// NOTE(review): no description is visible in this header; the parameter notes
// below are inferred from names and types only -- confirm before relying on them.
//   fragOffsets - presumably the offsets of the rows to remove, e.g. as
//                 returned by getVacuumOffsets()
//   memoryLevel - memory level to operate on
//   updelRoll   - passed by non-const reference, so implementations may record
//                 state in it
196  virtual void compactRows(const Catalog_Namespace::Catalog* catalog,
197  const TableDescriptor* td,
198  const int fragmentId,
199  const std::vector<uint64_t>& fragOffsets,
200  const Data_Namespace::MemoryLevel memoryLevel,
201  UpdelRoll& updelRoll) = 0;
202 
// Pure virtual: returns, by value, a vector of 64-bit offsets derived from
// `chunk`. Because the returned vector is const-qualified, assigning the
// result to an existing vector copies it instead of moving it.
// NOTE(review): presumably the offsets of deleted rows that a vacuum should
// reclaim, with `chunk` being the delete-marker column's chunk -- confirm.
203  virtual const std::vector<uint64_t> getVacuumOffsets(
204  const std::shared_ptr<Chunk_NS::Chunk>& chunk) = 0;
205 
// Pure virtual: drops the columns whose ids are listed in columnIds.
// NOTE(review): presumably removes the corresponding chunks from every
// fragment; the exact effect is not visible in this header -- confirm.
206  virtual void dropColumns(const std::vector<int>& columnIds) = 0;
207 
// Iterates through chunk metadata to return whether any rows have been deleted.
// NOTE(review): delete_column_id is presumably the id of the column that marks
// rows as deleted -- confirm against the caller.
209  virtual bool hasDeletedRows(const int delete_column_id) = 0;
210 };
211 
212 } // namespace Fragmenter_Namespace
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
virtual std::string getFragmenterType()=0
Gets the string type of the partitioner.
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:40
virtual std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const =0
virtual void updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)=0
virtual bool hasDeletedRows(const int delete_column_id)=0
Iterates through chunk metadata to return whether any rows have been deleted.
Constants for Builtin SQL Types supported by OmniSci.
virtual void dropColumns(const std::vector< int > &columnIds)=0
virtual void dropFragmentsToSize(const size_t maxRows)=0
Will truncate table to less than maxRows by dropping fragments.
virtual void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< uint64_t > &fragOffsets, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll)=0
virtual const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk)=0
virtual TableInfo getFragmentsForQuery()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
virtual FragmentInfo * getFragmentInfo(const int fragment_id) const =0
Retrieve the fragment info object for an individual fragment for editing.
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:78
virtual size_t const getEntryCount() const =0
virtual void insertDataNoCheckpoint(InsertData &insertDataStruct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions No locks and check...
An AbstractBuffer is a unit of data management for a data manager.
virtual int getFragmenterId()=0
Gets the id of the partitioner.
specifies the content in-memory of a row in the column metadata table
virtual void setNumRows(const size_t numTuples)=0
virtual void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)=0
virtual StringDictionaryProxy * getLiteralDictionary() const =0
virtual void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)=0
virtual void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const bool null, const double dmax, const double dmin, const int64_t lmax, const int64_t lmin, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)=0
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:67
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
virtual void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map)=0
Update chunk stats.
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:129
virtual std::vector< TargetValue > getEntryAt(const size_t index) const =0
specifies the content in-memory of a row in the table metadata table
virtual void insertData(InsertData &insertDataStruct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
virtual size_t const getRowCount() const =0
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156