OmniSciDB  04ee39c94c
InsertOrderFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
21 #ifndef INSERT_ORDER_FRAGMENTER_H
22 #define INSERT_ORDER_FRAGMENTER_H
23 
24 #include "../Chunk/Chunk.h"
25 #include "../DataMgr/MemoryLevel.h"
26 #include "../QueryEngine/TargetValue.h"
27 #include "../Shared/mapd_shared_mutex.h"
28 #include "../Shared/types.h"
29 #include "AbstractFragmenter.h"
30 
31 #include <map>
32 #include <mutex>
33 #include <unordered_map>
34 #include <vector>
35 
// Forward declaration of Data_Namespace::DataMgr. In the lines visible in this
// listing the type is only used as a pointer (`Data_Namespace::DataMgr* dataMgr`),
// which does not require the complete class definition.
36 namespace Data_Namespace {
37 class DataMgr;
38 }
39 
// Default sizing values; they are used as default arguments for the fragmenter's
// size-related parameters. Kept as macros so the existing interface is unchanged.
#define DEFAULT_FRAGMENT_ROWS 32000000  // in tuples
#define DEFAULT_PAGE_SIZE 2097152       // in bytes (2 MiB)
// The whole expression is parenthesized so the macro always expands as a single
// operand: with the previous `(1L) << 62` form, `DEFAULT_MAX_ROWS - 1` parsed as
// `1L << (62 - 1)` because shift binds more loosely than +, -, * and /.
// `1LL` guarantees an operand of at least 64 bits; with `1L` the shift is
// undefined on platforms where `long` is 32 bits wide.
#define DEFAULT_MAX_ROWS (1LL << 62)       // in rows (2^62)
#define DEFAULT_MAX_CHUNK_SIZE 1073741824  // in bytes (1 GiB)
44 
45 namespace Fragmenter_Namespace {
46 
55  public:
57 
// Parameter list of what is presumably the InsertOrderFragmenter constructor; the
// line carrying the function name (and one parameter line) is not part of this
// listing - TODO confirm against the real header.
// chunkKeyPrefix is taken by value, chunkVec by non-const reference and dataMgr
// as a raw pointer. The last five parameters have defaults (the DEFAULT_* macros
// and Data_Namespace::DISK_LEVEL), so callers may omit them.
59  const std::vector<int> chunkKeyPrefix,
60  std::vector<Chunk_NS::Chunk>& chunkVec,
61  Data_Namespace::DataMgr* dataMgr,
63  const int physicalTableId,
64  const int shard,
65  const size_t maxFragmentRows = DEFAULT_FRAGMENT_ROWS,
66  const size_t maxChunkSize = DEFAULT_MAX_CHUNK_SIZE,
67  const size_t pageSize = DEFAULT_PAGE_SIZE /*default 2MB (2097152 bytes)*/,
68  const size_t maxRows = DEFAULT_MAX_ROWS,
69  const Data_Namespace::MemoryLevel defaultInsertLevel = Data_Namespace::DISK_LEVEL);
70 
// Destructor; marked `override`, so the base class declares a virtual destructor.
71  ~InsertOrderFragmenter() override;
// Older signature, kept commented out; the active override below takes no arguments.
78  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0);
// Returns a TableInfo by value. What it contains is decided by the implementation,
// which is not part of this header.
79  TableInfo getFragmentsForQuery() override;
80 
// Inserts the data described by insertDataStruct (InsertData is "the data to be
// inserted using the fragment manager"). The argument is a non-const reference.
88  void insertData(InsertData& insertDataStruct) override;
89 
// Presumably the same insert path without the checkpoint step - TODO confirm in
// the implementation file.
90  void insertDataNoCheckpoint(InsertData& insertDataStruct) override;
91 
// Presumably drops fragments so that the table holds at most maxRows rows -
// TODO confirm the exact policy in the implementation file.
92  void dropFragmentsToSize(const size_t maxRows) override;
93 
// Takes chunk statistics for column `cd`, keyed by fragment id (see the inline
// /*fragment_id*/ tag). stats_map is a non-const reference, so the implementation
// is allowed to modify it; how the stats are applied is not visible in this header.
94  void updateChunkStats(
95  const ColumnDescriptor* cd,
96  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map) override;
97 
// Fragmenter id: the last element of chunkKeyPrefix_. std::vector::back() on an
// empty vector is undefined behaviour, so chunkKeyPrefix_ must be non-empty here.
101  inline int getFragmenterId() override { return chunkKeyPrefix_.back(); }
// Returns a copy of the chunk key prefix (returned by value).
102  inline std::vector<int> getChunkKeyPrefix() const { return chunkKeyPrefix_; }
// Fragmenter type as a string; returns a copy of fragmenterType_.
106  inline std::string getFragmenterType() override { return fragmenterType_; }
// Row-count accessors: a plain read / write of numTuples_. Neither function takes
// a lock itself.
107  size_t getNumRows() override { return numTuples_; }
108  void setNumRows(const size_t numTuples) override { numTuples_ = numTuples; }
109 
// Static overload that identifies the table and column by name (tab_name,
// col_name) instead of by descriptor pointers as the member overloads do.
// frag_offsets and rhs_values are taken by const reference; presumably they are
// parallel (one value per offset) - TODO confirm in the implementation file.
// updel_roll is a non-const reference, so it may be modified.
110  static void updateColumn(const Catalog_Namespace::Catalog* catalog,
111  const std::string& tab_name,
112  const std::string& col_name,
113  const int fragment_id,
114  const std::vector<uint64_t>& frag_offsets,
115  const std::vector<ScalarTargetValue>& rhs_values,
116  const SQLTypeInfo& rhs_type,
117  const Data_Namespace::MemoryLevel memory_level,
118  UpdelRoll& updel_roll);
119 
// Member overload taking table/column descriptors and a vector of right-hand-side
// values for fragment `fragment_id`. ScalarTargetValue is a
// boost::variant<int64_t, double, float, NullableString>. Presumably rhs_values
// lines up with frag_offsets - TODO confirm in the implementation file.
120  void updateColumn(const Catalog_Namespace::Catalog* catalog,
121  const TableDescriptor* td,
122  const ColumnDescriptor* cd,
123  const int fragment_id,
124  const std::vector<uint64_t>& frag_offsets,
125  const std::vector<ScalarTargetValue>& rhs_values,
126  const SQLTypeInfo& rhs_type,
127  const Data_Namespace::MemoryLevel memory_level,
128  UpdelRoll& updel_roll) override;
129 
// Multi-column update for fragment `fragmentId`, with the new values supplied
// through sourceDataProvider.
// NOTE(review): sourceMetaInfo and columnDescriptors are taken by value, so both
// vectors are copied on every call. The signature is an `override`, so changing it
// to const references would have to be done in the base class as well.
// indexOffFragmentOffsetColumn presumably selects the source column that carries
// the row offsets within the fragment - TODO confirm in the implementation file.
130  void updateColumns(const Catalog_Namespace::Catalog* catalog,
131  const TableDescriptor* td,
132  const int fragmentId,
133  const std::vector<TargetMetaInfo> sourceMetaInfo,
134  const std::vector<const ColumnDescriptor*> columnDescriptors,
135  const RowDataProvider& sourceDataProvider,
136  const size_t indexOffFragmentOffsetColumn,
137  const Data_Namespace::MemoryLevel memoryLevel,
138  UpdelRoll& updelRoll) override;
139 
// Member overload taking a single right-hand-side value (rhs_value) rather than a
// vector. Presumably the one value is applied to every offset in frag_offsets -
// TODO confirm in the implementation file.
140  void updateColumn(const Catalog_Namespace::Catalog* catalog,
141  const TableDescriptor* td,
142  const ColumnDescriptor* cd,
143  const int fragment_id,
144  const std::vector<uint64_t>& frag_offsets,
145  const ScalarTargetValue& rhs_value,
146  const SQLTypeInfo& rhs_type,
147  const Data_Namespace::MemoryLevel memory_level,
148  UpdelRoll& updel_roll) override;
149 
// Updates metadata for column `cd` in `fragment` (non-const reference).
// `chunk` is a shared_ptr passed by value, so the call holds its own reference for
// the duration of the call.
// Presumably `null` flags that a null value was written, and dmax/dmin and
// lmax/lmin are the floating-point and integer extremes of the written values -
// TODO confirm in the implementation file. Note the max-before-min argument order.
150  void updateColumnMetadata(const ColumnDescriptor* cd,
151  FragmentInfo& fragment,
152  std::shared_ptr<Chunk_NS::Chunk> chunk,
153  const bool null,
154  const double dmax,
155  const double dmin,
156  const int64_t lmax,
157  const int64_t lmin,
158  const SQLTypeInfo& rhs_type,
159  UpdelRoll& updel_roll) override;
160 
// Updates metadata for the entry identified by `key`. MetaDataKey is a
// std::pair<const TableDescriptor*, Fragmenter_Namespace::FragmentInfo*>
// (UpdelRoll.h), i.e. a (table, fragment) pair of raw pointers.
161  void updateMetadata(const Catalog_Namespace::Catalog* catalog,
162  const MetaDataKey& key,
163  UpdelRoll& updel_roll) override;
164 
// Compacts fragment `fragment_id` of table `td`. Presumably frag_offsets lists
// the row offsets to remove - TODO confirm in the implementation file.
165  void compactRows(const Catalog_Namespace::Catalog* catalog,
166  const TableDescriptor* td,
167  const int fragment_id,
168  const std::vector<uint64_t>& frag_offsets,
169  const Data_Namespace::MemoryLevel memory_level,
170  UpdelRoll& updel_roll) override;
171 
// Returns a vector of offsets computed from `chunk`; presumably the offsets of
// rows eligible for vacuuming - TODO confirm in the implementation file.
// NOTE(review): the return type is a const-qualified value, which prevents callers
// from moving out of the result. As this is an `override`, the qualifier can only
// be dropped together with the base-class declaration.
172  const std::vector<uint64_t> getVacuumOffsets(
173  const std::shared_ptr<Chunk_NS::Chunk>& chunk) override;
174 
// Declared with a deduced (`auto`) return type and no body in this header. A
// function with a placeholder return type cannot be called until its definition
// has been seen, so it is only usable from translation units that contain the
// definition. The concrete return type is not visible here.
175  auto getChunksForAllColumns(const TableDescriptor* td,
176  const FragmentInfo& fragment,
177  const Data_Namespace::MemoryLevel memory_level);
178 
179  protected:
// Data members. This listing omits several source lines, so some declarations
// below appear without their declarator or without their type.
// NOTE(review): std::deque, std::shared_ptr and std::string are used here, but
// <deque>, <memory> and <string> are not among the headers this file includes
// directly; presumably they arrive transitively - consider including them.
180  std::vector<int> chunkKeyPrefix_;
// Declarator not shown in this listing; the Doxygen index names a member of this
// type columnMap_.
181  std::map<int, Chunk_NS::Chunk>
// Declarator not shown; presumably fragmentInfoVec_, which the mutex comment
// further down refers to - TODO confirm.
183  std::deque<FragmentInfo>
185  // int currentInsertBufferFragmentId_;
// Both const, so they can only be set when the object is constructed.
188  const int physicalTableId_;
189  const int shard_;
191  size_t pageSize_; /* Page size in bytes of each page making up a given chunk - passed to
192  BufferMgr in createChunk() */
// Row count exposed through getNumRows() / setNumRows().
193  size_t numTuples_;
196  size_t maxRows_;
// Returned by getFragmenterType().
197  std::string fragmenterType_;
// The type of the next two members is on lines omitted from this listing.
199  fragmentInfoMutex_; // to prevent read-write conflicts for fragmentInfoVec_
201  insertMutex_; // to prevent race conditions on insert - only one insert statement
202  // should be going to a table at a time
// Presumably keyed by column id - TODO confirm what the size_t value represents.
206  std::unordered_map<int, size_t> varLenColInfo_;
// Mutex held through a shared_ptr, so it can be shared with other owners.
207  std::shared_ptr<std::mutex> mutex_access_inmem_states;
208 
// Internal helpers. The last access specifier visible in this listing is
// `protected:`, but lines are omitted before this point, so the actual access
// level should be checked against the real header.
// createNewFragment returns a raw FragmentInfo pointer; its parameter list is on a
// line omitted from this listing.
217  FragmentInfo* createNewFragment(
// Takes the ids of the fragments to delete.
219  void deleteFragments(const std::vector<int>& dropFragIds);
220 
221  void getChunkMetadata();
222 
// lockInsertCheckpointData and replicateData take the insert data as const;
// insertDataImpl takes it as a non-const reference.
223  void lockInsertCheckpointData(const InsertData& insertDataStruct);
224  void insertDataImpl(InsertData& insertDataStruct);
225  void replicateData(const InsertData& insertDataStruct);
226 
// Copy assignment is declared without an inline definition. Presumably this is the
// pre-C++11 way of disabling copying - TODO confirm; if so, `= delete` states the
// intent directly.
228  InsertOrderFragmenter& operator=(const InsertOrderFragmenter&);
229  // FIX-ME: Temporary lock; needs removing.
// `mutable` allows the mutex to be locked from const member functions.
230  mutable std::mutex temp_mutex_;
231 
// Returns a non-const reference to the FragmentInfo for fragment_id. Behaviour for
// an unknown id is defined by the implementation, which is not part of this header.
232  FragmentInfo& getFragmentInfoFromId(const int fragment_id);
233 
// Vacuum helpers for one chunk of `fragment`; going by the names, one handles
// fixed-length and the other variable-length columns - TODO confirm.
// Both are declared with a deduced (`auto`) return type and no body, so they can
// only be called where their definitions are visible; the concrete return types
// are not shown in this header.
234  auto vacuum_fixlen_rows(const FragmentInfo& fragment,
235  const std::shared_ptr<Chunk_NS::Chunk>& chunk,
236  const std::vector<uint64_t>& frag_offsets);
237  auto vacuum_varlen_rows(const FragmentInfo& fragment,
238  const std::shared_ptr<Chunk_NS::Chunk>& chunk,
239  const std::vector<uint64_t>& frag_offsets);
240 };
241 
242 } // namespace Fragmenter_Namespace
243 
244 #endif // INSERT_ORDER_FRAGMENTER_H
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:40
std::shared_ptr< std::mutex > mutex_access_inmem_states
#define DEFAULT_MAX_ROWS
The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order...
std::string getFragmenterType() override
get fragmenter's type (as string)
std::shared_timed_mutex mapd_shared_mutex
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:79
void setNumRows(const size_t numTuples) override
#define DEFAULT_PAGE_SIZE
specifies the content in-memory of a row in the column metadata table
#define DEFAULT_FRAGMENT_ROWS
std::unordered_map< int, size_t > varLenColInfo_
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
#define DEFAULT_MAX_CHUNK_SIZE
specifies the content in-memory of a row in the table metadata table
int getFragmenterId() override
get fragmenter's id
std::map< int, Chunk_NS::Chunk > columnMap_
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156