OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fragmenter_Namespace::SortedOrderFragmenter Class Reference

#include <SortedOrderFragmenter.h>

+ Inheritance diagram for Fragmenter_Namespace::SortedOrderFragmenter:
+ Collaboration diagram for Fragmenter_Namespace::SortedOrderFragmenter:

Public Member Functions

 SortedOrderFragmenter (const std::vector< int > chunkKeyPrefix, std::vector< Chunk_NS::Chunk > &chunkVec, Data_Namespace::DataMgr *dataMgr, Catalog_Namespace::Catalog *catalog, const int physicalTableId, const int shard, const size_t maxFragmentRows=DEFAULT_FRAGMENT_ROWS, const size_t maxChunkSize=DEFAULT_MAX_CHUNK_SIZE, const size_t pageSize=DEFAULT_PAGE_SIZE, const size_t maxRows=DEFAULT_MAX_ROWS, const Data_Namespace::MemoryLevel defaultInsertLevel=Data_Namespace::DISK_LEVEL)
 
 ~SortedOrderFragmenter () override
 
void insertData (InsertData &insert_data_struct) override
 appends data onto the most recently occurring fragment, creating a new one if necessary More...
 
void insertDataNoCheckpoint (InsertData &insert_data_struct) override
 Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks or checkpoints are taken; these need to be managed externally. More...
 
 SortedOrderFragmenter (SortedOrderFragmenter &&)=default
 
 SortedOrderFragmenter (const SortedOrderFragmenter &)=delete
 
SortedOrderFragmenter & operator= (const SortedOrderFragmenter &)=delete
 
- Public Member Functions inherited from Fragmenter_Namespace::InsertOrderFragmenter
 InsertOrderFragmenter (const std::vector< int > chunkKeyPrefix, std::vector< Chunk_NS::Chunk > &chunkVec, Data_Namespace::DataMgr *dataMgr, Catalog_Namespace::Catalog *catalog, const int physicalTableId, const int shard, const size_t maxFragmentRows=DEFAULT_FRAGMENT_ROWS, const size_t maxChunkSize=DEFAULT_MAX_CHUNK_SIZE, const size_t pageSize=DEFAULT_PAGE_SIZE, const size_t maxRows=DEFAULT_MAX_ROWS, const Data_Namespace::MemoryLevel defaultInsertLevel=Data_Namespace::DISK_LEVEL, const bool uses_foreign_storage=false)
 
 ~InsertOrderFragmenter () override
 
size_t getNumFragments () override
 returns the number of fragments in a table More...
 
TableInfo getFragmentsForQuery () override
 returns an object (inside QueryInfo) holding the ids and row sizes of all fragments More...
 
void insertChunks (const InsertChunks &insert_chunk) override
 Insert chunks into minimal number of fragments. More...
 
void insertChunksNoCheckpoint (const InsertChunks &insert_chunk) override
 Insert chunks into minimal number of fragments; no locks or checkpoints taken. More...
 
void dropFragmentsToSize (const size_t maxRows) override
 Will truncate table to less than maxRows by dropping fragments. More...
 
void updateColumnChunkMetadata (const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata) override
 Updates the metadata for a column chunk. More...
 
void updateChunkStats (const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level) override
 Update chunk stats. More...
 
FragmentInfo * getFragmentInfo (const int fragment_id) const override
 Retrieve the fragment info object for an individual fragment for editing. More...
 
int getFragmenterId () override
 get fragmenter's id More...
 
std::vector< int > getChunkKeyPrefix () const
 
std::string getFragmenterType () override
 get fragmenter's type (as string) More...
 
size_t getNumRows () override
 
void setNumRows (const size_t numTuples) override
 
std::optional< ChunkUpdateStats > updateColumn (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll) override
 
void updateColumns (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor) override
 
void updateColumn (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const ScalarTargetValue &rhs_value, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll) override
 
void updateColumnMetadata (const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll) override
 
void updateMetadata (const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll) override
 
void compactRows (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll) override
 
const std::vector< uint64_t > getVacuumOffsets (const std::shared_ptr< Chunk_NS::Chunk > &chunk) override
 
auto getChunksForAllColumns (const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level)
 
void dropColumns (const std::vector< int > &columnIds) override
 
bool hasDeletedRows (const int delete_column_id) override
 Iterates through chunk metadata to return whether any rows have been deleted. More...
 
void resetSizesFromFragments () override
 
void alterNonGeoColumnType (const std::list< const ColumnDescriptor * > &columns)
 
void alterColumnGeoType (const std::list< std::pair< const ColumnDescriptor *, std::list< const ColumnDescriptor * >>> &src_dst_column_pairs)
 
- Public Member Functions inherited from Fragmenter_Namespace::AbstractFragmenter
virtual ~AbstractFragmenter ()
 

Protected Member Functions

virtual void sortData (InsertData &insertDataStruct)
 
- Protected Member Functions inherited from Fragmenter_Namespace::InsertOrderFragmenter
FragmentInfo * createNewFragment (const Data_Namespace::MemoryLevel memory_level=Data_Namespace::DISK_LEVEL)
 creates new fragment, calling createChunk() method of BufferMgr to make a new chunk for each column of the table. More...
 
void deleteFragments (const std::vector< int > &dropFragIds)
 
void conditionallyInstantiateFileMgrWithParams ()
 
void getChunkMetadata ()
 
void lockInsertCheckpointData (const InsertData &insertDataStruct)
 
void insertDataImpl (InsertData &insert_data)
 
void insertChunksImpl (const InsertChunks &insert_chunk)
 
void addColumns (const InsertData &insertDataStruct)
 
 InsertOrderFragmenter (const InsertOrderFragmenter &)
 
InsertOrderFragmenter & operator= (const InsertOrderFragmenter &)
 
FragmentInfo & getFragmentInfoFromId (const int fragment_id)
 
auto vacuum_fixlen_rows (const FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const std::vector< uint64_t > &frag_offsets)
 
auto vacuum_varlen_rows (const FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const std::vector< uint64_t > &frag_offsets)
 

Additional Inherited Members

- Public Types inherited from Fragmenter_Namespace::InsertOrderFragmenter
using ModifyTransactionTracker = UpdelRoll
 
- Protected Attributes inherited from Fragmenter_Namespace::InsertOrderFragmenter
std::vector< int > chunkKeyPrefix_
 
std::map< int, Chunk_NS::Chunk > columnMap_
 
std::vector< std::unique_ptr
< Chunk_NS::Chunk > > 
tracked_in_memory_chunks_
 
std::deque< std::unique_ptr
< FragmentInfo > > 
fragmentInfoVec_
 
Data_Namespace::DataMgr * dataMgr_
 
Catalog_Namespace::Catalog * catalog_
 
const int physicalTableId_
 
const int shard_
 
size_t maxFragmentRows_
 
size_t pageSize_
 
size_t numTuples_
 
int maxFragmentId_
 
size_t maxChunkSize_
 
size_t maxRows_
 
std::string fragmenterType_
 
heavyai::shared_mutex fragmentInfoMutex_
 
heavyai::shared_mutex insertMutex_
 
Data_Namespace::MemoryLevel defaultInsertLevel_
 
const bool uses_foreign_storage_
 
bool hasMaterializedRowId_
 
int rowIdColId_
 
std::unordered_map< int, size_t > varLenColInfo_
 
std::shared_ptr< std::mutex > mutex_access_inmem_states
 
std::mutex temp_mutex_
 

Detailed Description

Definition at line 23 of file SortedOrderFragmenter.h.

Constructor & Destructor Documentation

Fragmenter_Namespace::SortedOrderFragmenter::SortedOrderFragmenter ( const std::vector< int >  chunkKeyPrefix,
std::vector< Chunk_NS::Chunk > &  chunkVec,
Data_Namespace::DataMgr *  dataMgr,
Catalog_Namespace::Catalog *  catalog,
const int  physicalTableId,
const int  shard,
const size_t  maxFragmentRows = DEFAULT_FRAGMENT_ROWS,
const size_t  maxChunkSize = DEFAULT_MAX_CHUNK_SIZE,
const size_t  pageSize = DEFAULT_PAGE_SIZE,
const size_t  maxRows = DEFAULT_MAX_ROWS,
const Data_Namespace::MemoryLevel  defaultInsertLevel = Data_Namespace::DISK_LEVEL 
)
inline

Definition at line 25 of file SortedOrderFragmenter.h.

37  : InsertOrderFragmenter(chunkKeyPrefix,
38  chunkVec,
39  dataMgr,
40  catalog,
41  physicalTableId,
42  shard,
43  maxFragmentRows,
44  maxChunkSize,
45  pageSize,
46  maxRows,
47  defaultInsertLevel) {}
InsertOrderFragmenter(const std::vector< int > chunkKeyPrefix, std::vector< Chunk_NS::Chunk > &chunkVec, Data_Namespace::DataMgr *dataMgr, Catalog_Namespace::Catalog *catalog, const int physicalTableId, const int shard, const size_t maxFragmentRows=DEFAULT_FRAGMENT_ROWS, const size_t maxChunkSize=DEFAULT_MAX_CHUNK_SIZE, const size_t pageSize=DEFAULT_PAGE_SIZE, const size_t maxRows=DEFAULT_MAX_ROWS, const Data_Namespace::MemoryLevel defaultInsertLevel=Data_Namespace::DISK_LEVEL, const bool uses_foreign_storage=false)
Fragmenter_Namespace::SortedOrderFragmenter::~SortedOrderFragmenter ( )
inline override

Definition at line 49 of file SortedOrderFragmenter.h.

49 {}
Fragmenter_Namespace::SortedOrderFragmenter::SortedOrderFragmenter ( SortedOrderFragmenter &&  )
default
Fragmenter_Namespace::SortedOrderFragmenter::SortedOrderFragmenter ( const SortedOrderFragmenter &  )
delete

Member Function Documentation

void Fragmenter_Namespace::SortedOrderFragmenter::insertData ( InsertData &  insert_data_struct)
inline override virtual

appends data onto the most recently occurring fragment, creating a new one if necessary

Todo:
be able to fill up current fragment in multi-row insert before creating new fragment

Reimplemented from Fragmenter_Namespace::InsertOrderFragmenter.

Definition at line 50 of file SortedOrderFragmenter.h.

References Fragmenter_Namespace::InsertOrderFragmenter::insertData(), and sortData().

50  {
51  sortData(insert_data_struct);
52  InsertOrderFragmenter::insertData(insert_data_struct);
53  }
void insertData(InsertData &insert_data_struct) override
appends data onto the most recently occurring fragment, creating a new one if necessary ...
virtual void sortData(InsertData &insertDataStruct)

+ Here is the call graph for this function:

void Fragmenter_Namespace::SortedOrderFragmenter::insertDataNoCheckpoint ( InsertData &  insert_data_struct)
inline override virtual

Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks or checkpoints are taken; these need to be managed externally.

Reimplemented from Fragmenter_Namespace::InsertOrderFragmenter.

Definition at line 55 of file SortedOrderFragmenter.h.

References Fragmenter_Namespace::InsertOrderFragmenter::insertDataNoCheckpoint(), and sortData().

55  {
56  sortData(insert_data_struct);
57  InsertOrderFragmenter::insertDataNoCheckpoint(insert_data_struct);
58  }
virtual void sortData(InsertData &insertDataStruct)
void insertDataNoCheckpoint(InsertData &insert_data_struct) override
Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks or check...

+ Here is the call graph for this function:

SortedOrderFragmenter& Fragmenter_Namespace::SortedOrderFragmenter::operator= ( const SortedOrderFragmenter &  )
delete
void Fragmenter_Namespace::SortedOrderFragmenter::sortData ( InsertData &  insertDataStruct)
protected virtual

Definition at line 202 of file SortedOrderFragmenter.cpp.

References Fragmenter_Namespace::InsertOrderFragmenter::catalog_, CHECK, CHECK_GT, CHECK_LT, Fragmenter_Namespace::InsertData::columnIds, Fragmenter_Namespace::InsertData::data, Catalog_Namespace::Catalog::getMetadataForColumn(), Catalog_Namespace::Catalog::getMetadataForTable(), gpu_enabled::iota(), Fragmenter_Namespace::InsertData::is_default, Fragmenter_Namespace::InsertData::numRows, Fragmenter_Namespace::InsertOrderFragmenter::physicalTableId_, Fragmenter_Namespace::shuffleByIndexes(), and Fragmenter_Namespace::sortIndexes().

Referenced by insertData(), and insertDataNoCheckpoint().

202  {
203  // coming here table must have defined a sort_column for mini sort
204  const auto table_desc = catalog_->getMetadataForTable(physicalTableId_);
205  CHECK(table_desc);
206  CHECK_GT(table_desc->sortedColumnId, 0);
207  const auto logical_cd =
208  catalog_->getMetadataForColumn(table_desc->tableId, table_desc->sortedColumnId);
209  CHECK(logical_cd);
210  const auto physical_cd = catalog_->getMetadataForColumn(
211  table_desc->tableId,
212  table_desc->sortedColumnId + (logical_cd->columnType.is_geometry() ? 1 : 0));
213  const auto it = std::find(insertDataStruct.columnIds.begin(),
214  insertDataStruct.columnIds.end(),
215  physical_cd->columnId);
216  CHECK(it != insertDataStruct.columnIds.end());
217  // sort row indexes of the sort column
218  const auto dist = std::distance(insertDataStruct.columnIds.begin(), it);
219  if (!insertDataStruct.is_default[dist]) {
220  std::vector<size_t> indexes(insertDataStruct.numRows);
221  std::iota(indexes.begin(), indexes.end(), 0);
222  CHECK_LT(static_cast<size_t>(dist), insertDataStruct.data.size());
223  sortIndexes(physical_cd, indexes, insertDataStruct.data[dist]);
224  // shuffle rows of all columns
225  for (size_t i = 0; i < insertDataStruct.columnIds.size(); ++i) {
226  if (insertDataStruct.is_default[i]) {
227  continue;
228  }
229  const auto cd = catalog_->getMetadataForColumn(table_desc->tableId,
230  insertDataStruct.columnIds[i]);
231  shuffleByIndexes(cd, indexes, insertDataStruct.data[i]);
232  }
233  } else {
234  // nothing to shuffle, the column has the same value across all rows
235  return;
236  }
237 }
void shuffleByIndexes(const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
#define CHECK_GT(x, y)
Definition: Logger.h:305
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void sortIndexes(const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
#define CHECK_LT(x, y)
Definition: Logger.h:303
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:


The documentation for this class was generated from the following files: