OmniSciDB  2e3a973ef4
Fragmenter_Namespace Namespace Reference

Namespaces

 anonymous_namespace{InsertOrderFragmenter.cpp}
 

Classes

class  AbstractFragmenter
 
struct  ArrayChunkConverter
 
struct  BlockWithColumnId
 
struct  ChunkToInsertDataConverter
 
struct  DateChunkConverter
 
struct  FixedLenArrayChunkConverter
 
class  FragmentInfo
 Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples (rows) currently stored by that fragment. More...
 
struct  InsertData
 The data to be inserted using the fragment manager. More...
 
struct  InsertDataLoader
 
class  InsertOrderFragmenter
 The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order. Likely the default fragmenter. More...
 
class  RowDataProvider
 
struct  ScalarChunkConverter
 
struct  ShardDataOwner
 
class  SortedOrderFragmenter
 
struct  StringChunkConverter
 
class  TableInfo
 

Enumerations

enum  FragmenterType { INSERT_ORDER = 0 }
 

Functions

template<typename SRC >
std::vector< std::vector< size_t > > computeRowIndicesOfShards (size_t shardCount, size_t leafCount, size_t rowCount, SRC *src)
 
template<typename T >
size_t indexOf (std::vector< T > &vec, T val)
 
bool isStringVectorData (const ColumnDescriptor *cd)
 
bool isDatumVectorData (const ColumnDescriptor *cd)
 
size_t sizeOfRawColumn (const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
 
std::vector< std::vector< size_t > > computeRowIndicesOfShards (const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
 
template<typename T >
void copyColumnDataOfShard (const std::vector< size_t > &rowIndices, T *src, T *dst)
 
BlockWithColumnId copyColumnDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
 
InsertData copyDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, InsertData &insert_data, int shardTableIndex, const std::vector< size_t > &rowIndices)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, T *buffer)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, std::vector< T > &buffer)
 
void shuffleByIndexes (const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
 
template<typename T >
void sortIndexesImpl (std::vector< size_t > &indexes, const T *buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< std::string > &buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
 
void sortIndexes (const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
 
void wait_cleanup_threads (std::vector< std::future< void >> &threads)
 
bool is_integral (const SQLTypeInfo &t)
 
static int get_chunks (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
 
template<typename T >
static void set_chunk_stats (const SQLTypeInfo &col_type, int8_t *data_addr, int8_t &has_null, T &min, T &max)
 
static void set_chunk_metadata (const Catalog_Namespace::Catalog *catalog, FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const size_t nrows_to_keep, UpdelRoll &updel_roll)
 

Enumeration Type Documentation

◆ FragmenterType

Stores the type of a child class of AbstractFragmenter.

Enumerator
INSERT_ORDER 

Definition at line 44 of file Fragmenter.h.

44  {
45  INSERT_ORDER = 0 // these values persist in catalog. make explicit
46 };

Function Documentation

◆ computeRowIndicesOfShards() [1/2]

template<typename SRC >
std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( size_t  shardCount,
size_t  leafCount,
size_t  rowCount,
SRC *  src 
)

Definition at line 34 of file InsertDataLoader.cpp.

References SHARD_FOR_KEY.

Referenced by computeRowIndicesOfShards(), and Fragmenter_Namespace::InsertDataLoader::insertData().

37  {
38  const auto numShardTables = shardCount * leafCount;
39 
40  std::vector<std::vector<size_t>> rowIndicesOfShards(numShardTables);
41 
42  for (size_t row = 0; row < rowCount; row++) {
43  // expecting unsigned data
44  // thus, no need for double remainder
45  auto shardId = (std::is_unsigned<SRC>::value)
46  ? src[row] % numShardTables
47  : SHARD_FOR_KEY(src[row], numShardTables);
48  rowIndicesOfShards[shardId].push_back(row);
49  }
50 
51  return rowIndicesOfShards;
52 }
int64_t * src
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20
+ Here is the caller graph for this function:

◆ computeRowIndicesOfShards() [2/2]

std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( const Catalog_Namespace::Catalog &  cat,
size_t  leafCount,
InsertData &  insert_data 
)

Definition at line 109 of file InsertDataLoader.cpp.

References CHECK, Fragmenter_Namespace::InsertData::columnIds, computeRowIndicesOfShards(), Fragmenter_Namespace::InsertData::data, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getShardColumnMetadataForTable(), indexOf(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, sizeOfRawColumn(), and Fragmenter_Namespace::InsertData::tableId.

112  {
113  const auto* td = cat.getMetadataForTable(insert_data.tableId);
114  const auto* shard_cd = cat.getShardColumnMetadataForTable(td);
115  auto shardDataBlockIndex = indexOf(insert_data.columnIds, shard_cd->columnId);
116  DataBlockPtr& shardDataBlock = insert_data.data[shardDataBlockIndex];
117  auto rowCount = insert_data.numRows;
118  auto shardCount = td->nShards;
119 
120  CHECK(!isStringVectorData(shard_cd));
121  CHECK(!isDatumVectorData(shard_cd));
122 
123  switch (sizeOfRawColumn(cat, shard_cd)) {
124  case 1:
125  return computeRowIndicesOfShards(
126  shardCount,
127  leafCount,
128  rowCount,
129  reinterpret_cast<uint8_t*>(shardDataBlock.numbersPtr));
130  case 2:
131  return computeRowIndicesOfShards(
132  shardCount,
133  leafCount,
134  rowCount,
135  reinterpret_cast<uint16_t*>(shardDataBlock.numbersPtr));
136  case 4:
137  return computeRowIndicesOfShards(
138  shardCount,
139  leafCount,
140  rowCount,
141  reinterpret_cast<uint32_t*>(shardDataBlock.numbersPtr));
142  case 8:
143  return computeRowIndicesOfShards(
144  shardCount,
145  leafCount,
146  rowCount,
147  reinterpret_cast<uint64_t*>(shardDataBlock.numbersPtr));
148  }
149  throw std::runtime_error("Unexpected data block element size");
150 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< std::vector< size_t > > computeRowIndicesOfShards(const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
bool isStringVectorData(const ColumnDescriptor *cd)
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3571
size_t indexOf(std::vector< T > &vec, T val)
bool isDatumVectorData(const ColumnDescriptor *cd)
#define CHECK(condition)
Definition: Logger.h:197
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:149
+ Here is the call graph for this function:

◆ copyColumnDataOfShard() [1/2]

template<typename T >
void Fragmenter_Namespace::copyColumnDataOfShard ( const std::vector< size_t > &  rowIndices,
T *  src,
T *  dst 
)

Definition at line 153 of file InsertDataLoader.cpp.

Referenced by copyColumnDataOfShard(), and copyDataOfShard().

153  {
154  for (size_t row = 0; row < rowIndices.size(); row++) {
155  auto srcRowIndex = rowIndices[row];
156  dst[row] = src[srcRowIndex];
157  }
158 }
int64_t * src
+ Here is the caller graph for this function:

◆ copyColumnDataOfShard() [2/2]

BlockWithColumnId Fragmenter_Namespace::copyColumnDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
const std::vector< size_t > &  rowIndices,
const ColumnDescriptor *  pCol,
size_t  columnIndex,
DataBlockPtr  dataBlock 
)

Definition at line 165 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, DataBlockPtr::arraysPtr, ColumnDescriptor::columnId, copyColumnDataOfShard(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::ShardDataOwner::rawData, sizeOfRawColumn(), Fragmenter_Namespace::ShardDataOwner::stringData, and DataBlockPtr::stringsPtr.

170  {
171  DataBlockPtr ret;
172  if (isStringVectorData(pCol)) {
173  auto& data = dataOwner.stringData[columnIndex];
174  data.resize(rowIndices.size());
175  copyColumnDataOfShard(rowIndices, &(*(dataBlock.stringsPtr))[0], &data[0]);
176  ret.stringsPtr = &data;
177 
178  } else if (isDatumVectorData(pCol)) {
179  auto& data = dataOwner.arrayData[columnIndex];
180  data.resize(rowIndices.size());
181  copyColumnDataOfShard(rowIndices, &(*(dataBlock.arraysPtr))[0], &data[0]);
182  ret.arraysPtr = &data;
183 
184  } else {
185  auto rawArrayElementSize = sizeOfRawColumn(cat, pCol);
186  auto& data = dataOwner.rawData[columnIndex];
187  data.resize(rowIndices.size() * rawArrayElementSize);
188 
189  switch (rawArrayElementSize) {
190  case 1: {
191  copyColumnDataOfShard(rowIndices,
192  reinterpret_cast<uint8_t*>(dataBlock.numbersPtr),
193  reinterpret_cast<uint8_t*>(&data[0]));
194  break;
195  }
196  case 2: {
197  copyColumnDataOfShard(rowIndices,
198  reinterpret_cast<uint16_t*>(dataBlock.numbersPtr),
199  reinterpret_cast<uint16_t*>(&data[0]));
200  break;
201  }
202  case 4: {
203  copyColumnDataOfShard(rowIndices,
204  reinterpret_cast<uint32_t*>(dataBlock.numbersPtr),
205  reinterpret_cast<uint32_t*>(&data[0]));
206  break;
207  }
208  case 8: {
209  copyColumnDataOfShard(rowIndices,
210  reinterpret_cast<uint64_t*>(dataBlock.numbersPtr),
211  reinterpret_cast<uint64_t*>(&data[0]));
212  break;
213  }
214  default:
215  throw std::runtime_error("Unexpected data block element size");
216  }
217 
218  ret.numbersPtr = reinterpret_cast<int8_t*>(&data[0]);
219  }
220 
221  return {pCol->columnId, ret};
222 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
BlockWithColumnId copyColumnDataOfShard(const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
bool isStringVectorData(const ColumnDescriptor *cd)
bool isDatumVectorData(const ColumnDescriptor *cd)
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:149
+ Here is the call graph for this function:

◆ copyDataOfShard()

InsertData Fragmenter_Namespace::copyDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
InsertData &  insert_data,
int  shardTableIndex,
const std::vector< size_t > &  rowIndices 
)

Definition at line 224 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, cat(), Fragmenter_Namespace::InsertData::columnIds, copyColumnDataOfShard(), Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getPhysicalTablesDescriptors(), indexOf(), Fragmenter_Namespace::InsertData::numRows, Fragmenter_Namespace::ShardDataOwner::rawData, Fragmenter_Namespace::ShardDataOwner::stringData, and Fragmenter_Namespace::InsertData::tableId.

Referenced by Fragmenter_Namespace::InsertDataLoader::insertData().

228  {
229  const auto* td = cat.getMetadataForTable(insert_data.tableId);
230  const auto* ptd = cat.getPhysicalTablesDescriptors(td)[shardTableIndex];
231 
232  InsertData shardData;
233  shardData.databaseId = insert_data.databaseId;
234  shardData.tableId = ptd->tableId;
235  shardData.numRows = rowIndices.size();
236 
237  std::vector<const ColumnDescriptor*> pCols;
238  std::vector<int> lCols;
239 
240  {
241  auto logicalColumns = cat.getAllColumnMetadataForTable(td->tableId, true, true, true);
242  for (const auto& cd : logicalColumns) {
243  lCols.push_back(cd->columnId);
244  }
245 
246  auto physicalColumns =
247  cat.getAllColumnMetadataForTable(ptd->tableId, true, true, true);
248  for (const auto& cd : physicalColumns) {
249  pCols.push_back(cd);
250  }
251  }
252 
253  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
254  dataOwner.arrayData.emplace_back();
255  dataOwner.rawData.emplace_back();
256  dataOwner.stringData.emplace_back();
257  }
258 
259  auto copycat = [&cat, &dataOwner, &rowIndices, &lCols, &pCols, &insert_data](int col) {
260  const auto lColId = insert_data.columnIds[col];
261  const auto pCol = pCols[indexOf(lCols, lColId)];
262  return copyColumnDataOfShard(
263  cat, dataOwner, rowIndices, pCol, col, insert_data.data[col]);
264  };
265 
266  std::vector<std::future<BlockWithColumnId>> worker_threads;
267  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
268  worker_threads.push_back(std::async(std::launch::async, copycat, col));
269  }
270 
271  for (auto& child : worker_threads) {
272  child.wait();
273  }
274 
275  for (auto& child : worker_threads) {
276  auto shardColumnData = child.get();
277  shardData.columnIds.push_back(shardColumnData.columnId);
278  shardData.data.push_back(shardColumnData.block);
279  }
280 
281  return shardData;
282 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
BlockWithColumnId copyColumnDataOfShard(const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logicalTableDesc) const
Definition: Catalog.cpp:3589
std::string cat(Ts &&... args)
size_t indexOf(std::vector< T > &vec, T val)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1715
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_chunks()

static int Fragmenter_Namespace::get_chunks ( const Catalog_Namespace::Catalog *  catalog,
const TableDescriptor *  td,
const FragmentInfo &  fragment,
const Data_Namespace::MemoryLevel  memory_level,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
static

Definition at line 74 of file UpdelStorage.cpp.

References CHECK, Catalog_Namespace::DBMetadata::dbId, Fragmenter_Namespace::FragmentInfo::fragmentId, Chunk_NS::Chunk::getChunk(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), TableDescriptor::nColumns, and TableDescriptor::tableId.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

78  {
79  for (int cid = 1, nc = 0; nc < td->nColumns; ++cid) {
80  if (const auto cd = catalog->getMetadataForColumn(td->tableId, cid)) {
81  ++nc;
82  if (!cd->isVirtualCol) {
83  auto chunk_meta_it = fragment.getChunkMetadataMapPhysical().find(cid);
84  CHECK(chunk_meta_it != fragment.getChunkMetadataMapPhysical().end());
85  ChunkKey chunk_key{
86  catalog->getCurrentDB().dbId, td->tableId, cid, fragment.fragmentId};
87  auto chunk = Chunk_NS::Chunk::getChunk(cd,
88  &catalog->getDataMgr(),
89  chunk_key,
90  memory_level,
91  0,
92  chunk_meta_it->second->numBytes,
93  chunk_meta_it->second->numElements);
94  chunks.push_back(chunk);
95  }
96  }
97  }
98  return chunks.size();
99 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:208
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int > ChunkKey
Definition: types.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ indexOf()

template<typename T >
size_t Fragmenter_Namespace::indexOf ( std::vector< T > &  vec,
T  val 
)

Definition at line 55 of file InsertDataLoader.cpp.

References CHECK.

Referenced by computeRowIndicesOfShards(), and copyDataOfShard().

55  {
56  typename std::vector<T>::iterator it = std::find(vec.begin(), vec.end(), val);
57  CHECK(it != vec.end());
58  return std::distance(vec.begin(), it);
59 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ is_integral()

bool Fragmenter_Namespace::is_integral ( const SQLTypeInfo &  t)
inline

Definition at line 48 of file UpdelStorage.cpp.

References SQLTypeInfo::is_boolean(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), SQLTypeInfo::is_timeinterval(), and Fragmenter_Namespace::FragmentInfo::unconditionalVacuum_.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata().

48  {
49  return t.is_integer() || t.is_boolean() || t.is_time() || t.is_timeinterval();
50 }
bool is_time() const
Definition: sqltypes.h:423
bool is_boolean() const
Definition: sqltypes.h:424
bool is_integer() const
Definition: sqltypes.h:419
bool is_timeinterval() const
Definition: sqltypes.h:428
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ isDatumVectorData()

bool Fragmenter_Namespace::isDatumVectorData ( const ColumnDescriptor *  cd)

Definition at line 67 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, and SQLTypeInfo::is_array().

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

67  {
68  return cd->columnType.is_array();
69 }
bool is_array() const
Definition: sqltypes.h:425
SQLTypeInfo columnType
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ isStringVectorData()

bool Fragmenter_Namespace::isStringVectorData ( const ColumnDescriptor *  cd)

Definition at line 61 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::is_geometry(), SQLTypeInfo::is_string(), and kENCODING_NONE.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

61  {
62  return (cd->columnType.is_geometry()) ||
63  (cd->columnType.is_string() &&
64  cd->columnType.get_compression() == kENCODING_NONE);
65 }
bool is_string() const
Definition: sqltypes.h:417
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
bool is_geometry() const
Definition: sqltypes.h:429
SQLTypeInfo columnType
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_chunk_metadata()

static void Fragmenter_Namespace::set_chunk_metadata ( const Catalog_Namespace::Catalog *  catalog,
FragmentInfo &  fragment,
const std::shared_ptr< Chunk_NS::Chunk > &  chunk,
const size_t  nrows_to_keep,
UpdelRoll &  updel_roll 
)
static

Definition at line 1046 of file UpdelStorage.cpp.

References UpdelRoll::chunkMetadata, UpdelRoll::dirtyChunks, Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getMetadataForTable(), and UpdelRoll::mutex.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1050  {
1051  auto cd = chunk->getColumnDesc();
1052  auto td = catalog->getMetadataForTable(cd->tableId);
1053  auto data_buffer = chunk->getBuffer();
1054  std::lock_guard<std::mutex> lck(updel_roll.mutex);
1055  const auto key = std::make_pair(td, &fragment);
1056  if (0 == updel_roll.chunkMetadata.count(key)) {
1057  updel_roll.chunkMetadata[key] = fragment.getChunkMetadataMapPhysical();
1058  }
1059  auto& chunkMetadata = updel_roll.chunkMetadata[key];
1060  chunkMetadata[cd->columnId]->numElements = nrows_to_keep;
1061  chunkMetadata[cd->columnId]->numBytes = data_buffer->size();
1062  if (updel_roll.dirtyChunks.count(chunk.get()) == 0) {
1063  updel_roll.dirtyChunks.emplace(chunk.get(), chunk);
1064  }
1065 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::map< Chunk_NS::Chunk *, std::shared_ptr< Chunk_NS::Chunk > > dirtyChunks
Definition: UpdelRoll.h:52
std::map< MetaDataKey, ChunkMetadataMap > chunkMetadata
Definition: UpdelRoll.h:59
std::mutex mutex
Definition: UpdelRoll.h:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_chunk_stats()

template<typename T >
static void Fragmenter_Namespace::set_chunk_stats ( const SQLTypeInfo &  col_type,
int8_t *  data_addr,
int8_t &  has_null,
T &  min,
T &  max 
)
static

Definition at line 1031 of file UpdelStorage.cpp.

References SQLTypeInfo::get_notnull(), anonymous_namespace{TypedDataAccessors.h}::is_null(), and anonymous_namespace{TypedDataAccessors.h}::set_minmax().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1035  {
1036  T v;
1037  const auto can_be_null = !col_type.get_notnull();
1038  const auto is_null = get_scalar<T>(data_addr, col_type, v);
1039  if (is_null) {
1040  has_null = has_null || (can_be_null && is_null);
1041  } else {
1042  set_minmax(min, max, v);
1043  }
1044 }
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
bool is_null(const T &v, const SQLTypeInfo &t)
void set_minmax(T &min, T &max, T const val)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ shuffleByIndexes()

void Fragmenter_Namespace::shuffleByIndexes ( const ColumnDescriptor *  cd,
const std::vector< size_t > &  indexes,
DataBlockPtr &  data 
)

Definition at line 44 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, shuffleByIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

46  {
47  const auto& ti = cd->columnType;
48  switch (ti.get_type()) {
49  case kBOOLEAN:
50  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
51  break;
52  case kTINYINT:
53  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
54  break;
55  case kSMALLINT:
56  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
57  break;
58  case kINT:
59  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
60  break;
61  case kBIGINT:
62  case kNUMERIC:
63  case kDECIMAL:
64  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
65  break;
66  case kFLOAT:
67  shuffleByIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
68  break;
69  case kDOUBLE:
70  shuffleByIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
71  break;
72  case kTEXT:
73  case kVARCHAR:
74  case kCHAR:
75  if (ti.is_varlen()) {
76  shuffleByIndexesImpl(indexes, *data.stringsPtr);
77  } else {
78  switch (ti.get_size()) {
79  case 1:
80  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
81  break;
82  case 2:
83  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
84  break;
85  case 4:
86  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
87  break;
88  default:
89  CHECK(false);
90  }
91  }
92  break;
93  case kDATE:
94  case kTIME:
95  case kTIMESTAMP:
96  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
97  break;
98  case kARRAY:
99  shuffleByIndexesImpl(indexes, *data.arraysPtr);
100  break;
101  case kPOINT:
102  case kLINESTRING:
103  case kPOLYGON:
104  case kMULTIPOLYGON:
105  shuffleByIndexesImpl(indexes, *data.stringsPtr);
106  break;
107  default:
108  CHECK(false);
109  }
110 }
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
Definition: sqltypes.h:54
Definition: sqltypes.h:55
void shuffleByIndexesImpl(const std::vector< size_t > &indexes, std::vector< T > &buffer)
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:47
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:149
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ shuffleByIndexesImpl() [1/2]

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
T *  buffer 
)

Definition at line 25 of file SortedOrderFragmenter.cpp.

Referenced by shuffleByIndexes().

25  {
26  std::vector<T> new_buffer;
27  new_buffer.reserve(indexes.size());
28  for (const auto i : indexes) {
29  new_buffer.push_back(buffer[i]);
30  }
31  std::memcpy(buffer, new_buffer.data(), indexes.size() * sizeof(T));
32 }
+ Here is the caller graph for this function:

◆ shuffleByIndexesImpl() [2/2]

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
std::vector< T > &  buffer 
)

Definition at line 35 of file SortedOrderFragmenter.cpp.

35  {
36  std::vector<T> new_buffer;
37  new_buffer.reserve(indexes.size());
38  for (const auto i : indexes) {
39  new_buffer.push_back(buffer[i]);
40  }
41  buffer.swap(new_buffer);
42 }

◆ sizeOfRawColumn()

size_t Fragmenter_Namespace::sizeOfRawColumn ( const Catalog_Namespace::Catalog &  cat,
const ColumnDescriptor *  cd 
)

Definition at line 71 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_logical_size(), SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::get_type_name(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

72  {
73  switch (cd->columnType.get_type()) {
74  case kPOINT:
75  case kLINESTRING:
76  case kPOLYGON:
77  case kMULTIPOLYGON:
78  case kARRAY:
79  throw std::runtime_error("geo and array columns have variable length elements");
80  case kBOOLEAN:
81  case kTINYINT:
82  case kSMALLINT:
83  case kINT:
84  case kBIGINT:
85  case kNUMERIC:
86  case kDECIMAL:
87  case kFLOAT:
88  case kDOUBLE:
89  case kTIMESTAMP:
90  case kTIME:
91  case kINTERVAL_DAY_TIME:
92  case kINTERVAL_YEAR_MONTH:
93  case kDATE:
94  return cd->columnType.get_logical_size();
95  case kTEXT:
96  case kVARCHAR:
97  case kCHAR:
98  if (cd->columnType.get_compression() == kENCODING_NONE) {
99  throw std::runtime_error(
100  "non encoded string columns have variable length elements");
101  }
102  return cd->columnType.get_size();
103  default:
104  throw std::runtime_error("not supported column type: " + cd->columnName + " (" +
105  cd->columnType.get_type_name() + ")");
106  }
107 }
Definition: sqltypes.h:51
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
int get_logical_size() const
Definition: sqltypes.h:270
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::string get_type_name() const
Definition: sqltypes.h:362
Definition: sqltypes.h:43
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sortIndexes()

void Fragmenter_Namespace::sortIndexes ( const ColumnDescriptor *  cd,
std::vector< size_t > &  indexes,
const DataBlockPtr &  data 
)

Definition at line 137 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, sortIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

139  {
140  const auto& ti = cd->columnType;
141  switch (ti.get_type()) {
142  case kBOOLEAN:
143  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
144  break;
145  case kTINYINT:
146  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
147  break;
148  case kSMALLINT:
149  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
150  break;
151  case kINT:
152  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
153  break;
154  case kBIGINT:
155  case kNUMERIC:
156  case kDECIMAL:
157  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
158  break;
159  case kFLOAT:
160  sortIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
161  break;
162  case kDOUBLE:
163  sortIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
164  break;
165  case kTEXT:
166  case kVARCHAR:
167  case kCHAR:
168  if (ti.is_varlen()) {
169  sortIndexesImpl(indexes, *data.stringsPtr);
170  } else {
171  switch (ti.get_size()) {
172  case 1:
173  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
174  break;
175  case 2:
176  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
177  break;
178  case 4:
179  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
180  break;
181  default:
182  CHECK(false);
183  }
184  }
185  break;
186  case kDATE:
187  case kTIME:
188  case kTIMESTAMP:
189  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
190  break;
191  case kARRAY:
192  sortIndexesImpl(indexes, *data.arraysPtr);
193  break;
194  default:
195  CHECK(false) << "invalid type '" << ti.get_type() << "' to sort";
196  }
197 }
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
void sortIndexesImpl(std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:47
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:149
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sortIndexesImpl() [1/3]

template<typename T >
void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const T *  buffer 
)

Definition at line 113 of file SortedOrderFragmenter.cpp.

References CHECK.

Referenced by sortIndexes().

113  {
114  CHECK(buffer);
115  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
116  return buffer[a] < buffer[b];
117  });
118 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ sortIndexesImpl() [2/3]

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< std::string > &  buffer 
)

Definition at line 120 of file SortedOrderFragmenter.cpp.

121  {
122  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
123  return buffer[a].size() < buffer[b].size() ||
124  (buffer[a].size() == buffer[b].size() && buffer[a] < buffer[b]);
125  });
126 }

◆ sortIndexesImpl() [3/3]

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< ArrayDatum > &  buffer 
)

Definition at line 128 of file SortedOrderFragmenter.cpp.

129  {
130  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
131  return buffer[a].is_null || buffer[a].length < buffer[b].length ||
132  (!buffer[b].is_null && buffer[a].length == buffer[b].length &&
133  memcmp(buffer[a].pointer, buffer[b].pointer, buffer[a].length) < 0);
134  });
135 }

◆ wait_cleanup_threads()

void Fragmenter_Namespace::wait_cleanup_threads ( std::vector< std::future< void >> &  threads)
inline

Definition at line 41 of file UpdelStorage.cpp.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows(), Fragmenter_Namespace::InsertOrderFragmenter::getVacuumOffsets(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumn().

41  {
42  for (auto& t : threads) {
43  t.get();
44  }
45  threads.clear();
46 }
+ Here is the caller graph for this function: