OmniSciDB  5ade3759e0
Fragmenter_Namespace Namespace Reference

Classes

class  AbstractFragmenter
 
struct  ArrayChunkConverter
 
struct  BlockWithColumnId
 
struct  ChunkToInsertDataConverter
 
struct  DateChunkConverter
 
struct  FixedLenArrayChunkConverter
 
class  FragmentInfo
 Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples (rows) currently stored by that fragment. More...
 
struct  InsertData
 The data to be inserted using the fragment manager. More...
 
struct  InsertDataLoader
 
class  InsertOrderFragmenter
 The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order. Likely the default fragmenter. More...
 
class  RowDataProvider
 
struct  ScalarChunkConverter
 
struct  ShardDataOwner
 
class  SortedOrderFragmenter
 
struct  StringChunkConverter
 
class  TableInfo
 

Enumerations

enum  FragmenterType { INSERT_ORDER = 0 }
 

Functions

template<typename SRC >
std::vector< std::vector< size_t > > computeRowIndicesOfShards (size_t shardCount, size_t leafCount, size_t rowCount, SRC *src)
 
template<typename T >
size_t indexOf (std::vector< T > &vec, T val)
 
bool isStringVectorData (const ColumnDescriptor *cd)
 
bool isDatumVectorData (const ColumnDescriptor *cd)
 
size_t sizeOfRawColumn (const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
 
std::vector< std::vector< size_t > > computeRowIndicesOfShards (const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
 
template<typename T >
void copyColumnDataOfShard (const std::vector< size_t > &rowIndices, T *src, T *dst)
 
BlockWithColumnId copyColumnDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
 
InsertData copyDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, InsertData &insert_data, int shardTableIndex, const std::vector< size_t > &rowIndices)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, T *buffer)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, std::vector< T > &buffer)
 
void shuffleByIndexes (const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
 
template<typename T >
void sortIndexesImpl (std::vector< size_t > &indexes, const T *buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< std::string > &buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
 
void sortIndexes (const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
 
void wait_cleanup_threads (std::vector< std::future< void >> &threads)
 
bool is_integral (const SQLTypeInfo &t)
 
static int get_chunks (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
 
template<typename T >
static void set_chunk_stats (const SQLTypeInfo &col_type, int8_t *data_addr, int8_t &has_null, T &min, T &max)
 
static void set_chunk_metadata (const Catalog_Namespace::Catalog *catalog, FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const size_t nrows_to_keep, UpdelRoll &updel_roll)
 

Enumeration Type Documentation

◆ FragmenterType

Stores the type of a child class of AbstractFragmenter.

Enumerator
INSERT_ORDER 

Definition at line 44 of file Fragmenter.h.

44  {
45  INSERT_ORDER = 0 // these values persist in catalog. make explicit
46 };

Function Documentation

◆ computeRowIndicesOfShards() [1/2]

template<typename SRC >
std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( size_t  shardCount,
size_t  leafCount,
size_t  rowCount,
SRC *  src 
)

Definition at line 34 of file InsertDataLoader.cpp.

References SHARD_FOR_KEY.

Referenced by computeRowIndicesOfShards(), and Fragmenter_Namespace::InsertDataLoader::insertData().

37  {
38  const auto numShardTables = shardCount * leafCount;
39 
40  std::vector<std::vector<size_t>> rowIndicesOfShards(numShardTables);
41 
42  for (size_t row = 0; row < rowCount; row++) {
43  // expecting unsigned data
44  // thus, no need for double remainder
45  auto shardId = (std::is_unsigned<SRC>::value)
46  ? src[row] % numShardTables
47  : SHARD_FOR_KEY(src[row], numShardTables);
48  rowIndicesOfShards[shardId].push_back(row);
49  }
50 
51  return rowIndicesOfShards;
52 }
int64_t * src
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20
+ Here is the caller graph for this function:

◆ computeRowIndicesOfShards() [2/2]

std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( const Catalog_Namespace::Catalog cat,
size_t  leafCount,
InsertData insert_data 
)

Definition at line 109 of file InsertDataLoader.cpp.

References CHECK, Fragmenter_Namespace::InsertData::columnIds, computeRowIndicesOfShards(), Fragmenter_Namespace::InsertData::data, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getShardColumnMetadataForTable(), indexOf(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, sizeOfRawColumn(), and Fragmenter_Namespace::InsertData::tableId.

112  {
113  const auto* td = cat.getMetadataForTable(insert_data.tableId);
114  const auto* shard_cd = cat.getShardColumnMetadataForTable(td);
115  auto shardDataBlockIndex = indexOf(insert_data.columnIds, shard_cd->columnId);
116  DataBlockPtr& shardDataBlock = insert_data.data[shardDataBlockIndex];
117  auto rowCount = insert_data.numRows;
118  auto shardCount = td->nShards;
119 
120  CHECK(!isStringVectorData(shard_cd));
121  CHECK(!isDatumVectorData(shard_cd));
122 
123  switch (sizeOfRawColumn(cat, shard_cd)) {
124  case 1:
125  return computeRowIndicesOfShards(
126  shardCount,
127  leafCount,
128  rowCount,
129  reinterpret_cast<uint8_t*>(shardDataBlock.numbersPtr));
130  case 2:
131  return computeRowIndicesOfShards(
132  shardCount,
133  leafCount,
134  rowCount,
135  reinterpret_cast<uint16_t*>(shardDataBlock.numbersPtr));
136  case 4:
137  return computeRowIndicesOfShards(
138  shardCount,
139  leafCount,
140  rowCount,
141  reinterpret_cast<uint32_t*>(shardDataBlock.numbersPtr));
142  case 8:
143  return computeRowIndicesOfShards(
144  shardCount,
145  leafCount,
146  rowCount,
147  reinterpret_cast<uint64_t*>(shardDataBlock.numbersPtr));
148  }
149  throw std::runtime_error("Unexpected data block element size");
150 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::vector< std::vector< size_t > > computeRowIndicesOfShards(const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
bool isStringVectorData(const ColumnDescriptor *cd)
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:2879
size_t indexOf(std::vector< T > &vec, T val)
bool isDatumVectorData(const ColumnDescriptor *cd)
#define CHECK(condition)
Definition: Logger.h:187
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:137
+ Here is the call graph for this function:

◆ copyColumnDataOfShard() [1/2]

template<typename T >
void Fragmenter_Namespace::copyColumnDataOfShard ( const std::vector< size_t > &  rowIndices,
T *  src,
T *  dst 
)

Definition at line 153 of file InsertDataLoader.cpp.

Referenced by copyColumnDataOfShard(), and copyDataOfShard().

153  {
154  for (size_t row = 0; row < rowIndices.size(); row++) {
155  auto srcRowIndex = rowIndices[row];
156  dst[row] = src[srcRowIndex];
157  }
158 }
int64_t * src
+ Here is the caller graph for this function:

◆ copyColumnDataOfShard() [2/2]

BlockWithColumnId Fragmenter_Namespace::copyColumnDataOfShard ( const Catalog_Namespace::Catalog cat,
ShardDataOwner dataOwner,
const std::vector< size_t > &  rowIndices,
const ColumnDescriptor pCol,
size_t  columnIndex,
DataBlockPtr  dataBlock 
)

Definition at line 165 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, DataBlockPtr::arraysPtr, ColumnDescriptor::columnId, copyColumnDataOfShard(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::ShardDataOwner::rawData, sizeOfRawColumn(), Fragmenter_Namespace::ShardDataOwner::stringData, and DataBlockPtr::stringsPtr.

170  {
171  DataBlockPtr ret;
172  if (isStringVectorData(pCol)) {
173  auto& data = dataOwner.stringData[columnIndex];
174  data.resize(rowIndices.size());
175  copyColumnDataOfShard(rowIndices, &(*(dataBlock.stringsPtr))[0], &data[0]);
176  ret.stringsPtr = &data;
177 
178  } else if (isDatumVectorData(pCol)) {
179  auto& data = dataOwner.arrayData[columnIndex];
180  data.resize(rowIndices.size());
181  copyColumnDataOfShard(rowIndices, &(*(dataBlock.arraysPtr))[0], &data[0]);
182  ret.arraysPtr = &data;
183 
184  } else {
185  auto rawArrayElementSize = sizeOfRawColumn(cat, pCol);
186  auto& data = dataOwner.rawData[columnIndex];
187  data.resize(rowIndices.size() * rawArrayElementSize);
188 
189  switch (rawArrayElementSize) {
190  case 1: {
191  copyColumnDataOfShard(rowIndices,
192  reinterpret_cast<uint8_t*>(dataBlock.numbersPtr),
193  reinterpret_cast<uint8_t*>(&data[0]));
194  break;
195  }
196  case 2: {
197  copyColumnDataOfShard(rowIndices,
198  reinterpret_cast<uint16_t*>(dataBlock.numbersPtr),
199  reinterpret_cast<uint16_t*>(&data[0]));
200  break;
201  }
202  case 4: {
203  copyColumnDataOfShard(rowIndices,
204  reinterpret_cast<uint32_t*>(dataBlock.numbersPtr),
205  reinterpret_cast<uint32_t*>(&data[0]));
206  break;
207  }
208  case 8: {
209  copyColumnDataOfShard(rowIndices,
210  reinterpret_cast<uint64_t*>(dataBlock.numbersPtr),
211  reinterpret_cast<uint64_t*>(&data[0]));
212  break;
213  }
214  default:
215  throw std::runtime_error("Unexpected data block element size");
216  }
217 
218  ret.numbersPtr = reinterpret_cast<int8_t*>(&data[0]);
219  }
220 
221  return {pCol->columnId, ret};
222 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
BlockWithColumnId copyColumnDataOfShard(const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
bool isStringVectorData(const ColumnDescriptor *cd)
bool isDatumVectorData(const ColumnDescriptor *cd)
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:137
+ Here is the call graph for this function:

◆ copyDataOfShard()

InsertData Fragmenter_Namespace::copyDataOfShard ( const Catalog_Namespace::Catalog cat,
ShardDataOwner dataOwner,
InsertData insert_data,
int  shardTableIndex,
const std::vector< size_t > &  rowIndices 
)

Definition at line 224 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, Fragmenter_Namespace::InsertData::columnIds, copyColumnDataOfShard(), Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getPhysicalTablesDescriptors(), indexOf(), Fragmenter_Namespace::InsertData::numRows, Fragmenter_Namespace::ShardDataOwner::rawData, Fragmenter_Namespace::ShardDataOwner::stringData, and Fragmenter_Namespace::InsertData::tableId.

Referenced by Fragmenter_Namespace::InsertDataLoader::insertData().

228  {
229  const auto* td = cat.getMetadataForTable(insert_data.tableId);
230  const auto* ptd = cat.getPhysicalTablesDescriptors(td)[shardTableIndex];
231 
232  InsertData shardData;
233  shardData.databaseId = insert_data.databaseId;
234  shardData.tableId = ptd->tableId;
235  shardData.numRows = rowIndices.size();
236 
237  std::vector<const ColumnDescriptor*> pCols;
238  std::vector<int> lCols;
239 
240  {
241  auto logicalColumns = cat.getAllColumnMetadataForTable(td->tableId, true, true, true);
242  for (const auto& cd : logicalColumns) {
243  lCols.push_back(cd->columnId);
244  }
245 
246  auto physicalColumns =
247  cat.getAllColumnMetadataForTable(ptd->tableId, true, true, true);
248  for (const auto& cd : physicalColumns) {
249  pCols.push_back(cd);
250  }
251  }
252 
253  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
254  dataOwner.arrayData.emplace_back();
255  dataOwner.rawData.emplace_back();
256  dataOwner.stringData.emplace_back();
257  }
258 
259  auto copycat = [&cat, &dataOwner, &rowIndices, &lCols, &pCols, &insert_data](int col) {
260  const auto lColId = insert_data.columnIds[col];
261  const auto pCol = pCols[indexOf(lCols, lColId)];
262  return copyColumnDataOfShard(
263  cat, dataOwner, rowIndices, pCol, col, insert_data.data[col]);
264  };
265 
266  std::vector<std::future<BlockWithColumnId>> worker_threads;
267  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
268  worker_threads.push_back(std::async(std::launch::async, copycat, col));
269  }
270 
271  for (auto& child : worker_threads) {
272  child.wait();
273  }
274 
275  for (auto& child : worker_threads) {
276  auto shardColumnData = child.get();
277  shardData.columnIds.push_back(shardColumnData.columnId);
278  shardData.data.push_back(shardColumnData.block);
279  }
280 
281  return shardData;
282 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
BlockWithColumnId copyColumnDataOfShard(const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logicalTableDesc) const
Definition: Catalog.cpp:2895
size_t indexOf(std::vector< T > &vec, T val)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_chunks()

static int Fragmenter_Namespace::get_chunks ( const Catalog_Namespace::Catalog catalog,
const TableDescriptor td,
const FragmentInfo fragment,
const Data_Namespace::MemoryLevel  memory_level,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
static

Definition at line 104 of file UpdelStorage.cpp.

References CHECK, Catalog_Namespace::DBMetadata::dbId, Fragmenter_Namespace::FragmentInfo::fragmentId, Chunk_NS::Chunk::getChunk(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), TableDescriptor::nColumns, and TableDescriptor::tableId.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

108  {
109  for (int cid = 1, nc = 0; nc < td->nColumns; ++cid) {
110  if (const auto cd = catalog->getMetadataForColumn(td->tableId, cid)) {
111  ++nc;
112  if (!cd->isVirtualCol) {
113  auto chunk_meta_it = fragment.getChunkMetadataMapPhysical().find(cid);
114  CHECK(chunk_meta_it != fragment.getChunkMetadataMapPhysical().end());
115  ChunkKey chunk_key{
116  catalog->getCurrentDB().dbId, td->tableId, cid, fragment.fragmentId};
117  auto chunk = Chunk_NS::Chunk::getChunk(cd,
118  &catalog->getDataMgr(),
119  chunk_key,
120  memory_level,
121  0,
122  chunk_meta_it->second.numBytes,
123  chunk_meta_it->second.numElements);
124  chunks.push_back(chunk);
125  }
126  }
127  }
128  return chunks.size();
129 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
#define CHECK(condition)
Definition: Logger.h:187
std::vector< int > ChunkKey
Definition: types.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ indexOf()

template<typename T >
size_t Fragmenter_Namespace::indexOf ( std::vector< T > &  vec,
T  val 
)

Definition at line 55 of file InsertDataLoader.cpp.

References CHECK.

Referenced by computeRowIndicesOfShards(), and copyDataOfShard().

55  {
56  typename std::vector<T>::iterator it = std::find(vec.begin(), vec.end(), val);
57  CHECK(it != vec.end());
58  return std::distance(vec.begin(), it);
59 }
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the caller graph for this function:

◆ is_integral()

bool Fragmenter_Namespace::is_integral ( const SQLTypeInfo t)
inline

Definition at line 54 of file UpdelStorage.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::is_boolean(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_integer(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_timeinterval(), and Fragmenter_Namespace::FragmentInfo::unconditionalVacuum_.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata().

54  {
55  return t.is_integer() || t.is_boolean() || t.is_time() || t.is_timeinterval();
56 }
bool is_time() const
Definition: sqltypes.h:456
bool is_timeinterval() const
Definition: sqltypes.h:461
bool is_integer() const
Definition: sqltypes.h:452
bool is_boolean() const
Definition: sqltypes.h:457
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ isDatumVectorData()

bool Fragmenter_Namespace::isDatumVectorData ( const ColumnDescriptor cd)

Definition at line 67 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, and SQLTypeInfoCore< TYPE_FACET_PACK >::is_array().

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

67  {
68  return cd->columnType.is_array();
69 }
bool is_array() const
Definition: sqltypes.h:458
SQLTypeInfo columnType
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ isStringVectorData()

bool Fragmenter_Namespace::isStringVectorData ( const ColumnDescriptor cd)

Definition at line 61 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_geometry(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), and kENCODING_NONE.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

61  {
62  return (cd->columnType.is_geometry()) ||
63  (cd->columnType.is_string() &&
64  cd->columnType.get_compression() == kENCODING_NONE);
65 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
bool is_geometry() const
Definition: sqltypes.h:462
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:450
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_chunk_metadata()

static void Fragmenter_Namespace::set_chunk_metadata ( const Catalog_Namespace::Catalog catalog,
FragmentInfo fragment,
const std::shared_ptr< Chunk_NS::Chunk > &  chunk,
const size_t  nrows_to_keep,
UpdelRoll updel_roll 
)
static

Definition at line 1067 of file UpdelStorage.cpp.

References UpdelRoll::chunkMetadata, UpdelRoll::dirtyChunks, Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getMetadataForTable(), and UpdelRoll::mutex.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1071  {
1072  auto cd = chunk->get_column_desc();
1073  auto td = catalog->getMetadataForTable(cd->tableId);
1074  auto data_buffer = chunk->get_buffer();
1075  std::lock_guard<std::mutex> lck(updel_roll.mutex);
1076  const auto key = std::make_pair(td, &fragment);
1077  if (0 == updel_roll.chunkMetadata.count(key)) {
1078  updel_roll.chunkMetadata[key] = fragment.getChunkMetadataMapPhysical();
1079  }
1080  auto& chunkMetadata = updel_roll.chunkMetadata[key];
1081  chunkMetadata[cd->columnId].numElements = nrows_to_keep;
1082  chunkMetadata[cd->columnId].numBytes = data_buffer->size();
1083  if (updel_roll.dirtyChunks.count(chunk.get()) == 0) {
1084  updel_roll.dirtyChunks.emplace(chunk.get(), chunk);
1085  }
1086 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::map< Chunk_NS::Chunk *, std::shared_ptr< Chunk_NS::Chunk > > dirtyChunks
Definition: UpdelRoll.h:52
std::map< MetaDataKey, std::map< int, ChunkMetadata > > chunkMetadata
Definition: UpdelRoll.h:59
std::mutex mutex
Definition: UpdelRoll.h:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_chunk_stats()

template<typename T >
static void Fragmenter_Namespace::set_chunk_stats ( const SQLTypeInfo col_type,
int8_t *  data_addr,
int8_t &  has_null,
T &  min,
T &  max 
)
static

Definition at line 1052 of file UpdelStorage.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_notnull(), anonymous_namespace{TypedDataAccessors.h}::is_null(), anonymous_namespace{TypedDataAccessors.h}::set_minmax(), and v().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1056  {
1057  T v;
1058  const auto can_be_null = !col_type.get_notnull();
1059  const auto is_null = get_scalar<T>(data_addr, col_type, v);
1060  if (is_null) {
1061  has_null = has_null || (can_be_null && is_null);
1062  } else {
1063  set_minmax(min, max, v);
1064  }
1065 }
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:330
T v(const TargetValue &r)
bool is_null(const T &v, const SQLTypeInfo &t)
void set_minmax(T &min, T &max, T const val)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ shuffleByIndexes()

void Fragmenter_Namespace::shuffleByIndexes ( const ColumnDescriptor cd,
const std::vector< size_t > &  indexes,
DataBlockPtr data 
)

Definition at line 44 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, shuffleByIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

46  {
47  const auto& ti = cd->columnType;
48  switch (ti.get_type()) {
49  case kBOOLEAN:
50  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
51  break;
52  case kTINYINT:
53  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
54  break;
55  case kSMALLINT:
56  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
57  break;
58  case kINT:
59  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
60  break;
61  case kBIGINT:
62  case kNUMERIC:
63  case kDECIMAL:
64  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
65  break;
66  case kFLOAT:
67  shuffleByIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
68  break;
69  case kDOUBLE:
70  shuffleByIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
71  break;
72  case kTEXT:
73  case kVARCHAR:
74  case kCHAR:
75  if (ti.is_varlen()) {
76  shuffleByIndexesImpl(indexes, *data.stringsPtr);
77  } else {
78  switch (ti.get_size()) {
79  case 1:
80  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
81  break;
82  case 2:
83  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
84  break;
85  case 4:
86  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
87  break;
88  default:
89  CHECK(false);
90  }
91  }
92  break;
93  case kDATE:
94  case kTIME:
95  case kTIMESTAMP:
96  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
97  break;
98  case kARRAY:
99  shuffleByIndexesImpl(indexes, *data.arraysPtr);
100  break;
101  case kPOINT:
102  case kLINESTRING:
103  case kPOLYGON:
104  case kMULTIPOLYGON:
105  shuffleByIndexesImpl(indexes, *data.stringsPtr);
106  break;
107  default:
108  CHECK(false);
109  }
110 }
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
Definition: sqltypes.h:54
Definition: sqltypes.h:55
void shuffleByIndexesImpl(const std::vector< size_t > &indexes, std::vector< T > &buffer)
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:187
Definition: sqltypes.h:47
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:137
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ shuffleByIndexesImpl() [1/2]

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
T *  buffer 
)

Definition at line 25 of file SortedOrderFragmenter.cpp.

Referenced by shuffleByIndexes().

25  {
26  std::vector<T> new_buffer;
27  new_buffer.reserve(indexes.size());
28  for (const auto i : indexes) {
29  new_buffer.push_back(buffer[i]);
30  }
31  std::memcpy(buffer, new_buffer.data(), indexes.size() * sizeof(T));
32 }
+ Here is the caller graph for this function:

◆ shuffleByIndexesImpl() [2/2]

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
std::vector< T > &  buffer 
)

Definition at line 35 of file SortedOrderFragmenter.cpp.

35  {
36  std::vector<T> new_buffer;
37  new_buffer.reserve(indexes.size());
38  for (const auto i : indexes) {
39  new_buffer.push_back(buffer[i]);
40  }
41  buffer.swap(new_buffer);
42 }

◆ sizeOfRawColumn()

size_t Fragmenter_Namespace::sizeOfRawColumn ( const Catalog_Namespace::Catalog cat,
const ColumnDescriptor cd 
)

Definition at line 71 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_logical_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type_name(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

72  {
73  switch (cd->columnType.get_type()) {
74  case kPOINT:
75  case kLINESTRING:
76  case kPOLYGON:
77  case kMULTIPOLYGON:
78  case kARRAY:
79  throw std::runtime_error("geo and array columns have variable length elements");
80  case kBOOLEAN:
81  case kTINYINT:
82  case kSMALLINT:
83  case kINT:
84  case kBIGINT:
85  case kNUMERIC:
86  case kDECIMAL:
87  case kFLOAT:
88  case kDOUBLE:
89  case kTIMESTAMP:
90  case kTIME:
91  case kINTERVAL_DAY_TIME:
92  case kINTERVAL_YEAR_MONTH:
93  case kDATE:
94  return cd->columnType.get_logical_size();
95  case kTEXT:
96  case kVARCHAR:
97  case kCHAR:
98  if (cd->columnType.get_compression() == kENCODING_NONE) {
99  throw std::runtime_error(
100  "non encoded string columns have variable length elements");
101  }
102  return cd->columnType.get_size();
103  default:
104  throw std::runtime_error("not supported column type: " + cd->columnName + " (" +
105  cd->columnType.get_type_name() + ")");
106  }
107 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
Definition: sqltypes.h:51
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
std::string get_type_name() const
Definition: sqltypes.h:426
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
int get_logical_size() const
Definition: sqltypes.h:334
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sortIndexes()

void Fragmenter_Namespace::sortIndexes ( const ColumnDescriptor cd,
std::vector< size_t > &  indexes,
const DataBlockPtr data 
)

Definition at line 136 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, sortIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

138  {
139  const auto& ti = cd->columnType;
140  switch (ti.get_type()) {
141  case kBOOLEAN:
142  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
143  break;
144  case kTINYINT:
145  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
146  break;
147  case kSMALLINT:
148  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
149  break;
150  case kINT:
151  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
152  break;
153  case kBIGINT:
154  case kNUMERIC:
155  case kDECIMAL:
156  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
157  break;
158  case kFLOAT:
159  sortIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
160  break;
161  case kDOUBLE:
162  sortIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
163  break;
164  case kTEXT:
165  case kVARCHAR:
166  case kCHAR:
167  if (ti.is_varlen()) {
168  sortIndexesImpl(indexes, *data.stringsPtr);
169  } else {
170  switch (ti.get_size()) {
171  case 1:
172  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
173  break;
174  case 2:
175  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
176  break;
177  case 4:
178  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
179  break;
180  default:
181  CHECK(false);
182  }
183  }
184  break;
185  case kDATE:
186  case kTIME:
187  case kTIMESTAMP:
188  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
189  break;
190  case kARRAY:
191  sortIndexesImpl(indexes, *data.arraysPtr);
192  break;
193  default:
194  CHECK(false) << "invalid type '" << ti.get_type() << "' to sort";
195  }
196 }
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
void sortIndexesImpl(std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:187
Definition: sqltypes.h:47
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:137
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sortIndexesImpl() [1/3]

template<typename T >
void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const T *  buffer 
)

Definition at line 113 of file SortedOrderFragmenter.cpp.

Referenced by sortIndexes().

113  {
114  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
115  return buffer[a] < buffer[b];
116  });
117 }
+ Here is the caller graph for this function:

◆ sortIndexesImpl() [2/3]

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< std::string > &  buffer 
)

Definition at line 119 of file SortedOrderFragmenter.cpp.

120  {
121  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
122  return buffer[a].size() < buffer[b].size() ||
123  (buffer[a].size() == buffer[b].size() && buffer[a] < buffer[b]);
124  });
125 }

◆ sortIndexesImpl() [3/3]

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< ArrayDatum > &  buffer 
)

Definition at line 127 of file SortedOrderFragmenter.cpp.

128  {
129  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
130  return buffer[a].is_null || buffer[a].length < buffer[b].length ||
131  (!buffer[b].is_null && buffer[a].length == buffer[b].length &&
132  memcmp(buffer[a].pointer, buffer[b].pointer, buffer[a].length) < 0);
133  });
134 }

◆ wait_cleanup_threads()

void Fragmenter_Namespace::wait_cleanup_threads ( std::vector< std::future< void >> &  threads)
inline

Definition at line 38 of file UpdelStorage.cpp.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows(), Fragmenter_Namespace::InsertOrderFragmenter::getVacuumOffsets(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumn().

38  {
39  for (auto& t : threads) {
40  t.get();
41  }
42  threads.clear();
43 }
+ Here is the caller graph for this function: