OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Fragmenter_Namespace Namespace Reference

Classes

class  RowDataProvider
 
class  AbstractFragmenter
 
struct  InsertData
 The data to be inserted using the fragment manager. More...
 
class  FragmentInfo
 Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples (rows) currently stored by that fragment. More...
 
class  TableInfo
 
struct  ShardDataOwner
 
struct  BlockWithColumnId
 
struct  InsertDataLoader
 
class  InsertOrderFragmenter
 The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order. Likely the default fragmenter. More...
 
class  SortedOrderFragmenter
 
struct  ChunkToInsertDataConverter
 
struct  ScalarChunkConverter
 
struct  FixedLenArrayChunkConverter
 
struct  ArrayChunkConverter
 
struct  StringChunkConverter
 
struct  DateChunkConverter
 

Enumerations

enum  FragmenterType { INSERT_ORDER = 0 }
 

Functions

template<typename SRC >
std::vector< std::vector
< size_t > > 
computeRowIndicesOfShards (size_t shardCount, size_t leafCount, size_t rowCount, SRC *src)
 
template<typename T >
size_t indexOf (std::vector< T > &vec, T val)
 
bool isStringVectorData (const ColumnDescriptor *cd)
 
bool isDatumVectorData (const ColumnDescriptor *cd)
 
size_t sizeOfRawColumn (const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
 
std::vector< std::vector
< size_t > > 
computeRowIndicesOfShards (const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
 
template<typename T >
void copyColumnDataOfShard (const std::vector< size_t > &rowIndices, T *src, T *dst)
 
BlockWithColumnId copyColumnDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock)
 
InsertData copyDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, InsertData &insert_data, int shardTableIndex, const std::vector< size_t > &rowIndices)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, T *buffer)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, std::vector< T > &buffer)
 
void shuffleByIndexes (const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
 
template<typename T >
void sortIndexesImpl (std::vector< size_t > &indexes, const T *buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< std::string > &buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
 
void sortIndexes (const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
 
void wait_cleanup_threads (std::vector< std::future< void >> &threads)
 
bool is_integral (const SQLTypeInfo &t)
 
static int get_chunks (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
 
template<typename T >
static void set_chunk_stats (const SQLTypeInfo &col_type, int8_t *data_addr, int8_t &has_null, T &min, T &max)
 
static void set_chunk_metadata (const Catalog_Namespace::Catalog *catalog, FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const size_t nrows_to_keep, UpdelRoll &updel_roll)
 

Enumeration Type Documentation

Stores the type of a child class of AbstractFragmenter.

Enumerator
INSERT_ORDER 

Definition at line 44 of file Fragmenter.h.

44  {
45  INSERT_ORDER = 0 // these values persist in catalog. make explicit
46 };

Function Documentation

template<typename SRC >
std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( size_t  shardCount,
size_t  leafCount,
size_t  rowCount,
SRC *  src 
)

Definition at line 34 of file InsertDataLoader.cpp.

References SHARD_FOR_KEY.

Referenced by computeRowIndicesOfShards(), and Fragmenter_Namespace::InsertDataLoader::insertData().

37  {
38  const auto numShardTables = shardCount * leafCount;
39 
40  std::vector<std::vector<size_t>> rowIndicesOfShards(numShardTables);
41 
42  for (size_t row = 0; row < rowCount; row++) {
43  // expecting unsigned data
44  // thus, no need for double remainder
45  auto shardId = (std::is_unsigned<SRC>::value)
46  ? src[row] % numShardTables
47  : SHARD_FOR_KEY(src[row], numShardTables);
48  rowIndicesOfShards[shardId].push_back(row);
49  }
50 
51  return rowIndicesOfShards;
52 }
int64_t * src
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20

+ Here is the caller graph for this function:

std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( const Catalog_Namespace::Catalog &  cat,
size_t  leafCount,
InsertData &  insert_data 
)

Definition at line 109 of file InsertDataLoader.cpp.

References CHECK(), Fragmenter_Namespace::InsertData::columnIds, computeRowIndicesOfShards(), Fragmenter_Namespace::InsertData::data, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getShardColumnMetadataForTable(), indexOf(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, sizeOfRawColumn(), and Fragmenter_Namespace::InsertData::tableId.

112  {
113  const auto* td = cat.getMetadataForTable(insert_data.tableId);
114  const auto* shard_cd = cat.getShardColumnMetadataForTable(td);
115  auto shardDataBlockIndex = indexOf(insert_data.columnIds, shard_cd->columnId);
116  DataBlockPtr& shardDataBlock = insert_data.data[shardDataBlockIndex];
117  auto rowCount = insert_data.numRows;
118  auto shardCount = td->nShards;
119 
120  CHECK(!isStringVectorData(shard_cd));
121  CHECK(!isDatumVectorData(shard_cd));
122 
123  switch (sizeOfRawColumn(cat, shard_cd)) {
124  case 1:
125  return computeRowIndicesOfShards(
126  shardCount,
127  leafCount,
128  rowCount,
129  reinterpret_cast<uint8_t*>(shardDataBlock.numbersPtr));
130  case 2:
131  return computeRowIndicesOfShards(
132  shardCount,
133  leafCount,
134  rowCount,
135  reinterpret_cast<uint16_t*>(shardDataBlock.numbersPtr));
136  case 4:
137  return computeRowIndicesOfShards(
138  shardCount,
139  leafCount,
140  rowCount,
141  reinterpret_cast<uint32_t*>(shardDataBlock.numbersPtr));
142  case 8:
143  return computeRowIndicesOfShards(
144  shardCount,
145  leafCount,
146  rowCount,
147  reinterpret_cast<uint64_t*>(shardDataBlock.numbersPtr));
148  }
149  throw std::runtime_error("Unexpected data block element size");
150 }
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:2881
bool isStringVectorData(const ColumnDescriptor *cd)
CHECK(cgen_state)
size_t indexOf(std::vector< T > &vec, T val)
bool isDatumVectorData(const ColumnDescriptor *cd)
std::vector< std::vector< size_t > > computeRowIndicesOfShards(size_t shardCount, size_t leafCount, size_t rowCount, SRC *src)
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
int8_t * numbersPtr
Definition: sqltypes.h:140

+ Here is the call graph for this function:

template<typename T >
void Fragmenter_Namespace::copyColumnDataOfShard ( const std::vector< size_t > &  rowIndices,
T *  src,
T *  dst 
)

Definition at line 153 of file InsertDataLoader.cpp.

Referenced by copyColumnDataOfShard(), and copyDataOfShard().

153  {
154  for (size_t row = 0; row < rowIndices.size(); row++) {
155  auto srcRowIndex = rowIndices[row];
156  dst[row] = src[srcRowIndex];
157  }
158 }
int64_t * src

+ Here is the caller graph for this function:

BlockWithColumnId Fragmenter_Namespace::copyColumnDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
const std::vector< size_t > &  rowIndices,
const ColumnDescriptor *  pCol,
size_t  columnIndex,
DataBlockPtr  dataBlock 
)

Definition at line 165 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, DataBlockPtr::arraysPtr, ColumnDescriptor::columnId, copyColumnDataOfShard(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::ShardDataOwner::rawData, sizeOfRawColumn(), Fragmenter_Namespace::ShardDataOwner::stringData, and DataBlockPtr::stringsPtr.

170  {
171  DataBlockPtr ret;
172  if (isStringVectorData(pCol)) {
173  auto& data = dataOwner.stringData[columnIndex];
174  data.resize(rowIndices.size());
175  copyColumnDataOfShard(rowIndices, &(*(dataBlock.stringsPtr))[0], &data[0]);
176  ret.stringsPtr = &data;
177 
178  } else if (isDatumVectorData(pCol)) {
179  auto& data = dataOwner.arrayData[columnIndex];
180  data.resize(rowIndices.size());
181  copyColumnDataOfShard(rowIndices, &(*(dataBlock.arraysPtr))[0], &data[0]);
182  ret.arraysPtr = &data;
183 
184  } else {
185  auto rawArrayElementSize = sizeOfRawColumn(cat, pCol);
186  auto& data = dataOwner.rawData[columnIndex];
187  data.resize(rowIndices.size() * rawArrayElementSize);
188 
189  switch (rawArrayElementSize) {
190  case 1: {
191  copyColumnDataOfShard(rowIndices,
192  reinterpret_cast<uint8_t*>(dataBlock.numbersPtr),
193  reinterpret_cast<uint8_t*>(&data[0]));
194  break;
195  }
196  case 2: {
197  copyColumnDataOfShard(rowIndices,
198  reinterpret_cast<uint16_t*>(dataBlock.numbersPtr),
199  reinterpret_cast<uint16_t*>(&data[0]));
200  break;
201  }
202  case 4: {
203  copyColumnDataOfShard(rowIndices,
204  reinterpret_cast<uint32_t*>(dataBlock.numbersPtr),
205  reinterpret_cast<uint32_t*>(&data[0]));
206  break;
207  }
208  case 8: {
209  copyColumnDataOfShard(rowIndices,
210  reinterpret_cast<uint64_t*>(dataBlock.numbersPtr),
211  reinterpret_cast<uint64_t*>(&data[0]));
212  break;
213  }
214  default:
215  throw std::runtime_error("Unexpected data block element size");
216  }
217 
218  ret.numbersPtr = reinterpret_cast<int8_t*>(&data[0]);
219  }
220 
221  return {pCol->columnId, ret};
222 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
bool isStringVectorData(const ColumnDescriptor *cd)
void copyColumnDataOfShard(const std::vector< size_t > &rowIndices, T *src, T *dst)
bool isDatumVectorData(const ColumnDescriptor *cd)
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:140

+ Here is the call graph for this function:

InsertData Fragmenter_Namespace::copyDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
InsertData &  insert_data,
int  shardTableIndex,
const std::vector< size_t > &  rowIndices 
)

Definition at line 224 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, Fragmenter_Namespace::InsertData::columnIds, copyColumnDataOfShard(), Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getPhysicalTablesDescriptors(), indexOf(), Fragmenter_Namespace::InsertData::numRows, Fragmenter_Namespace::ShardDataOwner::rawData, Fragmenter_Namespace::ShardDataOwner::stringData, and Fragmenter_Namespace::InsertData::tableId.

Referenced by Fragmenter_Namespace::InsertDataLoader::insertData().

228  {
229  const auto* td = cat.getMetadataForTable(insert_data.tableId);
230  const auto* ptd = cat.getPhysicalTablesDescriptors(td)[shardTableIndex];
231 
232  InsertData shardData;
233  shardData.databaseId = insert_data.databaseId;
234  shardData.tableId = ptd->tableId;
235  shardData.numRows = rowIndices.size();
236 
237  std::vector<const ColumnDescriptor*> pCols;
238  std::vector<int> lCols;
239 
240  {
241  auto logicalColumns = cat.getAllColumnMetadataForTable(td->tableId, true, true, true);
242  for (const auto& cd : logicalColumns) {
243  lCols.push_back(cd->columnId);
244  }
245 
246  auto physicalColumns =
247  cat.getAllColumnMetadataForTable(ptd->tableId, true, true, true);
248  for (const auto& cd : physicalColumns) {
249  pCols.push_back(cd);
250  }
251  }
252 
253  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
254  dataOwner.arrayData.emplace_back();
255  dataOwner.rawData.emplace_back();
256  dataOwner.stringData.emplace_back();
257  }
258 
259  auto copycat = [&cat, &dataOwner, &rowIndices, &lCols, &pCols, &insert_data](int col) {
260  const auto lColId = insert_data.columnIds[col];
261  const auto pCol = pCols[indexOf(lCols, lColId)];
262  return copyColumnDataOfShard(
263  cat, dataOwner, rowIndices, pCol, col, insert_data.data[col]);
264  };
265 
266  std::vector<std::future<BlockWithColumnId>> worker_threads;
267  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
268  worker_threads.push_back(std::async(std::launch::async, copycat, col));
269  }
270 
271  for (auto& child : worker_threads) {
272  child.wait();
273  }
274 
275  for (auto& child : worker_threads) {
276  auto shardColumnData = child.get();
277  shardData.columnIds.push_back(shardColumnData.columnId);
278  shardData.data.push_back(shardColumnData.block);
279  }
280 
281  return shardData;
282 }
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logicalTableDesc) const
Definition: Catalog.cpp:2897
size_t indexOf(std::vector< T > &vec, T val)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1581
void copyColumnDataOfShard(const std::vector< size_t > &rowIndices, T *src, T *dst)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static int Fragmenter_Namespace::get_chunks ( const Catalog_Namespace::Catalog *  catalog,
const TableDescriptor *  td,
const FragmentInfo &  fragment,
const Data_Namespace::MemoryLevel  memory_level,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
static

Definition at line 107 of file UpdelStorage.cpp.

References CHECK(), Catalog_Namespace::DBMetadata::dbId, Fragmenter_Namespace::FragmentInfo::fragmentId, Chunk_NS::Chunk::getChunk(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), TableDescriptor::nColumns, and TableDescriptor::tableId.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

111  {
112  for (int cid = 1, nc = 0; nc < td->nColumns; ++cid) {
113  if (const auto cd = catalog->getMetadataForColumn(td->tableId, cid)) {
114  ++nc;
115  if (!cd->isVirtualCol) {
116  auto chunk_meta_it = fragment.getChunkMetadataMapPhysical().find(cid);
117  CHECK(chunk_meta_it != fragment.getChunkMetadataMapPhysical().end());
118  ChunkKey chunk_key{
119  catalog->getCurrentDB().dbId, td->tableId, cid, fragment.fragmentId};
120  auto chunk = Chunk_NS::Chunk::getChunk(cd,
121  &catalog->getDataMgr(),
122  chunk_key,
123  memory_level,
124  0,
125  chunk_meta_it->second.numBytes,
126  chunk_meta_it->second.numElements);
127  chunks.push_back(chunk);
128  }
129  }
130  }
131  return chunks.size();
132 }
std::vector< int > ChunkKey
Definition: types.h:35
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
size_t Fragmenter_Namespace::indexOf ( std::vector< T > &  vec,
T  val 
)

Definition at line 55 of file InsertDataLoader.cpp.

References CHECK().

Referenced by org.apache.calcite.sql2rel.SqlToRelConverter::collectInsertTargets(), computeRowIndicesOfShards(), copyDataOfShard(), and com.mapd.utility.db_vendors.PostGis_types::get_wkt().

55  {
56  typename std::vector<T>::iterator it = std::find(vec.begin(), vec.end(), val);
57  CHECK(it != vec.end());
58  return std::distance(vec.begin(), it);
59 }
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::is_integral ( const SQLTypeInfo &  t)
inline

Definition at line 57 of file UpdelStorage.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::is_boolean(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_integer(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), and SQLTypeInfoCore< TYPE_FACET_PACK >::is_timeinterval().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata().

57  {
58  return t.is_integer() || t.is_boolean() || t.is_time() || t.is_timeinterval();
59 }
bool is_boolean() const
Definition: sqltypes.h:484
bool is_time() const
Definition: sqltypes.h:483
bool is_integer() const
Definition: sqltypes.h:479
bool is_timeinterval() const
Definition: sqltypes.h:488

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::isDatumVectorData ( const ColumnDescriptor *  cd)

Definition at line 67 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, and SQLTypeInfoCore< TYPE_FACET_PACK >::is_array().

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

67  {
68  return cd->columnType.is_array();
69 }
bool is_array() const
Definition: sqltypes.h:485
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::isStringVectorData ( const ColumnDescriptor *  cd)

Definition at line 61 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_geometry(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), and kENCODING_NONE.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

61  {
62  return (cd->columnType.is_geometry()) ||
63  (cd->columnType.is_string() &&
64  cd->columnType.get_compression() == kENCODING_NONE);
65 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
bool is_geometry() const
Definition: sqltypes.h:489
bool is_string() const
Definition: sqltypes.h:477
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void Fragmenter_Namespace::set_chunk_metadata ( const Catalog_Namespace::Catalog *  catalog,
FragmentInfo &  fragment,
const std::shared_ptr< Chunk_NS::Chunk > &  chunk,
const size_t  nrows_to_keep,
UpdelRoll &  updel_roll 
)
static

Definition at line 1092 of file UpdelStorage.cpp.

References UpdelRoll::chunkMetadata, UpdelRoll::dirtyChunks, Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getMetadataForTable(), and UpdelRoll::mutex.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1096  {
1097  auto cd = chunk->get_column_desc();
1098  auto td = catalog->getMetadataForTable(cd->tableId);
1099  auto data_buffer = chunk->get_buffer();
1100  std::lock_guard<std::mutex> lck(updel_roll.mutex);
1101  const auto key = std::make_pair(td, &fragment);
1102  if (0 == updel_roll.chunkMetadata.count(key)) {
1103  updel_roll.chunkMetadata[key] = fragment.getChunkMetadataMapPhysical();
1104  }
1105  auto& chunkMetadata = updel_roll.chunkMetadata[key];
1106  chunkMetadata[cd->columnId].numElements = nrows_to_keep;
1107  chunkMetadata[cd->columnId].numBytes = data_buffer->size();
1108  if (updel_roll.dirtyChunks.count(chunk.get()) == 0) {
1109  updel_roll.dirtyChunks.emplace(chunk.get(), chunk);
1110  }
1111 }
std::map< Chunk_NS::Chunk *, std::shared_ptr< Chunk_NS::Chunk > > dirtyChunks
Definition: UpdelRoll.h:52
std::map< MetaDataKey, std::map< int, ChunkMetadata > > chunkMetadata
Definition: UpdelRoll.h:59
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::mutex mutex
Definition: UpdelRoll.h:49

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
static void Fragmenter_Namespace::set_chunk_stats ( const SQLTypeInfo &  col_type,
int8_t *  data_addr,
int8_t &  has_null,
T &  min,
T &  max 
)
static

Definition at line 1077 of file UpdelStorage.cpp.

References SQLTypeInfoCore< TYPE_FACET_PACK >::get_notnull(), anonymous_namespace{TypedDataAccessors.h}::is_null(), and anonymous_namespace{TypedDataAccessors.h}::set_minmax().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1081  {
1082  T v;
1083  const auto can_be_null = !col_type.get_notnull();
1084  const auto is_null = get_scalar<T>(data_addr, col_type, v);
1085  if (is_null) {
1086  has_null = has_null || (can_be_null && is_null);
1087  } else {
1088  set_minmax(min, max, v);
1089  }
1090 }
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:333
bool is_null(const T &v, const SQLTypeInfo &t)
void set_minmax(T &min, T &max, T const val)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Fragmenter_Namespace::shuffleByIndexes ( const ColumnDescriptor *  cd,
const std::vector< size_t > &  indexes,
DataBlockPtr &  data 
)

Definition at line 44 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK(), ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, shuffleByIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

46  {
47  const auto& ti = cd->columnType;
48  switch (ti.get_type()) {
49  case kBOOLEAN:
50  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
51  break;
52  case kTINYINT:
53  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
54  break;
55  case kSMALLINT:
56  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
57  break;
58  case kINT:
59  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
60  break;
61  case kBIGINT:
62  case kNUMERIC:
63  case kDECIMAL:
64  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
65  break;
66  case kFLOAT:
67  shuffleByIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
68  break;
69  case kDOUBLE:
70  shuffleByIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
71  break;
72  case kTEXT:
73  case kVARCHAR:
74  case kCHAR:
75  if (ti.is_varlen()) {
76  shuffleByIndexesImpl(indexes, *data.stringsPtr);
77  } else {
78  switch (ti.get_size()) {
79  case 1:
80  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
81  break;
82  case 2:
83  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
84  break;
85  case 4:
86  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
87  break;
88  default:
89  CHECK(false);
90  }
91  }
92  break;
93  case kDATE:
94  case kTIME:
95  case kTIMESTAMP:
96  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
97  break;
98  case kARRAY:
99  shuffleByIndexesImpl(indexes, *data.arraysPtr);
100  break;
101  case kPOINT:
102  case kLINESTRING:
103  case kPOLYGON:
104  case kMULTIPOLYGON:
105  shuffleByIndexesImpl(indexes, *data.stringsPtr);
106  break;
107  default:
108  CHECK(false);
109  }
110 }
Definition: sqltypes.h:52
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
CHECK(cgen_state)
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:44
Definition: sqltypes.h:48
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:140
void shuffleByIndexesImpl(const std::vector< size_t > &indexes, T *buffer)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
T *  buffer 
)

Definition at line 25 of file SortedOrderFragmenter.cpp.

Referenced by shuffleByIndexes().

25  {
26  std::vector<T> new_buffer;
27  new_buffer.reserve(indexes.size());
28  for (const auto i : indexes) {
29  new_buffer.push_back(buffer[i]);
30  }
31  std::memcpy(buffer, new_buffer.data(), indexes.size() * sizeof(T));
32 }

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
std::vector< T > &  buffer 
)

Definition at line 35 of file SortedOrderFragmenter.cpp.

35  {
36  std::vector<T> new_buffer;
37  new_buffer.reserve(indexes.size());
38  for (const auto i : indexes) {
39  new_buffer.push_back(buffer[i]);
40  }
41  buffer.swap(new_buffer);
42 }
size_t Fragmenter_Namespace::sizeOfRawColumn ( const Catalog_Namespace::Catalog &  cat,
const ColumnDescriptor *  cd 
)

Definition at line 71 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_logical_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type_name(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

72  {
73  switch (cd->columnType.get_type()) {
74  case kPOINT:
75  case kLINESTRING:
76  case kPOLYGON:
77  case kMULTIPOLYGON:
78  case kARRAY:
79  throw std::runtime_error("geo and array columns have variable length elements");
80  case kBOOLEAN:
81  case kTINYINT:
82  case kSMALLINT:
83  case kINT:
84  case kBIGINT:
85  case kNUMERIC:
86  case kDECIMAL:
87  case kFLOAT:
88  case kDOUBLE:
89  case kTIMESTAMP:
90  case kTIME:
91  case kINTERVAL_DAY_TIME:
92  case kINTERVAL_YEAR_MONTH:
93  case kDATE:
94  return cd->columnType.get_logical_size();
95  case kTEXT:
96  case kVARCHAR:
97  case kCHAR:
98  if (cd->columnType.get_compression() == kENCODING_NONE) {
99  throw std::runtime_error(
100  "non encoded string columns have variable length elements");
101  }
102  return cd->columnType.get_size();
103  default:
104  throw std::runtime_error("not supported column type: " + cd->columnName + " (" +
105  cd->columnType.get_type_name() + ")");
106  }
107 }
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
std::string get_type_name() const
Definition: sqltypes.h:429
int get_logical_size() const
Definition: sqltypes.h:337
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:44
Definition: sqltypes.h:48
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Fragmenter_Namespace::sortIndexes ( const ColumnDescriptor *  cd,
std::vector< size_t > &  indexes,
const DataBlockPtr &  data 
)

Definition at line 136 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK(), ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, sortIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

138  {
139  const auto& ti = cd->columnType;
140  switch (ti.get_type()) {
141  case kBOOLEAN:
142  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
143  break;
144  case kTINYINT:
145  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
146  break;
147  case kSMALLINT:
148  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
149  break;
150  case kINT:
151  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
152  break;
153  case kBIGINT:
154  case kNUMERIC:
155  case kDECIMAL:
156  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
157  break;
158  case kFLOAT:
159  sortIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
160  break;
161  case kDOUBLE:
162  sortIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
163  break;
164  case kTEXT:
165  case kVARCHAR:
166  case kCHAR:
167  if (ti.is_varlen()) {
168  sortIndexesImpl(indexes, *data.stringsPtr);
169  } else {
170  switch (ti.get_size()) {
171  case 1:
172  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
173  break;
174  case 2:
175  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
176  break;
177  case 4:
178  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
179  break;
180  default:
181  CHECK(false);
182  }
183  }
184  break;
185  case kDATE:
186  case kTIME:
187  case kTIMESTAMP:
188  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
189  break;
190  case kARRAY:
191  sortIndexesImpl(indexes, *data.arraysPtr);
192  break;
193  default:
194  CHECK(false) << "invalid type '" << ti.get_type() << "' to sort";
195  }
196 }
Definition: sqltypes.h:52
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:141
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:142
void sortIndexesImpl(std::vector< size_t > &indexes, const T *buffer)
CHECK(cgen_state)
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:44
Definition: sqltypes.h:48
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:140

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const T *  buffer 
)

Definition at line 113 of file SortedOrderFragmenter.cpp.

Referenced by sortIndexes().

113  {
114  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
115  return buffer[a] < buffer[b];
116  });
117 }

+ Here is the caller graph for this function:

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< std::string > &  buffer 
)

Definition at line 119 of file SortedOrderFragmenter.cpp.

120  {
121  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
122  return buffer[a].size() < buffer[b].size() ||
123  (buffer[a].size() == buffer[b].size() && buffer[a] < buffer[b]);
124  });
125 }
void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< ArrayDatum > &  buffer 
)

Definition at line 127 of file SortedOrderFragmenter.cpp.

128  {
129  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
130  return buffer[a].is_null || buffer[a].length < buffer[b].length ||
131  (!buffer[b].is_null && buffer[a].length == buffer[b].length &&
132  memcmp(buffer[a].pointer, buffer[b].pointer, buffer[a].length) < 0);
133  });
134 }
void Fragmenter_Namespace::wait_cleanup_threads ( std::vector< std::future< void >> &  threads)
inline

Definition at line 41 of file UpdelStorage.cpp.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows(), Fragmenter_Namespace::InsertOrderFragmenter::getVacuumOffsets(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumn().

41  {
42  for (auto& t : threads) {
43  t.get();
44  }
45  threads.clear();
46 }

+ Here is the caller graph for this function: