OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Fragmenter_Namespace Namespace Reference

Namespaces

 anonymous_namespace{InsertOrderFragmenter.cpp}
 
 anonymous_namespace{UpdelStorage.cpp}
 

Classes

class  RowDataProvider
 
struct  UpdateValuesStats
 
struct  ChunkUpdateStats
 
class  AbstractFragmenter
 
struct  InsertData
 The data to be inserted using the fragment manager. More...
 
class  FragmentInfo
 Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples (rows) currently stored by that fragment. More...
 
class  TableInfo
 
struct  ShardDataOwner
 
struct  BlockWithColumnId
 
struct  InsertDataLoader
 
class  InsertOrderFragmenter
 The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order. Likely the default fragmenter. More...
 
class  SortedOrderFragmenter
 
struct  ChunkToInsertDataConverter
 
struct  ScalarChunkConverter
 
struct  FixedLenArrayChunkConverter
 
struct  ArrayChunkConverter
 
struct  StringChunkConverter
 
struct  DateChunkConverter
 

Enumerations

enum  FragmenterType { INSERT_ORDER = 0 }
 

Functions

template<typename SRC >
std::vector< std::vector
< size_t > > 
computeRowIndicesOfShards (size_t shard_count, size_t leaf_count, size_t row_count, SRC *src, bool duplicated_key_value)
 
template<typename T >
size_t indexOf (std::vector< T > &vec, T val)
 
bool isStringVectorData (const ColumnDescriptor *cd)
 
bool isDatumVectorData (const ColumnDescriptor *cd)
 
size_t sizeOfRawColumn (const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
 
std::vector< std::vector
< size_t > > 
computeRowIndicesOfShards (const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
 
template<typename T >
void copyColumnDataOfShard (const std::vector< size_t > &rowIndices, T *src, T *dst)
 
BlockWithColumnId copyColumnDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, const std::vector< size_t > &rowIndices, const ColumnDescriptor *pCol, size_t columnIndex, DataBlockPtr dataBlock, bool is_default)
 
InsertData copyDataOfShard (const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, InsertData &insert_data, int shardTableIndex, const std::vector< size_t > &rowIndices)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, T *buffer)
 
template<typename T >
void shuffleByIndexesImpl (const std::vector< size_t > &indexes, std::vector< T > &buffer)
 
void shuffleByIndexes (const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
 
template<typename T >
void sortIndexesImpl (std::vector< size_t > &indexes, const T *buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< std::string > &buffer)
 
void sortIndexesImpl (std::vector< size_t > &indexes, const std::vector< ArrayDatum > &buffer)
 
void sortIndexes (const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
 
void wait_cleanup_threads (std::vector< std::future< void >> &threads)
 
bool is_integral (const SQLTypeInfo &t)
 
static int get_chunks (const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
 
template<typename T >
static void set_chunk_stats (const SQLTypeInfo &col_type, int8_t *data_addr, bool &has_null, T &min, T &max)
 
static void set_chunk_metadata (const Catalog_Namespace::Catalog *catalog, FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const size_t nrows_to_keep, UpdelRoll &updel_roll)
 
size_t get_null_padding (bool is_varlen_array, const std::vector< uint64_t > &frag_offsets, const StringOffsetT *index_array, size_t fragment_row_count)
 
std::set< size_t > get_var_len_null_array_indexes (const SQLTypeInfo sql_type_info, const std::vector< uint64_t > &frag_offsets, const StringOffsetT *index_array, size_t fragment_row_count)
 
StringOffsetT get_buffer_offset (bool is_varlen_array, const StringOffsetT *index_array, size_t index)
 

Enumeration Type Documentation

Stores the type of a child class of AbstractFragmenter.

Enumerator
INSERT_ORDER 

Definition at line 44 of file Fragmenter.h.

44  {
45  INSERT_ORDER = 0 // these values persist in catalog. make explicit
46 };

Function Documentation

template<typename SRC >
std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( size_t  shard_count,
size_t  leaf_count,
size_t  row_count,
SRC *  src,
bool  duplicated_key_value 
)

Definition at line 35 of file InsertDataLoader.cpp.

References SHARD_FOR_KEY.

Referenced by computeRowIndicesOfShards(), and Fragmenter_Namespace::InsertDataLoader::insertData().

39  {
40  const auto n_shard_tables = shard_count * leaf_count;
41  std::vector<std::vector<size_t>> row_indices_of_shards(n_shard_tables);
42  if (!duplicated_key_value) {
43  for (size_t row = 0; row < row_count; row++) {
44  // expecting unsigned data
45  // thus, no need for double remainder
46  auto shard_id = (std::is_unsigned<SRC>::value)
47  ? src[row] % n_shard_tables
48  : SHARD_FOR_KEY(src[row], n_shard_tables);
49  row_indices_of_shards[shard_id].push_back(row);
50  }
51  } else {
52  auto shard_id = (std::is_unsigned<SRC>::value)
53  ? src[0] % n_shard_tables
54  : SHARD_FOR_KEY(src[0], n_shard_tables);
55  row_indices_of_shards[shard_id].reserve(row_count);
56  for (size_t row = 0; row < row_count; row++) {
57  row_indices_of_shards[shard_id].push_back(row);
58  }
59  }
60 
61  return row_indices_of_shards;
62 }
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20

+ Here is the caller graph for this function:

std::vector<std::vector<size_t> > Fragmenter_Namespace::computeRowIndicesOfShards ( const Catalog_Namespace::Catalog &  cat,
size_t  leafCount,
InsertData &  insert_data 
)

Definition at line 119 of file InsertDataLoader.cpp.

References CHECK, Fragmenter_Namespace::InsertData::columnIds, computeRowIndicesOfShards(), Fragmenter_Namespace::InsertData::data, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getShardColumnMetadataForTable(), indexOf(), Fragmenter_Namespace::InsertData::is_default, isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::InsertData::numRows, sizeOfRawColumn(), and Fragmenter_Namespace::InsertData::tableId.

122  {
123  const auto* td = cat.getMetadataForTable(insert_data.tableId);
124  const auto* shard_cd = cat.getShardColumnMetadataForTable(td);
125  auto shardDataBlockIndex = indexOf(insert_data.columnIds, shard_cd->columnId);
126  DataBlockPtr& shardDataBlock = insert_data.data[shardDataBlockIndex];
127  auto rowCount = insert_data.numRows;
128  auto shardCount = td->nShards;
129 
130  CHECK(!isStringVectorData(shard_cd));
131  CHECK(!isDatumVectorData(shard_cd));
132 
133  CHECK(insert_data.is_default.size() == insert_data.columnIds.size());
134  bool is_default = insert_data.is_default[shardDataBlockIndex];
135  switch (sizeOfRawColumn(cat, shard_cd)) {
136  case 1:
137  return computeRowIndicesOfShards(
138  shardCount,
139  leafCount,
140  rowCount,
141  reinterpret_cast<uint8_t*>(shardDataBlock.numbersPtr),
142  is_default);
143  case 2:
144  return computeRowIndicesOfShards(
145  shardCount,
146  leafCount,
147  rowCount,
148  reinterpret_cast<uint16_t*>(shardDataBlock.numbersPtr),
149  is_default);
150  case 4:
151  return computeRowIndicesOfShards(
152  shardCount,
153  leafCount,
154  rowCount,
155  reinterpret_cast<uint32_t*>(shardDataBlock.numbersPtr),
156  is_default);
157  case 8:
158  return computeRowIndicesOfShards(
159  shardCount,
160  leafCount,
161  rowCount,
162  reinterpret_cast<uint64_t*>(shardDataBlock.numbersPtr),
163  is_default);
164  }
165  throw std::runtime_error("Unexpected data block element size");
166 }
std::vector< std::vector< size_t > > computeRowIndicesOfShards(size_t shard_count, size_t leaf_count, size_t row_count, SRC *src, bool duplicated_key_value)
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3995
bool isStringVectorData(const ColumnDescriptor *cd)
size_t indexOf(std::vector< T > &vec, T val)
bool isDatumVectorData(const ColumnDescriptor *cd)
#define CHECK(condition)
Definition: Logger.h:197
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
int8_t * numbersPtr
Definition: sqltypes.h:220

+ Here is the call graph for this function:

template<typename T >
void Fragmenter_Namespace::copyColumnDataOfShard ( const std::vector< size_t > &  rowIndices,
T *  src,
T *  dst 
)

Definition at line 169 of file InsertDataLoader.cpp.

Referenced by copyColumnDataOfShard(), and copyDataOfShard().

169  {
170  for (size_t row = 0; row < rowIndices.size(); row++) {
171  auto srcRowIndex = rowIndices[row];
172  dst[row] = src[srcRowIndex];
173  }
174 }

+ Here is the caller graph for this function:

BlockWithColumnId Fragmenter_Namespace::copyColumnDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
const std::vector< size_t > &  rowIndices,
const ColumnDescriptor *  pCol,
size_t  columnIndex,
DataBlockPtr  dataBlock,
bool  is_default 
)

Definition at line 182 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, DataBlockPtr::arraysPtr, ColumnDescriptor::columnId, copyColumnDataOfShard(), isDatumVectorData(), isStringVectorData(), DataBlockPtr::numbersPtr, Fragmenter_Namespace::ShardDataOwner::rawData, sizeOfRawColumn(), Fragmenter_Namespace::ShardDataOwner::stringData, and DataBlockPtr::stringsPtr.

188  {
189  DataBlockPtr ret;
190  std::vector<size_t> single_row_idx({0ul});
191  const std::vector<size_t>& rows = is_default ? single_row_idx : rowIndices;
192  if (isStringVectorData(pCol)) {
193  auto& data = dataOwner.stringData[columnIndex];
194  data.resize(rows.size());
195  copyColumnDataOfShard(rows, &(*(dataBlock.stringsPtr))[0], &data[0]);
196  ret.stringsPtr = &data;
197 
198  } else if (isDatumVectorData(pCol)) {
199  auto& data = dataOwner.arrayData[columnIndex];
200  data.resize(rows.size());
201  copyColumnDataOfShard(rows, &(*(dataBlock.arraysPtr))[0], &data[0]);
202  ret.arraysPtr = &data;
203 
204  } else {
205  auto rawArrayElementSize = sizeOfRawColumn(cat, pCol);
206  auto& data = dataOwner.rawData[columnIndex];
207  data.resize(rows.size() * rawArrayElementSize);
208 
209  switch (rawArrayElementSize) {
210  case 1: {
211  copyColumnDataOfShard(rows,
212  reinterpret_cast<uint8_t*>(dataBlock.numbersPtr),
213  reinterpret_cast<uint8_t*>(&data[0]));
214  break;
215  }
216  case 2: {
217  copyColumnDataOfShard(rows,
218  reinterpret_cast<uint16_t*>(dataBlock.numbersPtr),
219  reinterpret_cast<uint16_t*>(&data[0]));
220  break;
221  }
222  case 4: {
223  copyColumnDataOfShard(rows,
224  reinterpret_cast<uint32_t*>(dataBlock.numbersPtr),
225  reinterpret_cast<uint32_t*>(&data[0]));
226  break;
227  }
228  case 8: {
229  copyColumnDataOfShard(rows,
230  reinterpret_cast<uint64_t*>(dataBlock.numbersPtr),
231  reinterpret_cast<uint64_t*>(&data[0]));
232  break;
233  }
234  default:
235  throw std::runtime_error("Unexpected data block element size");
236  }
237 
238  ret.numbersPtr = reinterpret_cast<int8_t*>(&data[0]);
239  }
240 
241  return {pCol->columnId, ret, is_default};
242 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
bool isStringVectorData(const ColumnDescriptor *cd)
void copyColumnDataOfShard(const std::vector< size_t > &rowIndices, T *src, T *dst)
bool isDatumVectorData(const ColumnDescriptor *cd)
size_t sizeOfRawColumn(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd)
int8_t * numbersPtr
Definition: sqltypes.h:220

+ Here is the call graph for this function:

InsertData Fragmenter_Namespace::copyDataOfShard ( const Catalog_Namespace::Catalog &  cat,
ShardDataOwner &  dataOwner,
InsertData &  insert_data,
int  shardTableIndex,
const std::vector< size_t > &  rowIndices 
)

Definition at line 244 of file InsertDataLoader.cpp.

References Fragmenter_Namespace::ShardDataOwner::arrayData, cat(), Fragmenter_Namespace::InsertData::columnIds, copyColumnDataOfShard(), Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::Catalog::getPhysicalTablesDescriptors(), indexOf(), Fragmenter_Namespace::InsertData::is_default, Fragmenter_Namespace::InsertData::numRows, Fragmenter_Namespace::ShardDataOwner::rawData, Fragmenter_Namespace::ShardDataOwner::stringData, and Fragmenter_Namespace::InsertData::tableId.

Referenced by Fragmenter_Namespace::InsertDataLoader::insertData().

248  {
249  const auto* td = cat.getMetadataForTable(insert_data.tableId);
250  const auto* ptd = cat.getPhysicalTablesDescriptors(td)[shardTableIndex];
251 
252  InsertData shardData;
253  shardData.databaseId = insert_data.databaseId;
254  shardData.tableId = ptd->tableId;
255  shardData.numRows = rowIndices.size();
256 
257  std::vector<const ColumnDescriptor*> pCols;
258  std::vector<int> lCols;
259 
260  {
261  auto logicalColumns = cat.getAllColumnMetadataForTable(td->tableId, true, true, true);
262  for (const auto& cd : logicalColumns) {
263  lCols.push_back(cd->columnId);
264  }
265 
266  auto physicalColumns =
267  cat.getAllColumnMetadataForTable(ptd->tableId, true, true, true);
268  for (const auto& cd : physicalColumns) {
269  pCols.push_back(cd);
270  }
271  }
272 
273  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
274  dataOwner.arrayData.emplace_back();
275  dataOwner.rawData.emplace_back();
276  dataOwner.stringData.emplace_back();
277  }
278 
279  auto copycat = [&cat, &dataOwner, &rowIndices, &lCols, &pCols, &insert_data](int col) {
280  const auto lColId = insert_data.columnIds[col];
281  const auto pCol = pCols[indexOf(lCols, lColId)];
282  return copyColumnDataOfShard(cat,
283  dataOwner,
284  rowIndices,
285  pCol,
286  col,
287  insert_data.data[col],
288  insert_data.is_default[col]);
289  };
290 
291  std::vector<std::future<BlockWithColumnId>> worker_threads;
292  for (size_t col = 0; col < insert_data.columnIds.size(); col++) {
293  worker_threads.push_back(std::async(std::launch::async, copycat, col));
294  }
295 
296  for (auto& child : worker_threads) {
297  child.wait();
298  }
299 
300  for (auto& child : worker_threads) {
301  auto shardColumnData = child.get();
302  shardData.columnIds.push_back(shardColumnData.columnId);
303  shardData.data.push_back(shardColumnData.block);
304  shardData.is_default.push_back(shardColumnData.is_default);
305  }
306 
307  return shardData;
308 }
std::string cat(Ts &&...args)
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4013
size_t indexOf(std::vector< T > &vec, T val)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1721
void copyColumnDataOfShard(const std::vector< size_t > &rowIndices, T *src, T *dst)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringOffsetT Fragmenter_Namespace::get_buffer_offset ( bool  is_varlen_array,
const StringOffsetT *  index_array,
size_t  index 
)

Definition at line 1210 of file UpdelStorage.cpp.

References CHECK.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::vacuum_varlen_rows().

1212  {
1213  auto offset = index_array[index];
1214  if (offset < 0) {
1215  // Variable length arrays encode null arrays as negative offsets
1216  CHECK(is_varlen_array);
1217  offset = -offset;
1218  }
1219  return offset;
1220 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

static int Fragmenter_Namespace::get_chunks ( const Catalog_Namespace::Catalog *  catalog,
const TableDescriptor *  td,
const FragmentInfo &  fragment,
const Data_Namespace::MemoryLevel  memory_level,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks 
)
static

Definition at line 74 of file UpdelStorage.cpp.

References CHECK, Catalog_Namespace::DBMetadata::dbId, Fragmenter_Namespace::FragmentInfo::fragmentId, Chunk_NS::Chunk::getChunk(), Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), TableDescriptor::nColumns, and TableDescriptor::tableId.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumns().

78  {
79  for (int cid = 1, nc = 0; nc < td->nColumns; ++cid) {
80  if (const auto cd = catalog->getMetadataForColumn(td->tableId, cid)) {
81  ++nc;
82  if (!cd->isVirtualCol) {
83  auto chunk_meta_it = fragment.getChunkMetadataMapPhysical().find(cid);
84  CHECK(chunk_meta_it != fragment.getChunkMetadataMapPhysical().end());
85  ChunkKey chunk_key{
86  catalog->getCurrentDB().dbId, td->tableId, cid, fragment.fragmentId};
87  auto chunk = Chunk_NS::Chunk::getChunk(cd,
88  &catalog->getDataMgr(),
89  chunk_key,
90  memory_level,
91  0,
92  chunk_meta_it->second->numBytes,
93  chunk_meta_it->second->numElements);
94  chunks.push_back(chunk);
95  }
96  }
97  }
98  return chunks.size();
99 }
std::vector< int > ChunkKey
Definition: types.h:37
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:222
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:221
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t Fragmenter_Namespace::get_null_padding ( bool  is_varlen_array,
const std::vector< uint64_t > &  frag_offsets,
const StringOffsetT *  index_array,
size_t  fragment_row_count 
)

Definition at line 1147 of file UpdelStorage.cpp.

References CHECK_GT, CHECK_LT, ArrayNoneEncoder::DEFAULT_NULL_PADDING_SIZE, and i.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::vacuum_varlen_rows().

1150  {
1151  if (is_varlen_array) {
1152  size_t first_non_deleted_row_index{0};
1153  for (auto deleted_offset : frag_offsets) {
1154  if (first_non_deleted_row_index < deleted_offset) {
1155  break;
1156  } else {
1157  first_non_deleted_row_index++;
1158  }
1159  }
1160  CHECK_LT(first_non_deleted_row_index, fragment_row_count);
1161  if (first_non_deleted_row_index == 0) {
1162  // If the first row in the fragment is not deleted, then the first offset in the
1163  // index buffer/array already contains expected padding.
1164  return index_array[0];
1165  } else {
1166  // If the first non-deleted element is a null array (identified by a negative
1167  // offset), get a padding value for the chunk buffer.
1168  if (index_array[first_non_deleted_row_index + 1] < 0) {
1169  size_t first_non_zero_offset{0};
1170  for (size_t i = 0; i <= first_non_deleted_row_index; i++) {
1171  if (index_array[i] != 0) {
1172  first_non_zero_offset = index_array[i];
1173  break;
1174  }
1175  }
1176  CHECK_GT(first_non_zero_offset, static_cast<size_t>(0));
1177  return std::min(ArrayNoneEncoder::DEFAULT_NULL_PADDING_SIZE,
1178  first_non_zero_offset);
1179  } else {
1180  return 0;
1181  }
1182  }
1183  } else {
1184  return 0;
1185  }
1186 }
#define CHECK_GT(x, y)
Definition: Logger.h:209
#define CHECK_LT(x, y)
Definition: Logger.h:207
static constexpr size_t DEFAULT_NULL_PADDING_SIZE

+ Here is the caller graph for this function:

std::set<size_t> Fragmenter_Namespace::get_var_len_null_array_indexes ( const SQLTypeInfo  sql_type_info,
const std::vector< uint64_t > &  frag_offsets,
const StringOffsetT *  index_array,
size_t  fragment_row_count 
)

Definition at line 1189 of file UpdelStorage.cpp.

References SQLTypeInfo::get_notnull(), i, and SQLTypeInfo::is_varlen_array().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::vacuum_varlen_rows().

1192  {
1193  std::set<size_t> null_array_indexes;
1194  if (sql_type_info.is_varlen_array() && !sql_type_info.get_notnull()) {
1195  size_t frag_offset_index{0};
1196  size_t vacuum_offset{0};
1197  for (size_t i = 0; i < fragment_row_count; i++) {
1198  if (frag_offset_index < frag_offsets.size() &&
1199  i == frag_offsets[frag_offset_index]) {
1200  frag_offset_index++;
1201  vacuum_offset++;
1202  } else if (index_array[i + 1] < 0) {
1203  null_array_indexes.emplace(i - vacuum_offset);
1204  }
1205  }
1206  }
1207  return null_array_indexes;
1208 }
bool is_varlen_array() const
Definition: sqltypes.h:497
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
size_t Fragmenter_Namespace::indexOf ( std::vector< T > &  vec,
T  val 
)

Definition at line 65 of file InsertDataLoader.cpp.

References CHECK.

Referenced by org.apache.calcite.sql2rel.SqlToRelConverter::collectInsertTargets(), computeRowIndicesOfShards(), copyDataOfShard(), and com.mapd.utility.db_vendors.PostGis_types::get_wkt().

65  {
66  typename std::vector<T>::iterator it = std::find(vec.begin(), vec.end(), val);
67  CHECK(it != vec.end());
68  return std::distance(vec.begin(), it);
69 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::is_integral ( const SQLTypeInfo &  t)
inline

Definition at line 48 of file UpdelStorage.cpp.

References SQLTypeInfo::is_boolean(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_time(), and SQLTypeInfo::is_timeinterval().

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata().

48  {
49  return t.is_integer() || t.is_boolean() || t.is_time() || t.is_timeinterval();
50 }
bool is_time() const
Definition: sqltypes.h:494
bool is_integer() const
Definition: sqltypes.h:490
bool is_timeinterval() const
Definition: sqltypes.h:499
bool is_boolean() const
Definition: sqltypes.h:495

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::isDatumVectorData ( const ColumnDescriptor *  cd)

Definition at line 77 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, and SQLTypeInfo::is_array().

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

77  {
78  return cd->columnType.is_array();
79 }
SQLTypeInfo columnType
bool is_array() const
Definition: sqltypes.h:496

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Fragmenter_Namespace::isStringVectorData ( const ColumnDescriptor *  cd)

Definition at line 71 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::is_geometry(), SQLTypeInfo::is_string(), and kENCODING_NONE.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

71  {
72  return (cd->columnType.is_geometry()) ||
73  (cd->columnType.is_string() &&
74  cd->columnType.get_compression() == kENCODING_NONE);
75 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
bool is_geometry() const
Definition: sqltypes.h:500
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:488

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void Fragmenter_Namespace::set_chunk_metadata ( const Catalog_Namespace::Catalog *  catalog,
FragmentInfo &  fragment,
const std::shared_ptr< Chunk_NS::Chunk > &  chunk,
const size_t  nrows_to_keep,
UpdelRoll &  updel_roll 
)
static

Definition at line 1084 of file UpdelStorage.cpp.

References UpdelRoll::chunkMetadata, UpdelRoll::dirtyChunkeys, UpdelRoll::dirtyChunks, Fragmenter_Namespace::FragmentInfo::fragmentId, Fragmenter_Namespace::FragmentInfo::getChunkMetadataMapPhysical(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getMetadataForTable(), and UpdelRoll::mutex.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1088  {
1089  auto cd = chunk->getColumnDesc();
1090  auto td = catalog->getMetadataForTable(cd->tableId);
1091  auto data_buffer = chunk->getBuffer();
1092  std::lock_guard<std::mutex> lck(updel_roll.mutex);
1093  const auto key = std::make_pair(td, &fragment);
1094  if (0 == updel_roll.chunkMetadata.count(key)) {
1095  updel_roll.chunkMetadata[key] = fragment.getChunkMetadataMapPhysical();
1096  }
1097  auto& chunkMetadata = updel_roll.chunkMetadata[key];
1098  chunkMetadata[cd->columnId]->numElements = nrows_to_keep;
1099  chunkMetadata[cd->columnId]->numBytes = data_buffer->size();
1100  if (updel_roll.dirtyChunks.count(chunk.get()) == 0) {
1101  updel_roll.dirtyChunks.emplace(chunk.get(), chunk);
1102  ChunkKey chunk_key{
1103  catalog->getDatabaseId(), cd->tableId, cd->columnId, fragment.fragmentId};
1104  updel_roll.dirtyChunkeys.emplace(chunk_key);
1105  }
1106 }
std::vector< int > ChunkKey
Definition: types.h:37
std::map< Chunk_NS::Chunk *, std::shared_ptr< Chunk_NS::Chunk > > dirtyChunks
Definition: UpdelRoll.h:52
std::set< ChunkKey > dirtyChunkeys
Definition: UpdelRoll.h:53
int getDatabaseId() const
Definition: Catalog.h:276
std::map< MetaDataKey, ChunkMetadataMap > chunkMetadata
Definition: UpdelRoll.h:59
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::mutex mutex
Definition: UpdelRoll.h:49

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
static void Fragmenter_Namespace::set_chunk_stats ( const SQLTypeInfo &  col_type,
int8_t *  data_addr,
bool &  has_null,
T &  min,
T &  max 
)
static

Definition at line 1069 of file UpdelStorage.cpp.

References SQLTypeInfo::get_notnull(), is_null(), anonymous_namespace{TypedDataAccessors.h}::set_minmax(), and omnisci.dtypes::T.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows().

1073  {
1074  T v;
1075  const auto can_be_null = !col_type.get_notnull();
1076  const auto is_null = get_scalar<T>(data_addr, col_type, v);
1077  if (is_null) {
1078  has_null = has_null || (can_be_null && is_null);
1079  } else {
1080  set_minmax(min, max, v);
1081  }
1082 }
CONSTEXPR DEVICE bool is_null(const T &value)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
void set_minmax(T &min, T &max, T const val)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Fragmenter_Namespace::shuffleByIndexes ( const ColumnDescriptor *  cd,
const std::vector< size_t > &  indexes,
DataBlockPtr &  data 
)

Definition at line 44 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, shuffleByIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

46  {
47  const auto& ti = cd->columnType;
48  switch (ti.get_type()) {
49  case kBOOLEAN:
50  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
51  break;
52  case kTINYINT:
53  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
54  break;
55  case kSMALLINT:
56  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
57  break;
58  case kINT:
59  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
60  break;
61  case kBIGINT:
62  case kNUMERIC:
63  case kDECIMAL:
64  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
65  break;
66  case kFLOAT:
67  shuffleByIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
68  break;
69  case kDOUBLE:
70  shuffleByIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
71  break;
72  case kTEXT:
73  case kVARCHAR:
74  case kCHAR:
75  if (ti.is_varlen()) {
76  shuffleByIndexesImpl(indexes, *data.stringsPtr);
77  } else {
78  switch (ti.get_size()) {
79  case 1:
80  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
81  break;
82  case 2:
83  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
84  break;
85  case 4:
86  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
87  break;
88  default:
89  CHECK(false);
90  }
91  }
92  break;
93  case kDATE:
94  case kTIME:
95  case kTIMESTAMP:
96  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
97  break;
98  case kARRAY:
99  shuffleByIndexesImpl(indexes, *data.arraysPtr);
100  break;
101  case kPOINT:
102  case kLINESTRING:
103  case kPOLYGON:
104  case kMULTIPOLYGON:
105  shuffleByIndexesImpl(indexes, *data.stringsPtr);
106  break;
107  default:
108  CHECK(false);
109  }
110 }
Definition: sqltypes.h:48
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:40
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:44
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:220
void shuffleByIndexesImpl(const std::vector< size_t > &indexes, T *buffer)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
T *  buffer 
)

Definition at line 25 of file SortedOrderFragmenter.cpp.

References i, and omnisci.dtypes::T.

Referenced by shuffleByIndexes().

25  {
26  std::vector<T> new_buffer;
27  new_buffer.reserve(indexes.size());
28  for (const auto i : indexes) {
29  new_buffer.push_back(buffer[i]);
30  }
31  std::memcpy(buffer, new_buffer.data(), indexes.size() * sizeof(T));
32 }

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::shuffleByIndexesImpl ( const std::vector< size_t > &  indexes,
std::vector< T > &  buffer 
)

Definition at line 35 of file SortedOrderFragmenter.cpp.

References i.

35  {
36  std::vector<T> new_buffer;
37  new_buffer.reserve(indexes.size());
38  for (const auto i : indexes) {
39  new_buffer.push_back(buffer[i]);
40  }
41  buffer.swap(new_buffer);
42 }
size_t Fragmenter_Namespace::sizeOfRawColumn ( const Catalog_Namespace::Catalog &  cat,
const ColumnDescriptor *  cd 
)

Definition at line 81 of file InsertDataLoader.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_logical_size(), SQLTypeInfo::get_size(), SQLTypeInfo::get_type(), SQLTypeInfo::get_type_name(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

Referenced by computeRowIndicesOfShards(), and copyColumnDataOfShard().

82  {
83  switch (cd->columnType.get_type()) {
84  case kPOINT:
85  case kLINESTRING:
86  case kPOLYGON:
87  case kMULTIPOLYGON:
88  case kARRAY:
89  throw std::runtime_error("geo and array columns have variable length elements");
90  case kBOOLEAN:
91  case kTINYINT:
92  case kSMALLINT:
93  case kINT:
94  case kBIGINT:
95  case kNUMERIC:
96  case kDECIMAL:
97  case kFLOAT:
98  case kDOUBLE:
99  case kTIMESTAMP:
100  case kTIME:
101  case kINTERVAL_DAY_TIME:
102  case kINTERVAL_YEAR_MONTH:
103  case kDATE:
104  return cd->columnType.get_logical_size();
105  case kTEXT:
106  case kVARCHAR:
107  case kCHAR:
108  if (cd->columnType.get_compression() == kENCODING_NONE) {
109  throw std::runtime_error(
110  "non encoded string columns have variable length elements");
111  }
112  return cd->columnType.get_size();
113  default:
114  throw std::runtime_error("not supported column type: " + cd->columnName + " (" +
115  cd->columnType.get_type_name() + ")");
116  }
117 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
Definition: sqltypes.h:48
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
int get_logical_size() const
Definition: sqltypes.h:325
Definition: sqltypes.h:51
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::string get_type_name() const
Definition: sqltypes.h:417
Definition: sqltypes.h:40
Definition: sqltypes.h:44
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Fragmenter_Namespace::sortIndexes ( const ColumnDescriptor *  cd,
std::vector< size_t > &  indexes,
const DataBlockPtr data 
)

Definition at line 137 of file SortedOrderFragmenter.cpp.

References DataBlockPtr::arraysPtr, CHECK, ColumnDescriptor::columnType, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, DataBlockPtr::numbersPtr, sortIndexesImpl(), and DataBlockPtr::stringsPtr.

Referenced by Fragmenter_Namespace::SortedOrderFragmenter::sortData().

139  {
140  const auto& ti = cd->columnType;
141  switch (ti.get_type()) {
142  case kBOOLEAN:
143  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
144  break;
145  case kTINYINT:
146  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
147  break;
148  case kSMALLINT:
149  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
150  break;
151  case kINT:
152  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
153  break;
154  case kBIGINT:
155  case kNUMERIC:
156  case kDECIMAL:
157  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
158  break;
159  case kFLOAT:
160  sortIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
161  break;
162  case kDOUBLE:
163  sortIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
164  break;
165  case kTEXT:
166  case kVARCHAR:
167  case kCHAR:
168  if (ti.is_varlen()) {
169  sortIndexesImpl(indexes, *data.stringsPtr);
170  } else {
171  switch (ti.get_size()) {
172  case 1:
173  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
174  break;
175  case 2:
176  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
177  break;
178  case 4:
179  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
180  break;
181  default:
182  CHECK(false);
183  }
184  }
185  break;
186  case kDATE:
187  case kTIME:
188  case kTIMESTAMP:
189  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
190  break;
191  case kARRAY:
192  sortIndexesImpl(indexes, *data.arraysPtr);
193  break;
194  default:
195  CHECK(false) << "invalid type '" << ti.get_type() << "' to sort";
196  }
197 }
Definition: sqltypes.h:48
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
void sortIndexesImpl(std::vector< size_t > &indexes, const T *buffer)
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:40
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:44
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:220

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const T *  buffer 
)

Definition at line 113 of file SortedOrderFragmenter.cpp.

References CHECK, and gpu_enabled::sort().

Referenced by sortIndexes().

113  {
114  CHECK(buffer);
115  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
116  return buffer[a] < buffer[b];
117  });
118 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< std::string > &  buffer 
)

Definition at line 120 of file SortedOrderFragmenter.cpp.

References gpu_enabled::sort().

121  {
122  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
123  return buffer[a].size() < buffer[b].size() ||
124  (buffer[a].size() == buffer[b].size() && buffer[a] < buffer[b]);
125  });
126 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105

+ Here is the call graph for this function:

void Fragmenter_Namespace::sortIndexesImpl ( std::vector< size_t > &  indexes,
const std::vector< ArrayDatum > &  buffer 
)

Definition at line 128 of file SortedOrderFragmenter.cpp.

References gpu_enabled::sort().

129  {
130  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
131  return buffer[a].is_null || buffer[a].length < buffer[b].length ||
132  (!buffer[b].is_null && buffer[a].length == buffer[b].length &&
133  memcmp(buffer[a].pointer, buffer[b].pointer, buffer[a].length) < 0);
134  });
135 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105

+ Here is the call graph for this function:

void Fragmenter_Namespace::wait_cleanup_threads ( std::vector< std::future< void >> &  threads)
inline

Definition at line 41 of file UpdelStorage.cpp.

References t.

Referenced by Fragmenter_Namespace::InsertOrderFragmenter::compactRows(), Fragmenter_Namespace::InsertOrderFragmenter::getVacuumOffsets(), and Fragmenter_Namespace::InsertOrderFragmenter::updateColumn().

41  {
42  for (auto& t : threads) {
43  t.get();
44  }
45  threads.clear();
46 }
char * t

+ Here is the caller graph for this function: