_insert_data_loader_8cpp_source.html

 /*

  * Copyright 2022 HEAVY.AI, Inc.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #include <algorithm>

 #include <numeric>

 #include <vector>


 #include "../Shared/shard_key.h"

 #include "Geospatial/Types.h"

 #include "InsertDataLoader.h"

 #include "TargetValueConvertersFactories.h"


 namespace Fragmenter_Namespace {


 // Owns the per-column buffers that hold one shard's copy of the insert data.
 // copyColumnDataOfShard() returns DataBlockPtr values that point into these
 // vectors, so a ShardDataOwner must stay alive while those pointers are used.
 struct ShardDataOwner {

   // Byte buffers for fixed-width columns; indexed by the inserted column's position.
   std::vector<std::vector<uint8_t>> rawData;

   // Copied values for columns handled as string vectors (see isStringVectorData()).
   std::vector<std::vector<std::string>> stringData;

   // Copied values for array columns (see isDatumVectorData()).
   std::vector<std::vector<ArrayDatum>> arrayData;

 };


 /**
  * Groups row indices by the shard table that each row's shard key maps to.
  *
  * @param shard_count Number of shards per leaf.
  * @param leaf_count Number of leaves; the result has shard_count * leaf_count slots.
  * @param row_count Number of rows to distribute.
  * @param src Shard key values. Only src[0] is read when duplicated_key_value is set,
  *            otherwise src[0 .. row_count) is read. Not dereferenced when
  *            row_count is 0.
  * @param duplicated_key_value True when every row shares the key stored in src[0].
  * @return One vector of row indices (ascending) per shard table.
  *
  * Precondition: shard_count * leaf_count must be non-zero whenever row_count > 0,
  * since the shard id is computed with a modulo by that product.
  */
 template <typename SRC>
 std::vector<std::vector<size_t>> compute_row_indices_of_shards(
     size_t shard_count,
     size_t leaf_count,
     size_t row_count,
     SRC* src,
     bool duplicated_key_value) {
   const auto n_shard_tables = shard_count * leaf_count;
   std::vector<std::vector<size_t>> row_indices_of_shards(n_shard_tables);
   if (row_count == 0) {
     // Nothing to distribute; in particular do not touch src, which may be empty.
     return row_indices_of_shards;
   }

   const auto shard_of = [n_shard_tables](const SRC key) -> size_t {
     if constexpr (std::is_unsigned<SRC>::value) {
       // expecting unsigned data
       // thus, no need for double remainder
       return key % n_shard_tables;
     } else {
       return SHARD_FOR_KEY(key, n_shard_tables);
     }
   };

   if (duplicated_key_value) {
     // Every row carries the same key, so all rows land in a single shard.
     auto& rows_of_shard = row_indices_of_shards[shard_of(src[0])];
     rows_of_shard.resize(row_count);
     std::iota(rows_of_shard.begin(), rows_of_shard.end(), size_t{0});
   } else {
     for (size_t row = 0; row < row_count; ++row) {
       row_indices_of_shards[shard_of(src[row])].push_back(row);
     }
   }

   return row_indices_of_shards;
 }


 // Returns the position of the first element of vec equal to val.
 // The value is required to be present (enforced with CHECK).
 template <typename T>
 size_t indexOf(std::vector<T>& vec, T val) {
   const auto first = vec.begin();
   const auto last = vec.end();
   const auto match = std::find(first, last, val);
   CHECK(match != last);
   return std::distance(first, match);
 }


 // True for geometry columns and for string columns without encoding; these are
 // the columns whose insert data is read through DataBlockPtr::stringsPtr.
 bool isStringVectorData(const ColumnDescriptor* cd) {
   const auto& type_info = cd->columnType;
   if (type_info.is_geometry()) {
     return true;
   }
   return type_info.is_string() && type_info.get_compression() == kENCODING_NONE;
 }


 // True for array columns; these are the columns whose insert data is read
 // through DataBlockPtr::arraysPtr.
 bool isDatumVectorData(const ColumnDescriptor* cd) {
   const bool holds_arrays = cd->columnType.is_array();
   return holds_arrays;
 }


 /**
  * Returns the per-element byte width of a fixed-width column.
  *
  * @param cat Catalog (not used by this function).
  * @param cd Descriptor of the column to size.
  * @param get_logical_size For numeric/time types, selects get_logical_size()
  *        (true) or get_size() (false). Encoded string columns always report
  *        get_size().
  * @throws std::runtime_error for geo, array and non-encoded string columns
  *         (variable-length elements) and for any other unsupported type.
  */
 size_t size_of_raw_column(const Catalog_Namespace::Catalog& cat,
                           const ColumnDescriptor* cd,
                           const bool get_logical_size = true) {
   const auto& type_info = cd->columnType;
   switch (type_info.get_type()) {
     // Fixed-width scalar types.
     case kBOOLEAN:
     case kTINYINT:
     case kSMALLINT:
     case kINT:
     case kBIGINT:
     case kNUMERIC:
     case kDECIMAL:
     case kFLOAT:
     case kDOUBLE:
     case kTIMESTAMP:
     case kTIME:
     case kINTERVAL_DAY_TIME:
     case kINTERVAL_YEAR_MONTH:
     case kDATE: {
       if (get_logical_size) {
         return type_info.get_logical_size();
       }
       return type_info.get_size();
     }

     // Strings are fixed-width only when they are encoded.
     case kTEXT:
     case kVARCHAR:
     case kCHAR: {
       if (type_info.get_compression() != kENCODING_NONE) {
         return type_info.get_size();
       }
       throw std::runtime_error(
           "non encoded string columns have variable length elements");
     }

     // Variable-length types have no single element width.
     case kPOINT:
     case kMULTIPOINT:
     case kLINESTRING:
     case kMULTILINESTRING:
     case kPOLYGON:
     case kMULTIPOLYGON:
     case kARRAY:
       throw std::runtime_error("geo and array columns have variable length elements");

     default:
       throw std::runtime_error("not supported column type: " + cd->columnName + " (" +
                                cd->columnType.get_type_name() + ")");
   }
 }


 /**
  * Computes, for every shard table, the row indices of insert_chunks that belong
  * to it, based on the values stored in the table's shard-key chunk.
  *
  * The shard-key chunk must be present in insert_chunks, must be a fixed-width
  * column, and must have an in-memory buffer (all enforced with CHECK). The
  * element width comes from size_of_raw_column() with get_logical_size = false,
  * and the buffer is read as unsigned integers of that width.
  *
  * @param cat Catalog used to look up the table and its shard column.
  * @param leaf_count Number of leaves the shards are spread over.
  * @param insert_chunks Chunks to distribute.
  * @return One vector of row indices per shard table.
  */
 std::vector<std::vector<size_t>> compute_row_indices_of_shards(
     const Catalog_Namespace::Catalog& cat,
     size_t leaf_count,
     const InsertChunks& insert_chunks) {
   const auto* table_desc = cat.getMetadataForTable(insert_chunks.table_id);
   const auto* shard_cd = cat.getShardColumnMetadataForTable(table_desc);
   const auto chunk_it = insert_chunks.chunks.find(shard_cd->columnId);
   CHECK(chunk_it != insert_chunks.chunks.end());
   Chunk_NS::Chunk& shard_chunk = *chunk_it->second;
   const auto row_count = shard_chunk.getBuffer()->getEncoder()->getNumElems();
   const auto shard_count = table_desc->nShards;

   CHECK(!isStringVectorData(shard_cd));
   CHECK(!isDatumVectorData(shard_cd));

   auto key_bytes = shard_chunk.getBuffer()->getMemoryPtr();
   CHECK(key_bytes);

   // Reinterprets the key buffer as the tag's type and buckets the rows.
   const auto bucket_rows_as = [&](auto width_tag) {
     using Key = decltype(width_tag);
     return compute_row_indices_of_shards(
         shard_count, leaf_count, row_count, reinterpret_cast<Key*>(key_bytes), false);
   };

   switch (size_of_raw_column(cat, shard_cd, false)) {
     case 1:
       return bucket_rows_as(uint8_t{});
     case 2:
       return bucket_rows_as(uint16_t{});
     case 4:
       return bucket_rows_as(uint32_t{});
     case 8:
       return bucket_rows_as(uint64_t{});
     default:
       UNREACHABLE() << "unexpected data element size of column";
   }
   return {};
 }


 /**
  * Computes, for every shard table, the row indices of insert_data that belong
  * to it, based on the values in the shard-key column's data block.
  *
  * The shard-key column must be among insert_data.columnIds and must be a
  * fixed-width column (enforced with CHECK). The element width comes from
  * size_of_raw_column() with its default (logical size). When the shard column
  * is flagged in insert_data.is_default, all rows are assigned to the shard of
  * the first value.
  *
  * @param cat Catalog used to look up the table and its shard column.
  * @param leafCount Number of leaves the shards are spread over.
  * @param insert_data Rows to distribute.
  * @return One vector of row indices per shard table.
  * @throws std::runtime_error if the element width is not 1, 2, 4 or 8 bytes.
  */
 std::vector<std::vector<size_t>> computeRowIndicesOfShards(
     const Catalog_Namespace::Catalog& cat,
     size_t leafCount,
     InsertData& insert_data) {
   const auto* table_desc = cat.getMetadataForTable(insert_data.tableId);
   const auto* shard_cd = cat.getShardColumnMetadataForTable(table_desc);
   const auto shard_block_idx = indexOf(insert_data.columnIds, shard_cd->columnId);
   DataBlockPtr& shard_block = insert_data.data[shard_block_idx];
   const auto row_count = insert_data.numRows;
   const auto shard_count = table_desc->nShards;

   CHECK(!isStringVectorData(shard_cd));
   CHECK(!isDatumVectorData(shard_cd));

   CHECK(insert_data.is_default.size() == insert_data.columnIds.size());
   const bool is_default = insert_data.is_default[shard_block_idx];

   // Reinterprets the key block as the tag's type and buckets the rows.
   const auto bucket_rows_as = [&](auto width_tag) {
     using Key = decltype(width_tag);
     return compute_row_indices_of_shards(shard_count,
                                          leafCount,
                                          row_count,
                                          reinterpret_cast<Key*>(shard_block.numbersPtr),
                                          is_default);
   };

   switch (size_of_raw_column(cat, shard_cd)) {
     case 1:
       return bucket_rows_as(uint8_t{});
     case 2:
       return bucket_rows_as(uint16_t{});
     case 4:
       return bucket_rows_as(uint32_t{});
     case 8:
       return bucket_rows_as(uint64_t{});
     default:
       break;
   }
   throw std::runtime_error("Unexpected data block element size");
 }


 // Gathers src[rowIndices[i]] into dst[i] for every i, in order.
 // dst must have room for rowIndices.size() elements.
 template <typename T>
 void copyColumnDataOfShard(const std::vector<size_t>& rowIndices, T* src, T* dst) {
   T* out = dst;
   for (const size_t src_row : rowIndices) {
     *out = src[src_row];
     ++out;
   }
 }


 // Result of copying one column's data for one shard, as produced by the
 // per-column copyColumnDataOfShard() overload.
 struct BlockWithColumnId {

   // Id of the column in the physical (shard) table.
   int columnId;

   // Points at the copied values; the storage is held by a ShardDataOwner.
   DataBlockPtr block;

   // Passed through from the source column; when set, only row 0 was copied.
   bool is_default;

 };


 /**
  * Copies the rows of one column that belong to a shard into storage owned by
  * dataOwner and returns a data block pointing at that copy.
  *
  * @param cat Catalog, forwarded to size_of_raw_column().
  * @param dataOwner Receives the copy in slot columnIndex of the vector matching
  *        the column kind (stringData, arrayData or rawData). Must outlive the
  *        returned block.
  * @param rowIndices Source rows that belong to the shard.
  * @param pCol Descriptor of the column in the physical (shard) table.
  * @param columnIndex Slot in dataOwner's vectors to use for this column.
  * @param dataBlock Source data of the column.
  * @param is_default When true, only source row 0 is copied, regardless of
  *        rowIndices.
  * @return The physical column id, the block pointing at the copy, and is_default.
  * @throws std::runtime_error if a fixed-width column is not 1, 2, 4 or 8 bytes
  *         wide (or whatever size_of_raw_column() throws).
  */
 BlockWithColumnId copyColumnDataOfShard(const Catalog_Namespace::Catalog& cat,
                                         ShardDataOwner& dataOwner,
                                         const std::vector<size_t>& rowIndices,
                                         const ColumnDescriptor* pCol,
                                         size_t columnIndex,
                                         DataBlockPtr dataBlock,
                                         bool is_default) {
   DataBlockPtr ret;
   const std::vector<size_t> single_row_idx{size_t{0}};
   const std::vector<size_t>& rows = is_default ? single_row_idx : rowIndices;

   // data() is used instead of &v[0] throughout: it is well defined even when
   // the vector is empty.
   if (isStringVectorData(pCol)) {
     auto& data = dataOwner.stringData[columnIndex];
     data.resize(rows.size());
     copyColumnDataOfShard(rows, dataBlock.stringsPtr->data(), data.data());
     ret.stringsPtr = &data;
   } else if (isDatumVectorData(pCol)) {
     auto& data = dataOwner.arrayData[columnIndex];
     data.resize(rows.size());
     copyColumnDataOfShard(rows, dataBlock.arraysPtr->data(), data.data());
     ret.arraysPtr = &data;
   } else {
     const auto rawArrayElementSize = size_of_raw_column(cat, pCol);
     auto& data = dataOwner.rawData[columnIndex];
     data.resize(rows.size() * rawArrayElementSize);

     // Copies the selected rows, treating both buffers as arrays of the tag's type.
     const auto copy_rows_as = [&](auto width_tag) {
       using Elem = decltype(width_tag);
       copyColumnDataOfShard(rows,
                             reinterpret_cast<Elem*>(dataBlock.numbersPtr),
                             reinterpret_cast<Elem*>(data.data()));
     };

     switch (rawArrayElementSize) {
       case 1:
         copy_rows_as(uint8_t{});
         break;
       case 2:
         copy_rows_as(uint16_t{});
         break;
       case 4:
         copy_rows_as(uint32_t{});
         break;
       case 8:
         copy_rows_as(uint64_t{});
         break;
       default:
         throw std::runtime_error("Unexpected data block element size");
     }

     ret.numbersPtr = reinterpret_cast<int8_t*>(data.data());
   }

   return {pCol->columnId, ret, is_default};
 }


 /**
  * Builds the InsertChunks for one shard by copying the selected rows of every
  * source chunk into newly created chunks targeting the shard's physical table.
  *
  * @param cat Catalog used to resolve the logical table and its physical tables.
  * @param insert_chunks Source chunks of the logical table.
  * @param shardTableIndex Index of the shard's physical table in
  *        cat.getPhysicalTablesDescriptors().
  * @param rowIndices Source rows that belong to the shard.
  * @return The buffers backing the new chunks together with the shard's
  *         InsertChunks. The chunks hold raw pointers into the buffers, so the
  *         buffer list must be kept alive while the InsertChunks is in use.
  */
 std::pair<std::list<std::unique_ptr<foreign_storage::ForeignStorageBuffer>>, InsertChunks>
 copy_data_of_shard(const Catalog_Namespace::Catalog& cat,
                    const InsertChunks& insert_chunks,
                    int shardTableIndex,
                    const std::vector<size_t>& rowIndices) {
   const auto* table = cat.getMetadataForTable(insert_chunks.table_id);
   const auto* physical_table = cat.getPhysicalTablesDescriptors(table)[shardTableIndex];

   InsertChunks insert_chunks_for_shard{
       physical_table->tableId, insert_chunks.db_id, {}, {}};

   std::list<std::unique_ptr<foreign_storage::ForeignStorageBuffer>> buffers;

   for (const auto& [column_id, chunk] : insert_chunks.chunks) {
     auto column = chunk->getColumnDesc();
     // Look the map slot up once and reuse it.
     auto& chunk_slot = insert_chunks_for_shard.chunks[column_id];
     chunk_slot = std::make_shared<Chunk_NS::Chunk>(column);
     auto& chunk_for_shard = *chunk_slot;
     chunk_for_shard.setBuffer(
         buffers.emplace_back(std::make_unique<foreign_storage::ForeignStorageBuffer>())
             .get());
     if (column->columnType.is_varlen_indeed()) {  // requires an index buffer
       chunk_for_shard.setIndexBuffer(
           buffers.emplace_back(std::make_unique<foreign_storage::ForeignStorageBuffer>())
               .get());
     }
     chunk_for_shard.initEncoder();
     chunk_for_shard.appendEncodedDataAtIndices(*chunk, rowIndices);
     CHECK_EQ(chunk_for_shard.getBuffer()->getEncoder()->getNumElems(), rowIndices.size());
   }

   // mark which row indices are valid for import
   // NOTE(review): binary_search requires insert_chunks.valid_row_indices to be
   // sorted ascending - confirm against the producers of InsertChunks.
   const auto row_count = rowIndices.size();
   insert_chunks_for_shard.valid_row_indices.reserve(row_count);
   for (size_t irow = 0; irow < row_count; ++irow) {
     const auto row_index = rowIndices[irow];
     if (std::binary_search(insert_chunks.valid_row_indices.begin(),
                            insert_chunks.valid_row_indices.end(),
                            row_index)) {
       // Valid rows are recorded by their position within the shard's chunks.
       insert_chunks_for_shard.valid_row_indices.emplace_back(irow);
     }
   }

   // Move rather than copy the chunk map and index vector into the result.
   return {std::move(buffers), std::move(insert_chunks_for_shard)};
 }


 /**
  * Builds the InsertData for one shard by copying the selected rows of every
  * inserted column. Each column is copied by its own std::async task.
  *
  * @param cat Catalog used to resolve tables and column metadata.
  * @param dataOwner Receives the copied column data; one slot per inserted column
  *        is appended to each of its vectors. Must outlive the returned InsertData.
  * @param insert_data Source rows of the logical table.
  * @param shardTableIndex Index of the shard's physical table in
  *        cat.getPhysicalTablesDescriptors().
  * @param rowIndices Source rows that belong to the shard.
  * @return InsertData targeting the shard's physical table, with data blocks that
  *         point into dataOwner.
  */
 InsertData copyDataOfShard(const Catalog_Namespace::Catalog& cat,
                            ShardDataOwner& dataOwner,
                            InsertData& insert_data,
                            int shardTableIndex,
                            const std::vector<size_t>& rowIndices) {
   const auto* logical_td = cat.getMetadataForTable(insert_data.tableId);
   const auto* physical_td = cat.getPhysicalTablesDescriptors(logical_td)[shardTableIndex];

   InsertData shardData;
   shardData.databaseId = insert_data.databaseId;
   shardData.tableId = physical_td->tableId;
   shardData.numRows = rowIndices.size();

   // Logical column ids and physical column descriptors, in catalog order. A
   // logical column is mapped to its physical counterpart by position.
   std::vector<int> lCols;
   for (const auto& logical_cd :
        cat.getAllColumnMetadataForTable(logical_td->tableId, true, true, true)) {
     lCols.push_back(logical_cd->columnId);
   }
   std::vector<const ColumnDescriptor*> pCols;
   for (const auto& physical_cd :
        cat.getAllColumnMetadataForTable(physical_td->tableId, true, true, true)) {
     pCols.push_back(physical_cd);
   }

   // Add one empty slot per inserted column to each storage vector.
   const size_t column_count = insert_data.columnIds.size();
   dataOwner.arrayData.resize(dataOwner.arrayData.size() + column_count);
   dataOwner.rawData.resize(dataOwner.rawData.size() + column_count);
   dataOwner.stringData.resize(dataOwner.stringData.size() + column_count);

   auto copy_column = [&cat, &dataOwner, &rowIndices, &lCols, &pCols, &insert_data](
                          int col) {
     const auto logical_id = insert_data.columnIds[col];
     const auto* physical_cd = pCols[indexOf(lCols, logical_id)];
     return copyColumnDataOfShard(cat,
                                  dataOwner,
                                  rowIndices,
                                  physical_cd,
                                  col,
                                  insert_data.data[col],
                                  insert_data.is_default[col]);
   };

   std::vector<std::future<BlockWithColumnId>> pending_copies;
   for (size_t col = 0; col < column_count; ++col) {
     pending_copies.push_back(std::async(std::launch::async, copy_column, col));
   }

   // Let every task finish before collecting results.
   for (auto& pending : pending_copies) {
     pending.wait();
   }

   for (auto& pending : pending_copies) {
     const auto column_copy = pending.get();
     shardData.columnIds.push_back(column_copy.columnId);
     shardData.data.push_back(column_copy.block);
     shardData.is_default.push_back(column_copy.is_default);
   }

   return shardData;
 }


 // Returns the current leaf index and advances it to the next leaf, wrapping
 // back to 0 once leaf_count_ is reached. Guarded by current_leaf_index_mutex_.
 size_t InsertDataLoader::moveToNextLeaf() {
   std::unique_lock current_leaf_index_lock(current_leaf_index_mutex_);
   const size_t leaf_for_caller = current_leaf_index_;
   const size_t following_leaf = current_leaf_index_ + 1;
   current_leaf_index_ = (following_leaf >= leaf_count_) ? 0 : following_leaf;
   return leaf_for_caller;
 }


 /**
  * Inserts chunks into a table through the connector.
  *
  * For a table without shards the chunks go unchanged to the next leaf in
  * rotation. For a sharded table the rows are split by shard key and each
  * non-empty shard is copied and inserted by its own std::async task.
  *
  * @param session_info Session providing the catalog; forwarded to the connector.
  * @param insert_chunks Chunks of the logical table to insert.
  */
 void InsertDataLoader::insertChunks(const Catalog_Namespace::SessionInfo& session_info,
                                     const InsertChunks& insert_chunks) {
   const auto& cat = session_info.getCatalog();
   const auto* td = cat.getMetadataForTable(insert_chunks.table_id);

   CHECK(td);
   if (td->nShards == 0) {
     connector_.insertChunksToLeaf(session_info, moveToNextLeaf(), insert_chunks);
     return;
   }

   // we have a sharded target table, start spreading to physical tables
   const auto row_indices_of_shards =
       compute_row_indices_of_shards(cat, connector_.leafCount(), insert_chunks);

   auto insert_shard_data =
       [this, &session_info, &insert_chunks, &cat, td, &row_indices_of_shards](
           size_t shardId) {
         // A shard id encodes both the physical table and the leaf:
         // id = leaf index * nShards + table index.
         const auto shard_table_idx = shardId % td->nShards;
         const auto shard_leaf_idx = shardId / td->nShards;

         const auto& row_indices_of_shard = row_indices_of_shards[shardId];

         // buffers backs the chunks in shard_insert_chunks and must stay alive
         // until the insert has completed.
         auto [buffers, shard_insert_chunks] = copy_data_of_shard(
             cat, insert_chunks, shard_table_idx, row_indices_of_shard);
         connector_.insertChunksToLeaf(
             session_info, shard_leaf_idx, shard_insert_chunks);
       };

   std::vector<std::future<void>> worker_threads;
   for (size_t shard_id = 0; shard_id < row_indices_of_shards.size(); shard_id++) {
     if (!row_indices_of_shards[shard_id].empty()) {
       worker_threads.push_back(
           std::async(std::launch::async, insert_shard_data, shard_id));
     }
   }
   // Wait for every task first so that none is still running when get()
   // rethrows an exception from a failed task.
   for (auto& child : worker_threads) {
     child.wait();
   }
   for (auto& child : worker_threads) {
     child.get();
   }
 }


 /**
  * Inserts row data into a table through the connector.
  *
  * For a table without shards the data goes unchanged to the next leaf in
  * rotation. For a sharded table the rows are split by shard key and each
  * non-empty shard is copied and inserted by its own std::async task.
  *
  * @param session_info Session providing the catalog; forwarded to the connector.
  * @param insert_data Rows of the logical table to insert.
  */
 void InsertDataLoader::insertData(const Catalog_Namespace::SessionInfo& session_info,
                                   InsertData& insert_data) {
   const auto& cat = session_info.getCatalog();
   const auto* td = cat.getMetadataForTable(insert_data.tableId);

   CHECK(td);
   if (td->nShards == 0) {
     connector_.insertDataToLeaf(session_info, moveToNextLeaf(), insert_data);
     return;
   }

   // we have a sharded target table, start spreading to physical tables
   const auto rowIndicesOfShards =
       computeRowIndicesOfShards(cat, connector_.leafCount(), insert_data);

   auto insertShardData =
       [this, &session_info, &insert_data, &cat, td, &rowIndicesOfShards](
           size_t shardId) {
         // A shard id encodes both the physical table and the leaf:
         // id = leaf index * nShards + table index.
         const auto shardTableIdx = shardId % td->nShards;
         const auto shardLeafIdx = shardId / td->nShards;

         const auto& rowIndicesOfShard = rowIndicesOfShards[shardId];
         // Owns the copied column data that shardData points into; must stay
         // alive until the insert has completed.
         ShardDataOwner shardDataOwner;

         InsertData shardData = copyDataOfShard(
             cat, shardDataOwner, insert_data, shardTableIdx, rowIndicesOfShard);
         CHECK(shardData.numRows > 0);
         connector_.insertDataToLeaf(session_info, shardLeafIdx, shardData);
       };

   std::vector<std::future<void>> worker_threads;
   for (size_t shardId = 0; shardId < rowIndicesOfShards.size(); shardId++) {
     if (!rowIndicesOfShards[shardId].empty()) {
       worker_threads.push_back(
           std::async(std::launch::async, insertShardData, shardId));
     }
   }
   // Wait for every task first so that none is still running when get()
   // rethrows an exception from a failed task.
   for (auto& child : worker_threads) {
     child.wait();
   }
   for (auto& child : worker_threads) {
     child.get();
   }
 }


 // Inserts the chunks through the target table's fragmenter without
 // checkpointing. The local connector only accepts leaf index 0.
 void LocalInsertConnector::insertChunksToLeaf(
     const Catalog_Namespace::SessionInfo& session,
     const size_t leaf_idx,
     const Fragmenter_Namespace::InsertChunks& insert_chunks) {
   CHECK(leaf_idx == 0);
   const auto* target_td =
       session.getCatalog().getMetadataForTable(insert_chunks.table_id);
   target_td->fragmenter->insertChunksNoCheckpoint(insert_chunks);
 }


 // Inserts the rows through the target table's fragmenter without
 // checkpointing. The local connector only accepts leaf index 0.
 void LocalInsertConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
                                             const size_t leaf_idx,
                                             InsertData& insert_data) {
   CHECK(leaf_idx == 0);
   const auto* target_td = session.getCatalog().getMetadataForTable(insert_data.tableId);
   target_td->fragmenter->insertDataNoCheckpoint(insert_data);
 }


 // Checkpoints the table via the session's catalog
 // (Catalog::checkpointWithAutoRollback).
 void LocalInsertConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
                                       int table_id) {
   session.getCatalog().checkpointWithAutoRollback(table_id);
 }


 // Reads the table's current epochs from the catalog and sets them again.
 // NOTE(review): presumably re-applying the epochs discards uncheckpointed
 // changes - confirm against Catalog::setTableEpochs.
 void LocalInsertConnector::rollback(const Catalog_Namespace::SessionInfo& session,
                                     int table_id) {
   auto& catalog = session.getCatalog();
   const auto database_id = catalog.getDatabaseId();
   catalog.setTableEpochs(database_id, catalog.getTableEpochs(database_id, table_id));
 }


 }  // namespace Fragmenter_Namespace

heavydb.dtypes.T
T
Definition: dtypes.py:8

Fragmenter_Namespace::BlockWithColumnId
Definition: InsertDataLoader.cpp:229

CHECK_EQ
#define CHECK_EQ(x, y)
Definition: Logger.h:301

TargetValueConvertersFactories.h

kENCODING_NONE
Definition: sqltypes.h:241

SQLTypeInfo::get_size
HOST DEVICE int get_size() const
Definition: sqltypes.h:403

kPOLYGON
Definition: sqltypes.h:86

cat
std::string cat(Ts &&...args)
Definition: StringTransform.h:41

Fragmenter_Namespace::InsertDataLoader::insertChunks
void insertChunks(const Catalog_Namespace::SessionInfo &session_info, const InsertChunks &insert_chunks)
Definition: InsertDataLoader.cpp:418

Catalog_Namespace::Catalog
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143

kTIME
Definition: sqltypes.h:76

kARRAY
Definition: sqltypes.h:81

DataBlockPtr::stringsPtr
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:234

DataBlockPtr::arraysPtr
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:235

Fragmenter_Namespace::copy_data_of_shard
std::pair< std::list< std::unique_ptr< foreign_storage::ForeignStorageBuffer > >, InsertChunks > copy_data_of_shard(const Catalog_Namespace::Catalog &cat, const InsertChunks &insert_chunks, int shardTableIndex, const std::vector< size_t > &rowIndices)
Definition: InsertDataLoader.cpp:298

Fragmenter_Namespace::InsertDataLoader::current_leaf_index_mutex_
std::shared_mutex current_leaf_index_mutex_
Definition: InsertDataLoader.h:71

Catalog_Namespace::SessionInfo
Definition: SessionInfo.h:47

Fragmenter_Namespace::computeRowIndicesOfShards
std::vector< std::vector< size_t > > computeRowIndicesOfShards(const Catalog_Namespace::Catalog &cat, size_t leafCount, InsertData &insert_data)
Definition: InsertDataLoader.cpp:172

Fragmenter_Namespace::BlockWithColumnId::columnId
int columnId
Definition: InsertDataLoader.cpp:230

UNREACHABLE
#define UNREACHABLE()
Definition: Logger.h:338

kFLOAT
Definition: sqltypes.h:74

Fragmenter_Namespace::InsertData::is_default
std::vector< bool > is_default
Definition: Fragmenter.h:75

Fragmenter_Namespace::InsertDataLoader::InsertConnector::leafCount
virtual size_t leafCount()=0

SQLTypeInfo::get_type
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391

Fragmenter_Namespace::InsertDataLoader::InsertConnector::insertDataToLeaf
virtual void insertDataToLeaf(const Catalog_Namespace::SessionInfo &parent_session_info, const size_t leaf_idx, Fragmenter_Namespace::InsertData &insert_data)=0

Catalog_Namespace::Catalog::getShardColumnMetadataForTable
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:4861

Fragmenter_Namespace::isStringVectorData
bool isStringVectorData(const ColumnDescriptor *cd)
Definition: InsertDataLoader.cpp:72

Fragmenter_Namespace::InsertDataLoader::insertData
void insertData(const Catalog_Namespace::SessionInfo &session_info, InsertData &insert_data)
Definition: InsertDataLoader.cpp:462

Fragmenter_Namespace::InsertChunks::db_id
const int db_id
Definition: Fragmenter.h:51

Fragmenter_Namespace::InsertData::tableId
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70

kBOOLEAN
Definition: sqltypes.h:67

kMULTIPOINT
Definition: sqltypes.h:97

kDOUBLE
Definition: sqltypes.h:75

report.rows
tuple rows
Definition: report.py:114

Fragmenter_Namespace::InsertData::databaseId
int databaseId
Definition: Fragmenter.h:69

kINTERVAL_YEAR_MONTH
Definition: sqltypes.h:83

Fragmenter_Namespace::InsertData::numRows
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:72

Fragmenter_Namespace::LocalInsertConnector::checkpoint
void checkpoint(const Catalog_Namespace::SessionInfo &parent_session_info, int tableId) override
Definition: InsertDataLoader.cpp:526

Fragmenter_Namespace::size_of_raw_column
size_t size_of_raw_column(const Catalog_Namespace::Catalog &cat, const ColumnDescriptor *cd, const bool get_logical_size=true)
Definition: InsertDataLoader.cpp:82

Types.h

threading_serial::async
future< Result > async(Fn &&fn, Args &&...args)
Definition: threading_serial.h:11

Fragmenter_Namespace::InsertDataLoader::current_leaf_index_
size_t current_leaf_index_
Definition: InsertDataLoader.h:69

SQLTypeInfo::get_logical_size
int get_logical_size() const
Definition: sqltypes.h:421

Fragmenter_Namespace::LocalInsertConnector::insertDataToLeaf
void insertDataToLeaf(const Catalog_Namespace::SessionInfo &parent_session_info, const size_t leaf_idx, Fragmenter_Namespace::InsertData &insert_data) override
Definition: InsertDataLoader.cpp:517

DataBlockPtr
Definition: sqltypes.h:232

Encoder::getNumElems
size_t getNumElems() const
Definition: Encoder.h:284

heavyai::unique_lock
std::unique_lock< T > unique_lock
Definition: heavyai_shared_mutex.h:39

Fragmenter_Namespace::ShardDataOwner::rawData
std::vector< std::vector< uint8_t > > rawData
Definition: InsertDataLoader.cpp:29

Fragmenter_Namespace::InsertChunks
Definition: Fragmenter.h:49

Catalog_Namespace::Catalog::getDatabaseId
int getDatabaseId() const
Definition: Catalog.h:326

kMULTILINESTRING
Definition: sqltypes.h:96

ColumnDescriptor
specifies the content in-memory of a row in the column metadata table
Definition: ColumnDescriptor.h:33

kBIGINT
Definition: sqltypes.h:78

Fragmenter_Namespace::compute_row_indices_of_shards
std::vector< std::vector< size_t > > compute_row_indices_of_shards(size_t shard_count, size_t leaf_count, size_t row_count, SRC *src, bool duplicated_key_value)
Definition: InsertDataLoader.cpp:35

Catalog_Namespace::Catalog::checkpointWithAutoRollback
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:5040

Catalog_Namespace::Catalog::getPhysicalTablesDescriptors
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
Definition: Catalog.cpp:4879

TableDescriptor::fragmenter
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
Definition: TableDescriptor.h:63

Fragmenter_Namespace::indexOf
size_t indexOf(std::vector< T > &vec, T val)
Definition: InsertDataLoader.cpp:66

kMULTIPOLYGON
Definition: sqltypes.h:87

kTEXT
Definition: sqltypes.h:79

kDATE
Definition: sqltypes.h:80

ColumnDescriptor::columnId
int columnId
Definition: ColumnDescriptor.h:35

SQLTypeInfo::get_compression
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399

kTINYINT
Definition: sqltypes.h:88

Fragmenter_Namespace::InsertData::data
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73

Chunk_NS::Chunk::getBuffer
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146

Catalog_Namespace::SessionInfo::getCatalog
Catalog & getCatalog() const
Definition: SessionInfo.h:75

Fragmenter_Namespace::InsertChunks::chunks
std::map< int, std::shared_ptr< Chunk_NS::Chunk > > chunks
Definition: Fragmenter.h:52

Fragmenter_Namespace::LocalInsertConnector::insertChunksToLeaf
void insertChunksToLeaf(const Catalog_Namespace::SessionInfo &parent_session_info, const size_t leaf_idx, const Fragmenter_Namespace::InsertChunks &insert_chunks) override
Definition: InsertDataLoader.cpp:507

Fragmenter_Namespace::BlockWithColumnId::is_default
bool is_default
Definition: InsertDataLoader.cpp:232

Catalog_Namespace::Catalog::getAllColumnMetadataForTable
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2175

Fragmenter_Namespace::copyColumnDataOfShard
void copyColumnDataOfShard(const std::vector< size_t > &rowIndices, T *src, T *dst)
Definition: InsertDataLoader.cpp:222

kSMALLINT
Definition: sqltypes.h:73

Fragmenter_Namespace::ShardDataOwner::arrayData
std::vector< std::vector< ArrayDatum > > arrayData
Definition: InsertDataLoader.cpp:31

Fragmenter_Namespace::InsertDataLoader::leaf_count_
size_t leaf_count_
Definition: InsertDataLoader.h:68

SQLTypeInfo::get_type_name
std::string get_type_name() const
Definition: sqltypes.h:484

kTIMESTAMP
Definition: sqltypes.h:77

kDECIMAL
Definition: sqltypes.h:71

kCHAR
Definition: sqltypes.h:68

Fragmenter_Namespace::ShardDataOwner
Definition: InsertDataLoader.cpp:28

Fragmenter_Namespace::InsertDataLoader::moveToNextLeaf
size_t moveToNextLeaf()
Definition: InsertDataLoader.cpp:408

Fragmenter_Namespace::LocalInsertConnector::rollback
void rollback(const Catalog_Namespace::SessionInfo &parent_session_info, int tableId) override
Definition: InsertDataLoader.cpp:532

Fragmenter_Namespace::InsertChunks::valid_row_indices
std::vector< size_t > valid_row_indices
Definition: Fragmenter.h:53

Data_Namespace::AbstractBuffer::getEncoder
Encoder * getEncoder() const
Definition: AbstractBuffer.h:104

Fragmenter_Namespace::isDatumVectorData
bool isDatumVectorData(const ColumnDescriptor *cd)
Definition: InsertDataLoader.cpp:78

Fragmenter_Namespace::ShardDataOwner::stringData
std::vector< std::vector< std::string > > stringData
Definition: InsertDataLoader.cpp:30

CHECK
#define CHECK(condition)
Definition: Logger.h:291

SQLTypeInfo::is_geometry
bool is_geometry() const
Definition: sqltypes.h:597

Fragmenter_Namespace::InsertData
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68

kLINESTRING
Definition: sqltypes.h:85

kVARCHAR
Definition: sqltypes.h:69

kINTERVAL_DAY_TIME
Definition: sqltypes.h:82

kNUMERIC
Definition: sqltypes.h:70

kINT
Definition: sqltypes.h:72

ColumnDescriptor::columnType
SQLTypeInfo columnType
Definition: ColumnDescriptor.h:38

Fragmenter_Namespace::copyDataOfShard
InsertData copyDataOfShard(const Catalog_Namespace::Catalog &cat, ShardDataOwner &dataOwner, InsertData &insert_data, int shardTableIndex, const std::vector< size_t > &rowIndices)
Definition: InsertDataLoader.cpp:342

Catalog_Namespace::Catalog::getMetadataForTable
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

SQLTypeInfo::is_string
bool is_string() const
Definition: sqltypes.h:561

InsertDataLoader.h

DataBlockPtr::numbersPtr
int8_t * numbersPtr
Definition: sqltypes.h:233

Fragmenter_Namespace::InsertChunks::table_id
const int table_id
Definition: Fragmenter.h:50

Fragmenter_Namespace::InsertData::columnIds
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71

Fragmenter_Namespace::InsertDataLoader::InsertConnector::insertChunksToLeaf
virtual void insertChunksToLeaf(const Catalog_Namespace::SessionInfo &parent_session_info, const size_t leaf_idx, const Fragmenter_Namespace::InsertChunks &insert_chunks)=0

ColumnDescriptor::columnName
std::string columnName
Definition: ColumnDescriptor.h:36

SHARD_FOR_KEY
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20

Fragmenter_Namespace::InsertDataLoader::connector_
InsertConnector & connector_
Definition: InsertDataLoader.h:70

SQLTypeInfo::is_array
bool is_array() const
Definition: sqltypes.h:585

Chunk_NS::Chunk
Definition: Chunk.h:41

Fragmenter_Namespace::BlockWithColumnId::block
DataBlockPtr block
Definition: InsertDataLoader.cpp:231

kPOINT
Definition: sqltypes.h:84