OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::anonymous_namespace{ForeignDataImporter.cpp} Namespace Reference

Functions

int32_t get_proxy_foreign_table_fragment_size (const size_t maximum_num_fragments_buffered, const size_t max_import_batch_row_count, const Catalog_Namespace::SessionInfo &parent_session_info, const int32_t table_id)
 

Function Documentation

int32_t import_export::anonymous_namespace{ForeignDataImporter.cpp}::get_proxy_foreign_table_fragment_size ( const size_t  maximum_num_fragments_buffered,
const size_t  max_import_batch_row_count,
const Catalog_Namespace::SessionInfo &  parent_session_info,
const int32_t  table_id 
)

Definition at line 475 of file ForeignDataImporter.cpp.

References DEFAULT_FRAGMENT_ROWS, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::SessionInfo::getCatalog(), import_export::ForeignDataImporter::proxy_foreign_table_fragment_size_, and run_benchmark_import::type.

Referenced by import_export::ForeignDataImporter::importGeneral().

479  {
480  if (ForeignDataImporter::proxy_foreign_table_fragment_size_ != 0) {
481  return ForeignDataImporter::proxy_foreign_table_fragment_size_;
482  }
483 
484  if (max_import_batch_row_count != 0) {
485  return max_import_batch_row_count;
486  }
487 
488  // This number is chosen as a reasonable default value to reserve for
489  // intermediate buffering during import; it is about 2GB of memory. Note,
490  // depending on the actual size of var len values, this heuristic target may
491  // be off. NOTE: `maximum_num_fragments_buffered` scales the allowed buffer
492  // size with the assumption that in the worst case all buffers may be
493  // buffered at once.
494  const size_t max_buffer_byte_size =
495  2 * 1024UL * 1024UL * 1024UL / maximum_num_fragments_buffered;
496 
497  auto& catalog = parent_session_info.getCatalog();
498 
499  auto logical_columns =
500  catalog.getAllColumnMetadataForTable(table_id, false, false, false);
501 
502  size_t row_byte_size = 0;
503  for (const auto& column_descriptor : logical_columns) {
504  auto type = column_descriptor->columnType;
505  size_t field_byte_length = 0;
506  if (type.is_varlen_indeed()) {
507  // use a heuristic where varlen types are assumed to be 256 bytes in length
508  field_byte_length = 256;
509  } else {
510  field_byte_length = type.get_size();
511  }
512  row_byte_size += field_byte_length;
513  }
514 
515  return std::min<size_t>((max_buffer_byte_size + row_byte_size - 1) / row_byte_size,
516  DEFAULT_FRAGMENT_ROWS);
517 }
Catalog & getCatalog() const
Definition: SessionInfo.h:75
#define DEFAULT_FRAGMENT_ROWS
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2254

+ Here is the call graph for this function:

+ Here is the caller graph for this function: