OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::anonymous_namespace{ForeignDataImporter.cpp} Namespace Reference

Functions

int32_t get_proxy_foreign_table_fragment_size (const size_t maximum_num_fragments_buffered, const size_t max_import_batch_row_count, const Catalog_Namespace::SessionInfo &parent_session_info, const int32_t table_id)
 

Function Documentation

int32_t import_export::anonymous_namespace{ForeignDataImporter.cpp}::get_proxy_foreign_table_fragment_size ( const size_t  maximum_num_fragments_buffered,
const size_t  max_import_batch_row_count,
const Catalog_Namespace::SessionInfo & parent_session_info,
const int32_t  table_id 
)

Definition at line 469 of file ForeignDataImporter.cpp.

References DEFAULT_FRAGMENT_ROWS, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::SessionInfo::getCatalog(), import_export::ForeignDataImporter::proxy_foreign_table_fragment_size_, and run_benchmark_import::type.

Referenced by import_export::ForeignDataImporter::importGeneral().

473  {
474  if (ForeignDataImporter::proxy_foreign_table_fragment_size_ != 0) {
475  return ForeignDataImporter::proxy_foreign_table_fragment_size_;
476  }
477 
478  if (max_import_batch_row_count != 0) {
479  return max_import_batch_row_count;
480  }
481 
482  // This number is chosen as a reasonable default value to reserve for
483  // intermediate buffering during import; it is about 2GB of memory. Note,
484  // depending on the actual size of var len values, this heuristic target may
485  // be off. NOTE: `maximum_num_fragments_buffered` scales the allowed buffer
486  // size with the assumption that in the worst case all buffers may be
487  // buffered at once.
488  const size_t max_buffer_byte_size =
489  2 * 1024UL * 1024UL * 1024UL / maximum_num_fragments_buffered;
490 
491  auto& catalog = parent_session_info.getCatalog();
492 
493  auto logical_columns =
494  catalog.getAllColumnMetadataForTable(table_id, false, false, false);
495 
496  size_t row_byte_size = 0;
497  for (const auto& column_descriptor : logical_columns) {
498  auto type = column_descriptor->columnType;
499  size_t field_byte_length = 0;
500  if (type.is_varlen_indeed()) {
501  // use a heuristic where varlen types are assumed to be 256 bytes in length
502  field_byte_length = 256;
503  } else {
504  field_byte_length = type.get_size();
505  }
506  row_byte_size += field_byte_length;
507  }
508 
509  return std::min<size_t>((max_buffer_byte_size + row_byte_size - 1) / row_byte_size,
510  DEFAULT_FRAGMENT_ROWS);
511 }
Catalog & getCatalog() const
Definition: SessionInfo.h:75
#define DEFAULT_FRAGMENT_ROWS
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2172

+ Here is the call graph for this function:

+ Here is the caller graph for this function: