OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::anonymous_namespace{ForeignDataImporter.cpp} Namespace Reference

Functions

int32_t get_proxy_foreign_table_fragment_size (const size_t maximum_num_fragments_buffered, const size_t max_import_batch_row_count, const Catalog_Namespace::SessionInfo &parent_session_info, const int32_t table_id)
 

Function Documentation

int32_t import_export::anonymous_namespace{ForeignDataImporter.cpp}::get_proxy_foreign_table_fragment_size ( const size_t  maximum_num_fragments_buffered,
const size_t  max_import_batch_row_count,
const Catalog_Namespace::SessionInfo & parent_session_info,
const int32_t  table_id 
)

Definition at line 459 of file ForeignDataImporter.cpp.

References DEFAULT_FRAGMENT_ROWS, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::SessionInfo::getCatalog(), StringDictionary::MAX_STRLEN, import_export::ForeignDataImporter::proxy_foreign_table_fragment_size_, and run_benchmark_import::type.

Referenced by import_export::ForeignDataImporter::importGeneral().

463  {
464  if (ForeignDataImporter::proxy_foreign_table_fragment_size_ != 0) {
465  return ForeignDataImporter::proxy_foreign_table_fragment_size_;
466  }
467 
468  if (max_import_batch_row_count != 0) {
469  return max_import_batch_row_count;
470  }
471 
472  // This number is chosen as a reasonable default value to reserve for
473  // intermediate buffering during import; it is about 2GB of memory. Note,
474  // depending on the actual size of var len values, this heuristic target may
475  // be off. NOTE: `maximum_num_fragments_buffered` scales the allowed buffer
476  // size with the assumption that in the worst case all buffers may be
477  // buffered at once.
478  const size_t max_buffer_byte_size =
479  2 * 1024UL * 1024UL * 1024UL / maximum_num_fragments_buffered;
480 
481  auto& catalog = parent_session_info.getCatalog();
482 
483  auto logical_columns =
484  catalog.getAllColumnMetadataForTable(table_id, false, false, false);
485 
486  size_t row_byte_size = 0;
487  for (const auto& column_descriptor : logical_columns) {
488  auto type = column_descriptor->columnType;
489  size_t field_byte_length = 0;
490  if (type.is_varlen_indeed()) {
491  // use a heuristic of 25% of the maximum string length size, which is likely
492  // conservative
493  field_byte_length = std::max<size_t>(StringDictionary::MAX_STRLEN / 4, 1);
494  } else {
495  field_byte_length = type.get_size();
496  }
497  row_byte_size += field_byte_length;
498  }
499 
500  return std::min<size_t>((max_buffer_byte_size + row_byte_size - 1) / row_byte_size,
501  DEFAULT_FRAGMENT_ROWS);
502 }
Catalog & getCatalog() const
Definition: SessionInfo.h:75
#define DEFAULT_FRAGMENT_ROWS
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2228
static constexpr size_t MAX_STRLEN

+ Here is the call graph for this function:

+ Here is the caller graph for this function: