OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage Namespace Reference

Namespaces

 anonymous_namespace{AbstractFileStorageDataWrapper.cpp}
 
 anonymous_namespace{AbstractTextFileDataWrapper.cpp}
 
 anonymous_namespace{CachingForeignStorageMgr.cpp}
 
 anonymous_namespace{CsvFileBufferParser.cpp}
 
 anonymous_namespace{FileReader.cpp}
 
 anonymous_namespace{ForeignDataWrapperFactory.cpp}
 
 anonymous_namespace{ForeignStorageCache.cpp}
 
 anonymous_namespace{ForeignTableRefresh.cpp}
 
 anonymous_namespace{InternalCatalogDataWrapper.cpp}
 
 anonymous_namespace{InternalExecutorStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}
 
 
 anonymous_namespace{InternalStorageStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalSystemDataWrapper.cpp}
 
 anonymous_namespace{LazyParquetChunkLoader.cpp}
 
 anonymous_namespace{LogFileBufferParser.cpp}
 
 anonymous_namespace{ParquetDataWrapper.cpp}
 
 anonymous_namespace{RefreshTimeCalculator.cpp}
 
 anonymous_namespace{RegexFileBufferParser.cpp}
 
 anonymous_namespace{RegexParserDataWrapper.cpp}
 
 anonymous_namespace{S3FilePathUtil.cpp}
 
 anonymous_namespace{TextFileBufferParser.cpp}
 
 Csv
 

Classes

struct  ForeignServer
 
struct  ForeignTable
 
struct  OptionsContainer
 
struct  UserMappingType
 
struct  UserMapping
 
class  RefreshTimeCalculator
 
class  AbstractFileStorageDataWrapper
 
struct  ParseFileRegionResult
 
struct  MetadataScanMultiThreadingParams
 
struct  IterativeFileScanParameters
 
class  AbstractTextFileDataWrapper
 
class  CachingForeignStorageMgr
 
class  CsvDataWrapper
 
class  CsvFileBufferParser
 
struct  DataPreview
 
class  FileReader
 
class  SingleFileReader
 
class  SingleTextFileReader
 
class  ArchiveWrapper
 
class  CompressedFileReader
 
class  MultiFileReader
 
class  LocalMultiFileReader
 
struct  FileRegion
 
class  ForeignDataWrapper
 
struct  DataWrapperType
 Encapsulates an enumeration of foreign data wrapper type strings. More...
 
class  ForeignDataWrapperFactory
 
class  ForeignStorageBuffer
 
class  ForeignStorageCache
 
class  ForeignStorageException
 
class  MetadataScanInfeasibleFragmentSizeException
 
class  RequestedFragmentIdOutOfBoundsException
 
class  ChunkSizeValidator
 
class  MockForeignDataWrapper
 
class  ForeignStorageMgr
 
class  ForeignTableSchema
 
class  GeospatialEncoder
 
class  InternalCatalogDataWrapper
 
class  InternalExecutorStatsDataWrapper
 
class  InternalLogsDataWrapper
 
class  InternalMemoryStatsDataWrapper
 
class  InternalMLModelMetadataDataWrapper
 
struct  StorageDetails
 
class  InternalStorageStatsDataWrapper
 
class  InternalSystemDataWrapper
 
struct  ColumnType
 
struct  FragmentType
 
struct  Interval
 
struct  ParquetBatchData
 
class  ParquetRowGroupReader
 
struct  PreviewContext
 
class  LazyParquetChunkLoader
 
class  LogFileBufferParser
 
class  OdbcGeospatialEncoder
 
class  ParquetArrayDetectEncoder
 
class  ParquetArrayEncoder
 
class  ParquetArrayImportEncoder
 
class  ParquetDataWrapper
 
class  ParquetDateInDaysFromTimestampEncoder
 
class  ParquetDateInSecondsEncoder
 
class  ParquetDecimalEncoder
 
class  ParquetDetectStringEncoder
 
class  ParquetEncoder
 
class  ParquetImportEncoder
 
class  ParquetScalarEncoder
 
class  ParquetFixedLengthArrayEncoder
 
class  ParquetFixedLengthEncoder
 
class  ParquetUnsignedFixedLengthEncoder
 
class  ParquetGeospatialEncoder
 
class  ParquetGeospatialImportEncoder
 
class  RowGroupIntervalTracker
 
class  ParquetImportBatchResult
 
class  AbstractRowGroupIntervalTracker
 
class  ParquetImporter
 
class  ParquetInPlaceEncoder
 
class  TypedParquetInPlaceEncoder
 
class  ParquetMetadataValidator
 
class  TimestampBoundsValidator
 
class  IntegralFixedLengthBoundsValidator
 
class  BaseDateBoundsValidator
 
class  FloatPointValidator
 
struct  RowGroupInterval
 
struct  RowGroupMetadata
 
class  FileReaderMap
 
class  ParquetStringEncoder
 
class  ParquetStringImportEncoder
 
class  ParquetStringNoneEncoder
 
class  ParquetTimeEncoder
 
class  ParquetTimestampEncoder
 
class  ParquetVariableLengthArrayEncoder
 
class  PassThroughBuffer
 
class  RegexFileBufferParser
 
class  RegexParserDataWrapper
 
class  FileOrderS3
 
struct  ParseBufferRequest
 
struct  ParseBufferResult
 
class  TextFileBufferParser
 
class  TypedParquetDetectBuffer
 
class  TypedParquetStorageBuffer
 
class  ForeignTableRefreshScheduler
 

Typedefs

using OptionsMap = std::map< std::string, std::string, std::less<>>
 
using SampleRows = std::vector< std::vector< std::string >>
 
using FirstLineByFilePath = std::map< std::string, std::string >
 
using FileRegions = std::vector< FileRegion >
 
using ChunkToBufferMap = std::map< ChunkKey, AbstractBuffer * >
 
using FilePathAndRowGroup = std::pair< std::string, int32_t >
 
using RejectedRowIndices = std::set< int64_t >
 
using InvalidRowGroupIndices = std::set< int64_t >
 
template<typename T >
using DateInSecondsBoundsValidator = BaseDateBoundsValidator< T, true >
 
template<typename T >
using DateInDaysBoundsValidator = BaseDateBoundsValidator< T, false >
 
using UniqueReaderPtr = std::unique_ptr< parquet::arrow::FileReader >
 
using ReaderPtr = parquet::arrow::FileReader *
 
template<typename V , typename T , T conversion_denominator, typename NullType = V>
using ParquetDateInSecondsFromTimestampEncoder = ParquetTimestampEncoder< V, T, conversion_denominator, NullType >
 
using S3ObjectComparator = std::function< bool(const Aws::S3::Model::Object &, const Aws::S3::Model::Object &)>
 

Functions

size_t get_num_threads (const ForeignTable &table)
 
ParseFileRegionResult parse_file_regions (const FileRegions &file_regions, const size_t start_index, const size_t end_index, FileReader &file_reader, ParseBufferRequest &parse_file_request, const std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, const TextFileBufferParser &parser)
 
size_t num_rows_to_process (const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
 
std::vector< size_t > partition_by_fragment (const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
 
std::optional< ParseBufferRequest > get_next_scan_request (MetadataScanMultiThreadingParams &multi_threading_params)
 
void add_file_region (std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
 
void update_stats (Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
 
void cache_blocks (std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)
 
void append_data_block_to_chunk (const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const int column_id, const ColumnDescriptor *column, const size_t element_count_required)
 
std::pair< std::map< int,
DataBlockPtr >, std::map< int,
DataBlockPtr > > 
partition_data_blocks (const std::map< int, const ColumnDescriptor * > &column_by_id, const std::map< int, DataBlockPtr > &data_blocks)
 
void update_delete_buffer (const ParseBufferRequest &request, const ParseBufferResult &result, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t start_position_in_fragment)
 
void populate_chunks_using_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t expected_current_element_count)
 
void process_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
 
void add_request_to_pool (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void scan_metadata (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser)
 
ParseBufferRequest get_request_from_pool (MetadataScanMultiThreadingParams &multi_threading_params)
 
bool request_pool_non_empty (MetadataScanMultiThreadingParams &multi_threading_params)
 
void defer_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void dispatch_all_deferred_requests (MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void populate_chunks (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser, foreign_storage::IterativeFileScanParameters &file_scan_param)
 
void resize_buffer_if_needed (std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
 
void reset_multithreading_params (foreign_storage::MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_requests (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)
 
void dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)
 
void dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call)
 
void set_value (rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, FileRegion &file_region)
 
std::optional< SQLTypes > detect_geo_type (const SampleRows &sample_rows, size_t column_index)
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
 Create proxy fsi objects for use outside FSI. More...
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const TableDescriptor *table)
 Create proxy fsi objects for use outside FSI NOTE: parameters mirror function above. More...
 
void validate_regex_parser_options (const import_export::CopyParams &copy_params)
 
bool is_valid_source_type (const import_export::CopyParams &copy_params)
 
std::string bool_to_option_value (const bool value)
 
void throw_unexpected_number_of_items (const size_t &num_expected, const size_t &num_loaded, const std::string &item_type)
 
void throw_removed_row_in_result_set_error (const std::string &select_statement)
 
void throw_removed_row_in_file_error (const std::string &file_path)
 
void throw_removed_file_error (const std::string &file_path)
 
void throw_number_of_columns_mismatch_error (size_t num_table_cols, size_t num_file_cols, const std::string &file_path)
 
void throw_file_access_error (const std::string &file_path, const std::string &message)
 
void throw_file_not_found_error (const std::string &file_path)
 
void throw_s3_compressed_mime_type (const std::string &file_path, const std::string &mime_type)
 
void throw_s3_compressed_extension (const std::string &file_path, const std::string &ext_type)
 
bool is_append_table_chunk_key (const ChunkKey &chunk_key)
 
bool set_comp (const ChunkKey &left, const ChunkKey &right)
 
std::vector< ChunkKey > get_column_key_vec (const ChunkKey &destination_chunk_key)
 
std::set< ChunkKey > get_column_key_set (const ChunkKey &destination_chunk_key)
 
size_t get_max_chunk_size (const ChunkKey &key)
 
bool contains_fragment_key (const std::set< ChunkKey > &key_set, const ChunkKey &target_key)
 
bool is_table_enabled_on_node (const ChunkKey &key)
 
void refresh_foreign_table_unlocked (Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
 
void refresh_foreign_table (Catalog_Namespace::Catalog &catalog, const std::string &table_name, const bool evict_cached_entries)
 
void init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const SQLTypeInfo &type, size_t num_elements)
 
const
foreign_storage::ForeignTable & 
get_foreign_table_for_key (const ChunkKey &key)
 
bool is_system_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_replicated_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_shardable_key (const ChunkKey &key)
 
bool fragment_maps_to_leaf (const ChunkKey &key)
 
bool key_does_not_shard_to_leaf (const ChunkKey &key)
 
template<typename T >
auto partition_for_threads (const std::set< T > &items, size_t max_threads)
 
template<typename T >
auto partition_for_threads (const std::vector< T > &items, size_t max_threads)
 
template<typename Container >
std::vector< std::future< void > > create_futures_for_workers (const Container &items, size_t max_threads, std::function< void(const Container &)> lambda)
 
template<typename T >
ArrayDatum encode_as_array_datum (const std::vector< T > &data)
 
std::string get_db_name (int32_t db_id)
 
std::string get_table_name (int32_t db_id, int32_t table_id)
 
void set_node_name (std::map< std::string, import_export::TypedImportBuffer * > &import_buffers)
 
void set_value (rapidjson::Value &json_val, const RowGroupInterval &value, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, RowGroupInterval &value)
 
template<typename D , typename T >
bool check_bounds (const T &value)
 
template<typename D >
std::string datetime_to_string (const D &timestamp, const SQLTypeInfo &column_type)
 
void throw_parquet_metadata_out_of_bounds_error (const std::string &min_value, const std::string &max_value, const std::string &encountered_value)
 
UniqueReaderPtr open_parquet_table (const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
 
std::pair< int, int > get_parquet_table_size (const ReaderPtr &reader)
 
const parquet::ColumnDescriptor * get_column_descriptor (const parquet::arrow::FileReader *reader, const int logical_column_index)
 
parquet::Type::type get_physical_type (ReaderPtr &reader, const int logical_column_index)
 
void validate_equal_column_descriptor (const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
 
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor (const ColumnDescriptor *column)
 
std::shared_ptr
< parquet::Statistics > 
validate_and_get_column_metadata_statistics (const parquet::ColumnChunkMetaData *column_metadata)
 
std::vector
< Aws::S3::Model::Object > 
s3_objects_filter_sort_files (const std::vector< Aws::S3::Model::Object > &file_paths, const shared::FilePathOptions &options)
 
template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V get_null_value ()
 
template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > get_min_max_bounds ()
 
void populate_string_dictionary (int32_t table_id, int32_t col_id, int32_t db_id)
 
void validate_non_foreign_table_write (const TableDescriptor *table_descriptor)
 

Variables

constexpr const char * kDeletedValueIndicator {"<DELETED>"}
 

Typedef Documentation

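using foreign_storage::ChunkToBufferMap = typedef std::map<ChunkKey, AbstractBuffer*>

Definition at line 31 of file ForeignDataWrapper.h.

template<typename T >
using foreign_storage::DateInDaysBoundsValidator = typedef BaseDateBoundsValidator<T, false>

Definition at line 277 of file ParquetMetadataValidator.h.

template<typename T >
using foreign_storage::DateInSecondsBoundsValidator = typedef BaseDateBoundsValidator<T, true>

Definition at line 274 of file ParquetMetadataValidator.h.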

using foreign_storage::FilePathAndRowGroup = typedef std::pair<std::string, int32_t>

Definition at line 40 of file ParquetDataWrapper.h.

using foreign_storage::FileRegions = typedef std::vector<FileRegion>

Definition at line 60 of file FileRegions.h.

using foreign_storage::FirstLineByFilePath = typedef std::map<std::string, std::string>

Definition at line 37 of file FileReader.h.

using foreign_storage::InvalidRowGroupIndices = typedef std::set<int64_t>

Definition at line 124 of file ParquetEncoder.h.

using foreign_storage::OptionsMap = typedef std::map<std::string, std::string, std::less<>>

Definition at line 30 of file OptionsContainer.h.

template<typename V , typename T , T conversion_denominator, typename NullType = V>
using foreign_storage::ParquetDateInSecondsFromTimestampEncoder = typedef ParquetTimestampEncoder<V, T, conversion_denominator, NullType>

Definition at line 89 of file ParquetTimestampEncoder.h.

using foreign_storage::ReaderPtr = typedef parquet::arrow::FileReader*

Definition at line 33 of file ParquetShared.h.

using foreign_storage::RejectedRowIndices = typedef std::set<int64_t>

Definition at line 28 of file ParquetEncoder.h.

using foreign_storage::S3ObjectComparator = typedef std::function<bool(const Aws::S3::Model::Object&, const Aws::S3::Model::Object&)>

Definition at line 16 of file S3FilePathUtil.h.

using foreign_storage::SampleRows = typedef std::vector<std::vector<std::string>>

Definition at line 26 of file DataPreview.h.

using foreign_storage::UniqueReaderPtr = typedef std::unique_ptr<parquet::arrow::FileReader>

Definition at line 32 of file ParquetShared.h.

Function Documentation

void foreign_storage::add_file_region ( std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
int  fragment_id,
size_t  first_row_index,
const ParseBufferResult &  result,
const std::string &  file_path 
)

Creates a new file region based on metadata from parsed file buffers and adds the new region to the fragment id to file regions map.

Definition at line 562 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferResult::row_count, and foreign_storage::ParseBufferResult::row_offsets.

Referenced by populate_chunks_using_data_blocks(), and process_data_blocks().

566  {
567  fragment_id_to_file_regions_map[fragment_id].emplace_back(
568  // file naming is handled by FileReader
569  FileRegion(file_path,
570  result.row_offsets.front(),
571  first_row_index,
572  result.row_count,
573  result.row_offsets.back() - result.row_offsets.front()));
574 }

+ Here is the caller graph for this function:

void foreign_storage::add_request_to_pool ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Adds the request object for a processed request back to the request pool for reuse in subsequent requests.

Definition at line 850 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests(), populate_chunks(), and scan_metadata().

851  {
852  std::unique_lock<std::mutex> completed_requests_queue_lock(
853  multi_threading_params.request_pool_mutex);
854  multi_threading_params.request_pool.emplace(std::move(request));
855  completed_requests_queue_lock.unlock();
856  multi_threading_params.request_pool_condition.notify_all();
857 }

+ Here is the caller graph for this function:

void foreign_storage::append_data_block_to_chunk ( const foreign_storage::IterativeFileScanParameters &  file_scan_param,
DataBlockPtr  data_block,
size_t  row_count,
const int  column_id,
const ColumnDescriptor *  column,
const size_t  element_count_required 
)

Definition at line 649 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::IterativeFileScanParameters::column_id_to_chunk_map, shared::get_from_map(), foreign_storage::IterativeFileScanParameters::getChunkConditionalVariable(), and foreign_storage::IterativeFileScanParameters::getChunkMutex().

Referenced by populate_chunks_using_data_blocks().

655  {
656  auto chunk = shared::get_from_map(file_scan_param.column_id_to_chunk_map, column_id);
657 
658  auto& conditional_variable = file_scan_param.getChunkConditionalVariable(column_id);
659  {
660  std::unique_lock<std::mutex> chunk_lock(file_scan_param.getChunkMutex(column_id));
661  conditional_variable.wait(chunk_lock, [element_count_required, &chunk]() {
662  return chunk.getBuffer()->getEncoder()->getNumElems() == element_count_required;
663  });
664 
665  chunk.appendData(data_block, row_count, 0);
666  }
667 
668  conditional_variable
669  .notify_all(); // notify any threads waiting on the correct element count
670 }
std::condition_variable & getChunkConditionalVariable(const int col_id) const
std::map< int, Chunk_NS::Chunk > & column_id_to_chunk_map
std::mutex & getChunkMutex(const int col_id) const
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::bool_to_option_value ( const bool  value)

Definition at line 131 of file ForeignDataWrapperFactory.cpp.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy().

131  {
132  return value ? "TRUE" : "FALSE";
133 }

+ Here is the caller graph for this function:

void foreign_storage::cache_blocks ( std::map< ChunkKey, Chunk_NS::Chunk > &  cached_chunks,
DataBlockPtr  data_block,
size_t  row_count,
ChunkKey &  chunk_key,
const ColumnDescriptor *  column,
bool  is_first_block,
bool  disable_cache 
)

Definition at line 608 of file AbstractTextFileDataWrapper.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_cache_if_enabled(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), SQLTypeInfo::is_varlen_indeed(), and key_does_not_shard_to_leaf().

Referenced by process_data_blocks().

614  {
615  auto catalog =
616  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
617  CHECK(catalog);
618  auto cache = get_cache_if_enabled(catalog, disable_cache);
619  if (cache) {
620  // This extra filter needs to be here because this wrapper is the only one that
621  // accesses the cache directly and it should not be inserting chunks which are not
622  // mapped to the current leaf (in distributed mode).
623  if (key_does_not_shard_to_leaf(chunk_key)) {
624  return;
625  }
626 
627  ChunkKey index_key = {chunk_key[CHUNK_KEY_DB_IDX],
628  chunk_key[CHUNK_KEY_TABLE_IDX],
629  chunk_key[CHUNK_KEY_COLUMN_IDX],
630  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
631  2};
632  // Create actual data chunks to prepopulate cache
633  if (cached_chunks.find(chunk_key) == cached_chunks.end()) {
634  cached_chunks[chunk_key] = Chunk_NS::Chunk{column, false};
635  cached_chunks[chunk_key].setBuffer(
636  cache->getChunkBufferForPrecaching(chunk_key, is_first_block));
637  if (column->columnType.is_varlen_indeed()) {
638  cached_chunks[chunk_key].setIndexBuffer(
639  cache->getChunkBufferForPrecaching(index_key, is_first_block));
640  }
641  if (is_first_block) {
642  cached_chunks[chunk_key].initEncoder();
643  }
644  }
645  cached_chunks[chunk_key].appendData(data_block, row_count, 0);
646  }
647 }
std::vector< int > ChunkKey
Definition: types.h:36
foreign_storage::ForeignStorageCache * get_cache_if_enabled(std::shared_ptr< Catalog_Namespace::Catalog > &catalog, const bool disable_cache)
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:343
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:635
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename D , typename T >
bool foreign_storage::check_bounds ( const T &  value)
inline

Definition at line 32 of file ParquetMetadataValidator.h.

32  {
33  auto [min_value, max_value] = get_min_max_bounds<D>();
34  return value >= min_value && value <= max_value;
35 }
bool foreign_storage::contains_fragment_key ( const std::set< ChunkKey > &  key_set,
const ChunkKey &  target_key 
)
template<typename Container >
std::vector<std::future<void> > foreign_storage::create_futures_for_workers ( const Container &  items,
size_t  max_threads,
std::function< void(const Container &)>  lambda 
)

Definition at line 74 of file FsiChunkUtils.h.

References threading_serial::async(), and partition_for_threads().

Referenced by import_export::ForeignDataImporter::importGeneralS3(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

77  {
78  auto items_per_thread = partition_for_threads(items, max_threads);
79  std::vector<std::future<void>> futures;
80  for (const auto& items : items_per_thread) {
81  futures.emplace_back(std::async(std::launch::async, lambda, items));
82  }
83 
84  return futures;
85 }
auto partition_for_threads(const std::set< T > &items, size_t max_threads)
Definition: FsiChunkUtils.h:41
future< Result > async(Fn &&fn, Args &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const int  db_id,
const TableDescriptor *  table,
const int32_t  user_id 
)

Create proxy fsi objects for use outside FSI.

Parameters
copy_from_source- the source that will be copied
copy_params- CopyParams that specify parameters around use case
db_id- db id of database in use case
table- the table descriptor of the table in use case
user_id- the user id of user in use case
Returns
tuple of FSI objects that can be used for particular use case (for example import or detect)

Definition at line 61 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable().

Referenced by create_proxy_fsi_objects(), and import_export::ForeignDataImporter::importGeneral().

65  {
66  auto server = foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(
67  db_id, user_id, copy_from_source, copy_params);
68 
69  CHECK(server);
70  server->validate();
71 
72  auto user_mapping =
73  foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable(
74  db_id, user_id, copy_from_source, copy_params, server.get());
75 
76  if (user_mapping) {
77  user_mapping->validate(server.get());
78  }
79 
80  auto foreign_table =
81  foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(
82  db_id, table, copy_from_source, copy_params, server.get());
83 
84  CHECK(foreign_table);
85  foreign_table->validateOptionValues();
86 
87  return {std::move(server), std::move(user_mapping), std::move(foreign_table)};
88 }
static std::unique_ptr< ForeignServer > createForeignServerProxy(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params)
static std::unique_ptr< ForeignTable > createForeignTableProxy(const int db_id, const TableDescriptor *table, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
#define CHECK(condition)
Definition: Logger.h:291
static std::unique_ptr< UserMapping > createUserMappingProxyIfApplicable(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const TableDescriptor *  table 
)

Create proxy fsi objects for use outside FSI NOTE: parameters mirror function above.

Definition at line 93 of file ForeignDataWrapperFactory.cpp.

References create_proxy_fsi_objects().

95  {
96  return create_proxy_fsi_objects(copy_from_source, copy_params, -1, table, -1);
97 }
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > create_proxy_fsi_objects(const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
Create proxy fsi objects for use outside FSI.

+ Here is the call graph for this function:

template<typename D >
std::string foreign_storage::datetime_to_string ( const D &  timestamp,
const SQLTypeInfo &  column_type 
)
inline

Definition at line 38 of file ParquetMetadataValidator.h.

References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfo::is_date(), and SQLTypeInfo::is_timestamp().

Referenced by foreign_storage::TimestampBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::getMinMaxBoundsAsStrings(), foreign_storage::TimestampBoundsValidator< T >::validateValue(), and foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue().

39  {
40  CHECK(column_type.is_timestamp() || column_type.is_date());
41  Datum d;
42  d.bigintval = timestamp;
43  return DatumToString(d, column_type);
44 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:460
bool is_timestamp() const
Definition: sqltypes.h:1044
int64_t bigintval
Definition: Datum.h:74
#define CHECK(condition)
Definition: Logger.h:291
Definition: Datum.h:69
bool is_date() const
Definition: sqltypes.h:1026

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::defer_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Definition at line 943 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, and foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex.

Referenced by populate_chunks().

944  {
945  std::unique_lock<std::mutex> deferred_requests_lock(
946  multi_threading_params.deferred_requests_mutex);
947  multi_threading_params.deferred_requests.emplace(std::move(request));
948 }

+ Here is the caller graph for this function:

std::optional< SQLTypes > foreign_storage::detect_geo_type ( const SampleRows &  sample_rows,
size_t  column_index 
)

Definition at line 22 of file DataPreview.cpp.

References CHECK_EQ, CHECK_LT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNULLT, kPOINT, kPOLYGON, and UNREACHABLE.

Referenced by foreign_storage::LazyParquetChunkLoader::previewFiles().

23  {
24  std::optional<SQLTypes> tentative_geo_type{};
25  for (const auto& row : sample_rows) {
26  static std::regex geo_regex{
27  "\\s*(POINT|MULTIPOINT|LINESTRING|MULTILINESTRING|POLYGON|MULTIPOLYGON)\\s*\\(.+"
28  "\\)\\s*"};
29  std::smatch match;
30  CHECK_LT(column_index, row.size());
31  if (std::regex_match(row[column_index], match, geo_regex)) {
32  CHECK_EQ(match.size(), static_cast<size_t>(2));
33  SQLTypes geo_type{kNULLT};
34  const auto& geo_type_str = match[1];
35  if (geo_type_str == "POINT") {
36  geo_type = kPOINT;
37  } else if (geo_type_str == "MULTIPOINT") {
38  geo_type = kMULTIPOINT;
39  } else if (geo_type_str == "LINESTRING") {
40  geo_type = kLINESTRING;
41  } else if (geo_type_str == "MULTILINESTRING") {
42  geo_type = kMULTILINESTRING;
43  } else if (geo_type_str == "POLYGON") {
44  geo_type = kPOLYGON;
45  } else if (geo_type_str == "MULTIPOLYGON") {
46  geo_type = kMULTIPOLYGON;
47  } else {
48  UNREACHABLE() << "Unexpected geo type match: " << geo_type_str;
49  }
50  if (tentative_geo_type.has_value()) {
51  if (tentative_geo_type.value() != geo_type) {
52  return {}; // geo type does not match between rows, can not be imported
53  }
54  } else {
55  tentative_geo_type = geo_type;
56  }
57  }
58  }
59  return tentative_geo_type;
60 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
SQLTypes
Definition: sqltypes.h:65
#define UNREACHABLE()
Definition: Logger.h:338
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the caller graph for this function:

void foreign_storage::dispatch_all_deferred_requests ( MetadataScanMultiThreadingParams &  multi_threading_params)

Definition at line 953 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

954  {
955  std::unique_lock<std::mutex> deferred_requests_lock(
956  multi_threading_params.deferred_requests_mutex);
957  {
958  std::unique_lock<std::mutex> pending_requests_lock(
959  multi_threading_params.pending_requests_mutex);
960 
961  while (!multi_threading_params.deferred_requests.empty()) {
962  auto& request = multi_threading_params.deferred_requests.front();
963  multi_threading_params.pending_requests.emplace(std::move(request));
964  multi_threading_params.deferred_requests.pop();
965  }
966  multi_threading_params.pending_requests_condition.notify_all();
967  }
968 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Dispatches a new metadata scan request by adding the request to the pending requests queue to be consumed by a worker thread.

Definition at line 974 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

975  {
976  {
977  std::unique_lock<std::mutex> pending_requests_lock(
978  multi_threading_params.pending_requests_mutex);
979  multi_threading_params.pending_requests.emplace(std::move(request));
980  }
981  multi_threading_params.pending_requests_condition.notify_all();
982 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call,
int &  iterative_scan_last_fragment_id 
)

Reads from a text file iteratively and dispatches metadata scan requests that are processed by worker threads.

Definition at line 1086 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::alloc_size, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_all_deferred_requests(), dispatch_scan_request(), foreign_storage::TextFileBufferParser::findRowEndPosition(), foreign_storage::IterativeFileScanParameters::fragment_id, get_request_from_pool(), foreign_storage::FileReader::getCurrentFilePath(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, TableDescriptor::maxFragRows, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::no_deferred_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::FileReader::read(), request_pool_non_empty(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_alloc_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_data, and resize_buffer_if_needed().

Referenced by dispatch_scan_requests_with_exception_handling().

1100  {
1101  auto& alloc_size = iterative_residual_buffer.alloc_size;
1102  auto& residual_buffer = iterative_residual_buffer.residual_data;
1103  auto& residual_buffer_size = iterative_residual_buffer.residual_buffer_size;
1104  auto& residual_buffer_alloc_size = iterative_residual_buffer.residual_buffer_alloc_size;
1105 
1106  if (is_first_file_scan_call) {
1107  alloc_size = buffer_size;
1108  residual_buffer = std::make_unique<char[]>(alloc_size);
1109  residual_buffer_size = 0;
1110  residual_buffer_alloc_size = alloc_size;
1111  } else if (!no_deferred_requests(multi_threading_params)) {
1112  dispatch_all_deferred_requests(multi_threading_params);
1113  }
1114 
1115  // NOTE: During an iterative scan, it is possible for an entire fragment to
1116  // be parsed into requests, which sit in deferred requests; in order to avoid
1117  // stalling indefinitely while waiting on an available request, the check
1118  // below determines if this is the case or not.
1119  bool current_fragment_fully_read_during_iterative_scan =
1120  file_scan_param && file_scan_param->fragment_id < iterative_scan_last_fragment_id;
1121 
1122  // NOTE: The conditional below behaves as follows:
1123  //
1124  // * for non-iterative scans,
1125  // current_fragment_fully_read_during_iterative_scan is false, and therefore
1126  // only the first clause of the conditional is relevant
1127  //
1128  // * for iterative scans, if
1129  // current_fragment_fully_read_during_iterative_scan is true, then the
1130  // conditional is skipped, unless it is determined there are still available
1131  // requests to work with, in which case the loop is entered; this is an
1132  // optimization that ensures maximal concurrency of loading while processing
1133  // requests
1134  while (!file_reader.isScanFinished() &&
1135  (request_pool_non_empty(multi_threading_params) ||
1136  !current_fragment_fully_read_during_iterative_scan)) {
1137  {
1138  std::lock_guard<std::mutex> pending_requests_lock(
1139  multi_threading_params.pending_requests_mutex);
1140  if (!multi_threading_params.continue_processing) {
1141  break;
1142  }
1143  }
1144  auto request = get_request_from_pool(multi_threading_params);
1145  request.full_path = file_reader.getCurrentFilePath();
1146  resize_buffer_if_needed(request.buffer, request.buffer_alloc_size, alloc_size);
1147 
1148  if (residual_buffer_size > 0) {
1149  memcpy(request.buffer.get(), residual_buffer.get(), residual_buffer_size);
1150  }
1151  size_t size = residual_buffer_size;
1152  size += file_reader.read(request.buffer.get() + residual_buffer_size,
1153  alloc_size - residual_buffer_size);
1154 
1155  if (size == 0) {
1156  // In some cases at the end of a file we will read 0 bytes even when
1157  // file_reader.isScanFinished() is false. Also add request back to the pool to be
1158  // picked up again in the next iteration.
1159  add_request_to_pool(multi_threading_params, request);
1160  continue;
1161  } else if (size == 1 && request.buffer[0] == copy_params.line_delim) {
1162  // In some cases files with newlines at the end will be encoded with a second
1163  // newline that can end up being the only thing in the buffer. Also add request
1164  // back to the pool to be picked up again in the next iteration.
1165  current_file_offset++;
1166  add_request_to_pool(multi_threading_params, request);
1167  continue;
1168  }
1169  unsigned int num_rows_in_buffer = 0;
1170  request.end_pos = parser.findRowEndPosition(alloc_size,
1171  request.buffer,
1172  size,
1173  copy_params,
1174  first_row_index_in_buffer,
1175  num_rows_in_buffer,
1176  &file_reader);
1177  request.buffer_size = size;
1178  request.buffer_alloc_size = alloc_size;
1179  request.first_row_index = first_row_index_in_buffer;
1180  request.file_offset = current_file_offset;
1181  request.buffer_row_count = num_rows_in_buffer;
1182  request.processed_row_count = 0;
1183  request.begin_pos = 0;
1184 
1185  residual_buffer_size = size - request.end_pos;
1186  if (residual_buffer_size > 0) {
1187  resize_buffer_if_needed(residual_buffer, residual_buffer_alloc_size, alloc_size);
1188  memcpy(residual_buffer.get(),
1189  request.buffer.get() + request.end_pos,
1190  residual_buffer_size);
1191  }
1192 
1193  current_file_offset += request.end_pos;
1194  first_row_index_in_buffer += num_rows_in_buffer;
1195 
1196  if (num_rows_in_buffer > 0) {
1197  dispatch_scan_request(multi_threading_params, request);
1198  } else {
1199  add_request_to_pool(multi_threading_params, request);
1200  }
1201 
1202  if (file_scan_param) {
1203  const int32_t last_fragment_index =
1204  (first_row_index_in_buffer) / table->maxFragRows;
1205  if (last_fragment_index > file_scan_param->fragment_id) {
1206  iterative_scan_last_fragment_id = last_fragment_index;
1207  break;
1208  }
1209  }
1210  }
1211 
1212  std::unique_lock<std::mutex> pending_requests_queue_lock(
1213  multi_threading_params.pending_requests_mutex);
1214  multi_threading_params.pending_requests_condition.wait(
1215  pending_requests_queue_lock, [&multi_threading_params] {
1216  return multi_threading_params.pending_requests.empty() ||
1217  (multi_threading_params.continue_processing == false);
1218  });
1219  multi_threading_params.continue_processing = false;
1220  pending_requests_queue_lock.unlock();
1221  multi_threading_params.pending_requests_condition.notify_all();
1222 }
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
bool no_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
void dispatch_all_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
bool request_pool_non_empty(MetadataScanMultiThreadingParams &multi_threading_params)
ParseBufferRequest get_request_from_pool(MetadataScanMultiThreadingParams &multi_threading_params)
void resize_buffer_if_needed(std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
void dispatch_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests_with_exception_handling ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call,
int &  iterative_scan_last_fragment_id 
)

Definition at line 1224 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_scan_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests_with_exception_handling(), foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan(), and foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

1238  {
1239  try {
1240  dispatch_scan_requests(table,
1241  buffer_size,
1242  file_path,
1243  file_reader,
1244  copy_params,
1245  multi_threading_params,
1246  first_row_index_in_buffer,
1247  current_file_offset,
1248  parser,
1249  file_scan_param,
1250  iterative_residual_buffer,
1251  is_first_file_scan_call,
1252  iterative_scan_last_fragment_id);
1253  } catch (...) {
1254  {
1255  std::unique_lock<std::mutex> pending_requests_lock(
1256  multi_threading_params.pending_requests_mutex);
1257  multi_threading_params.continue_processing = false;
1258  }
1259  multi_threading_params.pending_requests_condition.notify_all();
1260  throw;
1261  }
1262 }
void dispatch_scan_requests(const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests_with_exception_handling ( const foreign_storage::ForeignTable table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer iterative_residual_buffer,
const bool  is_first_file_scan_call 
)

Definition at line 1264 of file AbstractTextFileDataWrapper.cpp.

References dispatch_scan_requests_with_exception_handling().

1277  {
1278  int dummy;
1279  dispatch_scan_requests_with_exception_handling(table,
1280  buffer_size,
1281  file_path,
1282  file_reader,
1283  copy_params,
1284  multi_threading_params,
1285  first_row_index_in_buffer,
1286  current_file_offset,
1287  parser,
1288  file_scan_param,
1289  iterative_residual_buffer,
1290  is_first_file_scan_call,
1291  dummy);
1292 }
void dispatch_scan_requests_with_exception_handling(const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)

+ Here is the call graph for this function:

template<typename T >
ArrayDatum foreign_storage::encode_as_array_datum ( const std::vector< T > &  data)
inline

Definition at line 32 of file GeospatialEncoder.h.

References heavydb.dtypes::T.

Referenced by foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

32  {
33  const size_t num_bytes = data.size() * sizeof(T);
34  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
35  memcpy(buffer.get(), data.data(), num_bytes);
36  return ArrayDatum(num_bytes, buffer, false);
37 }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229

+ Here is the caller graph for this function:

bool foreign_storage::fragment_maps_to_leaf ( const ChunkKey key)

Definition at line 128 of file FsiChunkUtils.cpp.

References CHECK, g_distributed_leaf_idx, g_distributed_num_leaves, get_fragment(), dist::is_aggregator(), and dist::is_distributed().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

128  {
133 }
int get_fragment(const ChunkKey &key)
Definition: types.h:52
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
bool is_aggregator()
Definition: distributed.cpp:33
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
#define CHECK(condition)
Definition: Logger.h:291
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const parquet::ColumnDescriptor * foreign_storage::get_column_descriptor ( const parquet::arrow::FileReader *  reader,
const int  logical_column_index 
)

Definition at line 46 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

48  {
49  return reader->parquet_reader()->metadata()->schema()->Column(logical_column_index);
50 }

+ Here is the caller graph for this function:

std::set<ChunkKey> foreign_storage::get_column_key_set ( const ChunkKey destination_chunk_key)
std::vector<ChunkKey> foreign_storage::get_column_key_vec ( const ChunkKey destination_chunk_key)
std::string foreign_storage::get_db_name ( int32_t  db_id)

Definition at line 31 of file InternalSystemDataWrapper.cpp.

References Catalog_Namespace::DBMetadata::dbName, Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_dashboards(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_permissions(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_tables(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_users(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

31  {
32  Catalog_Namespace::DBMetadata db_metadata;
33  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
34  if (sys_catalog.getMetadataForDBById(db_id, db_metadata)) {
35  return db_metadata.dbName;
36  } else {
37  // Database has been deleted.
38  return kDeletedValueIndicator;
39  }
40 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:343

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const foreign_storage::ForeignTable & foreign_storage::get_foreign_table_for_key ( const ChunkKey key)

Definition at line 101 of file FsiChunkUtils.cpp.

References CHECK, get_table_prefix(), Catalog_Namespace::SysCatalog::getCatalog(), and Catalog_Namespace::SysCatalog::instance().

Referenced by is_append_table_chunk_key(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

101  {
102  auto [db_id, tb_id] = get_table_prefix(key);
103  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
104  CHECK(catalog);
105  auto table = catalog->getForeignTable(tb_id);
106  CHECK(table);
107  return *table;
108 }
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::get_max_chunk_size ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getBufferSize().

+ Here is the caller graph for this function:

template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > foreign_storage::get_min_max_bounds ( )
inline

Definition at line 31 of file SharedMetadataValidator.h.

31  {
32  static_assert(std::is_signed<D>::value,
33  "'get_min_max_bounds' is only valid for signed types");
34  return {get_null_value<D>() + 1, std::numeric_limits<D>::max()};
35 }
std::optional<ParseBufferRequest> foreign_storage::get_next_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets the next metadata scan request object from the pending requests queue. A null optional is returned if there are no further requests to be processed.

Definition at line 539 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by populate_chunks(), and scan_metadata().

540  {
541  std::unique_lock<std::mutex> pending_requests_lock(
542  multi_threading_params.pending_requests_mutex);
543  multi_threading_params.pending_requests_condition.wait(
544  pending_requests_lock, [&multi_threading_params] {
545  return !multi_threading_params.pending_requests.empty() ||
546  !multi_threading_params.continue_processing;
547  });
548  if (multi_threading_params.pending_requests.empty()) {
549  return {};
550  }
551  auto request = std::move(multi_threading_params.pending_requests.front());
552  multi_threading_params.pending_requests.pop();
553  pending_requests_lock.unlock();
554  multi_threading_params.pending_requests_condition.notify_all();
555  return std::move(request);
556 }

+ Here is the caller graph for this function:

template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V foreign_storage::get_null_value ( )
inline

Definition at line 21 of file SharedMetadataValidator.h.

21  {
22  return inline_int_null_value<V>();
23 }
size_t foreign_storage::get_num_threads ( const ForeignTable &  table)

Definition at line 17 of file AbstractFileStorageDataWrapper.cpp.

References foreign_storage::OptionsContainer::getOption(), import_export::num_import_threads(), and foreign_storage::AbstractFileStorageDataWrapper::THREADS_KEY.

Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

17  {
18  auto num_threads = 0;
19  if (auto opt = table.getOption(AbstractFileStorageDataWrapper::THREADS_KEY);
20  opt.has_value()) {
21  num_threads = std::stoi(opt.value());
22  }
23  return import_export::num_import_threads(num_threads);
24 }
size_t num_import_threads(const int32_t copy_params_threads)
Definition: thread_count.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< int, int > foreign_storage::get_parquet_table_size ( const ReaderPtr &  reader)

Definition at line 39 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), foreign_storage::LazyParquetChunkLoader::loadRowGroups(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

39  {
40  auto file_metadata = reader->parquet_reader()->metadata();
41  const auto num_row_groups = file_metadata->num_row_groups();
42  const auto num_columns = file_metadata->num_columns();
43  return std::make_pair(num_row_groups, num_columns);
44 }

+ Here is the caller graph for this function:

parquet::Type::type foreign_storage::get_physical_type ( ReaderPtr &  reader,
const int  logical_column_index 
)

Definition at line 52 of file ParquetShared.cpp.

Referenced by anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

52  {
53  return reader->parquet_reader()
54  ->metadata()
55  ->schema()
56  ->Column(logical_column_index)
57  ->physical_type();
58 }

+ Here is the caller graph for this function:

std::shared_ptr< ChunkMetadata > foreign_storage::get_placeholder_metadata ( const SQLTypeInfo type,
size_t  num_elements 
)

Definition at line 77 of file FsiChunkUtils.cpp.

References SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::InternalSystemDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::updateRolledOffChunks().

78  {
79  ForeignStorageBuffer empty_buffer;
80  // Use default encoder metadata as in parquet wrapper
81  empty_buffer.initEncoder(type);
82 
83  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(type);
84  chunk_metadata->numElements = num_elements;
85 
86  if (!type.is_varlen_indeed()) {
87  chunk_metadata->numBytes = type.get_size() * num_elements;
88  }
89  // min/max not set by default for arrays, so get from elem type encoder
90  if (type.is_array()) {
91  ForeignStorageBuffer scalar_buffer;
92  scalar_buffer.initEncoder(type.get_elem_type());
93  auto scalar_metadata = scalar_buffer.getEncoder()->getMetadata(type.get_elem_type());
94  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
95  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
96  }
97 
98  return chunk_metadata;
99 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231
bool is_varlen_indeed() const
Definition: sqltypes.h:635
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
bool is_array() const
Definition: sqltypes.h:583

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ParseBufferRequest foreign_storage::get_request_from_pool ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets a request from the metadata scan request pool.

Definition at line 918 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests().

919  {
920  std::unique_lock<std::mutex> request_pool_lock(
921  multi_threading_params.request_pool_mutex);
922  multi_threading_params.request_pool_condition.wait(
923  request_pool_lock,
924  [&multi_threading_params] { return !multi_threading_params.request_pool.empty(); });
925  auto request = std::move(multi_threading_params.request_pool.front());
926  multi_threading_params.request_pool.pop();
927  request_pool_lock.unlock();
928  CHECK(request.buffer);
929  return request;
930 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::unique_ptr< ColumnDescriptor > foreign_storage::get_sub_type_column_descriptor ( const ColumnDescriptor column)

Definition at line 76 of file ParquetShared.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, ColumnDescriptor::db_id, SQLTypeInfo::get_elem_type(), SQLTypeInfo::set_size(), and ColumnDescriptor::tableId.

Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping().

77  {
78  auto column_type = column->columnType.get_elem_type();
79  if (column_type.get_size() == -1 && column_type.is_dict_encoded_string()) {
80  column_type.set_size(4); // override default size of -1
81  }
82  return std::make_unique<ColumnDescriptor>(
83  column->tableId, column->columnId, column->columnName, column_type, column->db_id);
84 }
void set_size(int s)
Definition: sqltypes.h:476
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::get_table_name ( int32_t  db_id,
int32_t  table_id 
)

Definition at line 42 of file InternalSystemDataWrapper.cpp.

References CHECK, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

42  {
43  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
44  CHECK(catalog);
45  auto table_name = catalog->getTableName(table_id);
46  if (table_name.has_value()) {
47  return table_name.value();
48  } else {
49  // It is possible for the table to be concurrently deleted while querying the system
50  // table.
51  return kDeletedValueIndicator;
52  }
53 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
FileRegion &  file_region 
)

Definition at line 44 of file CsvShared.cpp.

References CHECK, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, json_utils::get_value_from_object(), foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

44  {
45  CHECK(json_val.IsObject());
47  json_val, file_region.first_row_file_offset, "first_row_file_offset");
49  json_val, file_region.first_row_index, "first_row_index");
50  json_utils::get_value_from_object(json_val, file_region.region_size, "region_size");
51  json_utils::get_value_from_object(json_val, file_region.row_count, "row_count");
52  if (json_val.HasMember("filename")) {
53  json_utils::get_value_from_object(json_val, file_region.filename, "filename");
54  }
55 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: JsonUtils.h:270
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
RowGroupInterval &  value 
)

Definition at line 669 of file ParquetDataWrapper.cpp.

References CHECK, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, json_utils::get_value_from_object(), and foreign_storage::RowGroupInterval::start_index.

669  {
670  CHECK(json_val.IsObject());
671  json_utils::get_value_from_object(json_val, value.file_path, "file_path");
672  json_utils::get_value_from_object(json_val, value.start_index, "start_index");
673  json_utils::get_value_from_object(json_val, value.end_index, "end_index");
674 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: JsonUtils.h:270
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::init_chunk_for_column ( const ChunkKey chunk_key,
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers,
Chunk_NS::Chunk chunk 
)

Definition at line 21 of file FsiChunkUtils.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Chunk_NS::Chunk::setPinnable(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMapForColumns().

25  {
26  auto catalog =
28  CHECK(catalog);
29 
30  ChunkKey data_chunk_key = chunk_key;
31  AbstractBuffer* data_buffer = nullptr;
32  AbstractBuffer* index_buffer = nullptr;
33  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
34  chunk_key[CHUNK_KEY_COLUMN_IDX]);
35 
36  if (column->columnType.is_varlen_indeed()) {
37  data_chunk_key.push_back(1);
38  ChunkKey index_chunk_key = chunk_key;
39  index_chunk_key.push_back(2);
40 
41  CHECK(buffers.find(data_chunk_key) != buffers.end());
42  CHECK(buffers.find(index_chunk_key) != buffers.end());
43 
44  data_buffer = buffers.find(data_chunk_key)->second;
45  index_buffer = buffers.find(index_chunk_key)->second;
46  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
47  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
48 
49  size_t index_offset_size{0};
50  if (column->columnType.is_string() || column->columnType.is_geometry()) {
51  index_offset_size = sizeof(StringOffsetT);
52  } else if (column->columnType.is_array()) {
53  index_offset_size = sizeof(ArrayOffsetT);
54  } else {
55  UNREACHABLE();
56  }
57  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
58  index_buffer->reserve(index_offset_size *
59  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
60  }
61  } else {
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68  }
69 
70  chunk.setPinnable(false);
71  chunk.setColumnDesc(column);
72  chunk.setBuffer(data_buffer);
73  chunk.setIndexBuffer(index_buffer);
74  chunk.initEncoder();
75 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:36
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:152
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define UNREACHABLE()
Definition: Logger.h:338
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
int32_t StringOffsetT
Definition: sqltypes.h:1493
static SysCatalog & instance()
Definition: SysCatalog.h:343
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1494
void initEncoder()
Definition: Chunk.cpp:290
#define CHECK(condition)
Definition: Logger.h:291
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_append_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 118 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and foreign_storage::ForeignTable::isAppendMode().

Referenced by foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

118  {
119  return get_foreign_table_for_key(chunk_key).isAppendMode();
120 }
bool isAppendMode() const
Checks if the table is in append mode.
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_replicated_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 114 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and table_is_replicated().

Referenced by is_shardable_key().

114  {
116 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)
bool table_is_replicated(const TableDescriptor *td)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_shardable_key ( const ChunkKey key)

Definition at line 122 of file FsiChunkUtils.cpp.

References dist::is_aggregator(), dist::is_distributed(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

122  {
123  return (dist::is_distributed() && !dist::is_aggregator() &&
125 }
bool is_system_table_chunk_key(const ChunkKey &chunk_key)
bool is_replicated_table_chunk_key(const ChunkKey &chunk_key)
bool is_aggregator()
Definition: distributed.cpp:33
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_system_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 110 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and TableDescriptor::is_system_table.

Referenced by is_shardable_key().

110  {
111  return get_foreign_table_for_key(chunk_key).is_system_table;
112 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_table_enabled_on_node ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper().

+ Here is the caller graph for this function:

bool foreign_storage::is_valid_source_type ( const import_export::CopyParams copy_params)

Verifies whether the source_type of the given copy parameters is valid.

Definition at line 122 of file ForeignDataWrapperFactory.cpp.

References import_export::kDelimitedFile, import_export::kParquetFile, import_export::kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and import_export::ForeignDataImporter::importGeneral().

+ Here is the caller graph for this function:

bool foreign_storage::key_does_not_shard_to_leaf ( const ChunkKey key)

Definition at line 135 of file FsiChunkUtils.cpp.

References fragment_maps_to_leaf(), and is_shardable_key().

Referenced by cache_blocks(), populate_string_dictionary(), and anonymous_namespace{RelAlgExecutor.cpp}::set_parallelism_hints().

135  {
136  return (is_shardable_key(key) && !fragment_maps_to_leaf(key));
137 }
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::num_rows_to_process ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  rows_remaining 
)

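Returns the number of rows to process, given the current position defined by start_row_index, the maximum fragment size, and the number of rows left to process. The result never exceeds the space remaining in the fragment that contains start_row_index.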

Definition at line 493 of file AbstractTextFileDataWrapper.cpp.

Referenced by QueryExecutionContext::launchCpuCode(), and populate_chunks().

495  {
496  size_t start_position_in_fragment = start_row_index % max_fragment_size;
497  return std::min<size_t>(rows_remaining, max_fragment_size - start_position_in_fragment);
498 }

+ Here is the caller graph for this function:

UniqueReaderPtr foreign_storage::open_parquet_table ( const std::string &  file_path,
std::shared_ptr< arrow::fs::FileSystem > &  file_system 
)

Definition at line 26 of file ParquetShared.cpp.

Referenced by foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::FileReaderMap::insert(), and foreign_storage::LazyParquetChunkLoader::loadRowGroups().

27  {
28  UniqueReaderPtr reader;
29  auto file_result = file_system->OpenInputFile(file_path);
30  if (!file_result.ok()) {
31  throw std::runtime_error{"Unable to access " + file_system->type_name() + " file: " +
32  file_path + ". " + file_result.status().message()};
33  }
34  auto infile = file_result.ValueOrDie();
35  PARQUET_THROW_NOT_OK(OpenFile(infile, arrow::default_memory_pool(), &reader));
36  return reader;
37 }
std::unique_ptr< parquet::arrow::FileReader > UniqueReaderPtr
Definition: ParquetShared.h:32

+ Here is the caller graph for this function:

ParseFileRegionResult foreign_storage::parse_file_regions ( const FileRegions &  file_regions,
const size_t  start_index,
const size_t  end_index,
FileReader &  file_reader,
ParseBufferRequest &  parse_file_request,
const std::map< int, Chunk_NS::Chunk > &  column_id_to_chunk_map,
const TextFileBufferParser &  parser 
)

Parses a set of file regions, given a handle to the file and an inclusive range of indexes (start_index through end_index) identifying the file regions to be parsed.

Definition at line 215 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, foreign_storage::ParseBufferRequest::buffer_size, CHECK, CHECK_EQ, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, DEBUG_TIMER, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseFileRegionResult::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getTableName(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::FileReader::readRegion(), foreign_storage::ParseBufferResult::rejected_rows, run_benchmark_import::result, foreign_storage::ParseBufferResult::row_count, and throw_unexpected_number_of_items().

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunks().

222  {
223  auto timer = DEBUG_TIMER(__func__);
224  ParseFileRegionResult load_file_region_result{};
225  load_file_region_result.file_offset = file_regions[start_index].first_row_file_offset;
226  load_file_region_result.row_count = 0;
227 
228  ParseBufferResult result;
229  for (size_t i = start_index; i <= end_index; i++) {
230  CHECK(file_regions[i].region_size <= parse_file_request.buffer_size);
231  auto read_size = file_reader.readRegion(parse_file_request.buffer.get(),
232  file_regions[i].first_row_file_offset,
233  file_regions[i].region_size);
234  if (file_regions[i].region_size != read_size) {
235  throw_unexpected_number_of_items(file_regions[i].region_size,
236  read_size,
237  "bytes",
238  parse_file_request.getTableName());
239  }
240  parse_file_request.begin_pos = 0;
241  parse_file_request.end_pos = file_regions[i].region_size;
242  parse_file_request.first_row_index = file_regions[i].first_row_index;
243  parse_file_request.file_offset = file_regions[i].first_row_file_offset;
244  parse_file_request.process_row_count = file_regions[i].row_count;
245 
246  result = parser.parseBuffer(parse_file_request, (i == end_index));
247  CHECK_EQ(file_regions[i].row_count, result.row_count);
248  for (const auto& rejected_row_index : result.rejected_rows) {
249  load_file_region_result.rejected_row_indices.insert(
250  load_file_region_result.row_count + rejected_row_index);
251  }
252  load_file_region_result.row_count += result.row_count;
253  }
254  load_file_region_result.column_id_to_data_blocks_map =
255  result.column_id_to_data_blocks_map;
256  return load_file_region_result;
257 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void throw_unexpected_number_of_items(const size_t num_expected, const size_t num_loaded, const std::string &item_type, const std::string &foreign_table_name)
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<size_t> foreign_storage::partition_by_fragment ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  buffer_row_count 
)

Given a start row index, maximum fragment size, and number of rows in a buffer, this function returns a vector indicating how the rows in the buffer should be partitioned in order to fill up available fragment slots while staying within the capacity of fragments.

Definition at line 506 of file AbstractTextFileDataWrapper.cpp.

References CHECK.

Referenced by scan_metadata().

508  {
509  CHECK(buffer_row_count > 0);
510  std::vector<size_t> partitions{};
511  size_t remaining_rows_in_last_fragment;
512  if (start_row_index % max_fragment_size == 0) {
513  remaining_rows_in_last_fragment = 0;
514  } else {
515  remaining_rows_in_last_fragment =
516  max_fragment_size - (start_row_index % max_fragment_size);
517  }
518  if (buffer_row_count <= remaining_rows_in_last_fragment) {
519  partitions.emplace_back(buffer_row_count);
520  } else {
521  if (remaining_rows_in_last_fragment > 0) {
522  partitions.emplace_back(remaining_rows_in_last_fragment);
523  }
524  size_t remaining_buffer_row_count =
525  buffer_row_count - remaining_rows_in_last_fragment;
526  while (remaining_buffer_row_count > 0) {
527  partitions.emplace_back(
528  std::min<size_t>(remaining_buffer_row_count, max_fragment_size));
529  remaining_buffer_row_count -= partitions.back();
530  }
531  }
532  return partitions;
533 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::pair<std::map<int, DataBlockPtr>, std::map<int, DataBlockPtr> > foreign_storage::partition_data_blocks ( const std::map< int, const ColumnDescriptor * > &  column_by_id,
const std::map< int, DataBlockPtr > &  data_blocks 
)

Partitions data blocks so that dictionary-encoded string columns are separated from all other columns.

Returns
A pair of DataBlockPtr maps: the first contains the columns that are string dictionary encoded, and the second contains all remaining columns.

Definition at line 679 of file AbstractTextFileDataWrapper.cpp.

References shared::get_from_map().

Referenced by populate_chunks_using_data_blocks().

681  {
682  std::map<int, DataBlockPtr> dict_encoded_data_blocks;
683  std::map<int, DataBlockPtr> none_dict_encoded_data_blocks;
684  for (auto& [column_id, data_block] : data_blocks) {
685  const auto column = shared::get_from_map(column_by_id, column_id);
686  if (column->columnType.is_dict_encoded_string()) {
687  dict_encoded_data_blocks[column_id] = data_block;
688  } else {
689  none_dict_encoded_data_blocks[column_id] = data_block;
690  }
691  }
692  return {dict_encoded_data_blocks, none_dict_encoded_data_blocks};
693 }
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::set< T > &  items,
size_t  max_threads 
)

Definition at line 41 of file FsiChunkUtils.h.

Referenced by create_futures_for_workers(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

41  {
42  const size_t items_per_thread = (items.size() + (max_threads - 1)) / max_threads;
43  std::list<std::set<T>> items_by_thread;
44  auto i = 0U;
45  for (auto item : items) {
46  if (i++ % items_per_thread == 0) {
47  items_by_thread.emplace_back(std::set<T>{});
48  }
49  items_by_thread.back().emplace(item);
50  }
51  return items_by_thread;
52 }

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::vector< T > &  items,
size_t  max_threads 
)

Definition at line 60 of file FsiChunkUtils.h.

60  {
61  const size_t items_per_thread = (items.size() + (max_threads - 1)) / max_threads;
62  std::list<std::vector<T>> items_by_thread;
63  auto i = 0U;
64  for (auto item : items) {
65  if (i++ % items_per_thread == 0) {
66  items_by_thread.emplace_back(std::vector<T>{});
67  }
68  items_by_thread.back().emplace_back(item);
69  }
70  return items_by_thread;
71 }
void foreign_storage::populate_chunks ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser,
foreign_storage::IterativeFileScanParameters file_scan_param 
)

Consumes and processes scan requests from a pending-requests queue and populates chunks during an iterative file scan.

Definition at line 988 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer_row_count, CHECK_LE, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, defer_scan_request(), foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, get_next_scan_request(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getMaxFragRows(), foreign_storage::ParseBufferRequest::import_buffers, num_rows_to_process(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, populate_chunks_using_data_blocks(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::ParseBufferRequest::processed_row_count, run_benchmark_import::result, and update_delete_buffer().

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

991  {
992  std::map<int, const ColumnDescriptor*> column_by_id{};
993  while (true) {
994  auto request_opt = get_next_scan_request(multi_threading_params);
995  if (!request_opt.has_value()) {
996  break;
997  }
998  ParseBufferRequest& request = request_opt.value();
999  try {
1000  if (column_by_id.empty()) {
1001  for (const auto column : request.getColumns()) {
1002  column_by_id[column->columnId] = column;
1003  }
1004  }
1005  CHECK_LE(request.processed_row_count, request.buffer_row_count);
1006  for (size_t num_rows_left_to_process =
1007  request.buffer_row_count - request.processed_row_count;
1008  num_rows_left_to_process > 0;
1009  num_rows_left_to_process =
1010  request.buffer_row_count - request.processed_row_count) {
1011  // NOTE: `request.begin_pos` state is required to be set correctly by this point
1012  // in execution
1013  size_t row_index = request.first_row_index + request.processed_row_count;
1014  int fragment_id = row_index / request.getMaxFragRows();
1015  if (fragment_id >
1016  file_scan_param.fragment_id) { // processing must continue next iteration
1017  defer_scan_request(multi_threading_params, request);
1018  return;
1019  }
1020  request.process_row_count = num_rows_to_process(
1021  row_index, request.getMaxFragRows(), num_rows_left_to_process);
1022  for (const auto& import_buffer : request.import_buffers) {
1023  if (import_buffer != nullptr) {
1024  import_buffer->clear();
1025  }
1026  }
1027  auto result = parser.parseBuffer(request, true, true, true);
1028  size_t start_position_in_fragment = row_index % request.getMaxFragRows();
1029  populate_chunks_using_data_blocks(multi_threading_params,
1030  fragment_id,
1031  request,
1032  result,
1033  column_by_id,
1034  fragment_id_to_file_regions_map,
1035  file_scan_param,
1036  start_position_in_fragment);
1037 
1038  request.processed_row_count += result.row_count;
1039  request.begin_pos = result.row_offsets.back() - request.file_offset;
1040 
1042  request, result, file_scan_param, start_position_in_fragment);
1043  }
1044 
1045  } catch (...) {
1046  // Re-add request to pool so we dont block any other threads
1047  {
1048  std::lock_guard<std::mutex> pending_requests_lock(
1049  multi_threading_params.pending_requests_mutex);
1050  multi_threading_params.continue_processing = false;
1051  }
1052  add_request_to_pool(multi_threading_params, request);
1053  throw;
1054  }
1055  add_request_to_pool(multi_threading_params, request);
1056  }
1057 }
void populate_chunks_using_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t expected_current_element_count)
size_t num_rows_to_process(const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void update_delete_buffer(const ParseBufferRequest &request, const ParseBufferResult &result, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t start_position_in_fragment)
void defer_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
#define CHECK_LE(x, y)
Definition: Logger.h:304
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_chunks_using_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const foreign_storage::IterativeFileScanParameters file_scan_param,
const size_t  expected_current_element_count 
)

Definition at line 717 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), append_data_block_to_chunk(), threading_serial::async(), CHECK_EQ, CHECK_GT, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::import_buffers, kENCODING_DICT, partition_data_blocks(), and foreign_storage::ParseBufferResult::row_count.

Referenced by populate_chunks().

725  {
726  std::unique_lock<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
727  // File regions should be added in same order as appendData
728  add_file_region(fragment_id_to_file_regions_map,
729  fragment_id,
730  request.first_row_index,
731  result,
732  request.getFilePath());
733  CHECK_EQ(fragment_id, file_scan_param.fragment_id);
734 
735  // start string encoding asynchronously
736  std::vector<std::pair<const size_t, std::future<int8_t*>>>
737  encoded_data_block_ptrs_futures;
738 
739  for (const auto& import_buffer : request.import_buffers) {
740  if (import_buffer == nullptr) {
741  continue;
742  }
743 
744  if (import_buffer->getTypeInfo().is_dict_encoded_string()) {
745  auto string_payload_ptr = import_buffer->getStringBuffer();
746  CHECK_EQ(kENCODING_DICT, import_buffer->getTypeInfo().get_compression());
747 
748  auto column_id = import_buffer->getColumnDesc()->columnId;
749  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
750  column_id, std::async(std::launch::async, [&import_buffer, string_payload_ptr] {
751  import_buffer->addDictEncodedString(*string_payload_ptr);
752  return import_buffer->getStringDictBuffer();
753  })));
754  }
755  }
756 
757  auto process_subset_of_data_blocks =
758  [&](const std::map<int, DataBlockPtr>& data_blocks) {
759  for (auto& [column_id, data_block] : data_blocks) {
760  const auto column = column_by_id[column_id];
761  lock.unlock(); // unlock the fragment based lock in order to achieve better
762  // performance
763  append_data_block_to_chunk(file_scan_param,
764  data_block,
765  result.row_count,
766  column_id,
767  column,
768  expected_current_element_count);
769  lock.lock();
770  }
771  };
772 
773  auto [dict_encoded_data_blocks, none_dict_encoded_data_blocks] =
774  partition_data_blocks(column_by_id, result.column_id_to_data_blocks_map);
775 
776  process_subset_of_data_blocks(
777  none_dict_encoded_data_blocks); // skip dict string columns
778 
779  // wait for the async requests we made for string dictionary
780  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
781  encoded_ptr_future.second.wait();
782  }
783  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
784  CHECK_GT(dict_encoded_data_blocks.count(encoded_ptr_future.first), 0UL);
785  dict_encoded_data_blocks[encoded_ptr_future.first].numbersPtr =
786  encoded_ptr_future.second.get();
787  }
788 
789  process_subset_of_data_blocks(
790  dict_encoded_data_blocks); // process only dict string columns
791 }
void append_data_block_to_chunk(const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const int column_id, const ColumnDescriptor *column, const size_t element_count_required)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::pair< std::map< int, DataBlockPtr >, std::map< int, DataBlockPtr > > partition_data_blocks(const std::map< int, const ColumnDescriptor * > &column_by_id, const std::map< int, DataBlockPtr > &data_blocks)
#define CHECK_GT(x, y)
Definition: Logger.h:305
future< Result > async(Fn &&fn, Args &&...args)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_string_dictionary ( int32_t  table_id,
int32_t  col_id,
int32_t  db_id 
)

Definition at line 233 of file Execute.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::getChunk(), Catalog_Namespace::SysCatalog::instance(), anonymous_namespace{Execute.cpp}::is_empty_table(), and key_does_not_shard_to_leaf().

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::prepare_string_dictionaries().

233  {
234  const auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
235  CHECK(catalog);
236  if (const auto foreign_table = dynamic_cast<const ForeignTable*>(
237  catalog->getMetadataForTable(table_id, false))) {
238  const auto col_desc = catalog->getMetadataForColumn(table_id, col_id);
239  if (col_desc->columnType.is_dict_encoded_type()) {
240  auto& fragmenter = foreign_table->fragmenter;
241  CHECK(fragmenter != nullptr);
242  if (is_empty_table(fragmenter.get())) {
243  return;
244  }
245  for (const auto& frag : fragmenter->getFragmentsForQuery().fragments) {
246  ChunkKey chunk_key = {db_id, table_id, col_id, frag.fragmentId};
247  // If the key is sharded across leaves, only populate fragments that are sharded
248  // to this leaf.
249  if (key_does_not_shard_to_leaf(chunk_key)) {
250  continue;
251  }
252 
253  const ChunkMetadataMap& metadata_map = frag.getChunkMetadataMap();
254  CHECK(metadata_map.find(col_id) != metadata_map.end());
255  if (auto& meta = metadata_map.at(col_id); meta->isPlaceholder()) {
256  // When this goes out of scope it will stay in CPU cache but become
257  // evictable
258  auto chunk = Chunk_NS::Chunk::getChunk(col_desc,
259  &(catalog->getDataMgr()),
260  chunk_key,
262  0,
263  0,
264  0);
265  }
266  }
267  }
268  }
269 }
std::vector< int > ChunkKey
Definition: types.h:36
bool is_empty_table(Fragmenter_Namespace::AbstractFragmenter *fragmenter)
Definition: Execute.cpp:223
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::process_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map 
)

Updates metadata encapsulated in encoders for all table columns, given new data blocks obtained from parsing a new set of rows in a file buffer. If the cache is available, also appends the data blocks to chunks in the cache.

Definition at line 798 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), cache_blocks(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::row_count, and update_stats().

Referenced by scan_metadata().

803  {
804  std::lock_guard<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
805  // File regions should be added in same order as appendData
806  add_file_region(fragment_id_to_file_regions_map,
807  fragment_id,
808  request.first_row_index,
809  result,
810  request.getFilePath());
811 
812  for (auto& [column_id, data_block] : result.column_id_to_data_blocks_map) {
813  ChunkKey chunk_key{request.db_id, request.getTableId(), column_id, fragment_id};
814  const auto column = column_by_id[column_id];
815  if (column->columnType.is_varlen_indeed()) {
816  chunk_key.emplace_back(1);
817  }
818  if (multi_threading_params.chunk_encoder_buffers.find(chunk_key) ==
819  multi_threading_params.chunk_encoder_buffers.end()) {
820  multi_threading_params.chunk_encoder_buffers[chunk_key] =
821  std::make_unique<ForeignStorageBuffer>();
822  multi_threading_params.chunk_encoder_buffers[chunk_key]->initEncoder(
823  column->columnType);
824  }
825  update_stats(multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder(),
826  column->columnType,
827  data_block,
828  result.row_count);
829  size_t num_elements = multi_threading_params.chunk_encoder_buffers[chunk_key]
830  ->getEncoder()
831  ->getNumElems() +
832  result.row_count;
833  multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder()->setNumElems(
834  num_elements);
835  cache_blocks(
836  multi_threading_params.cached_chunks,
837  data_block,
838  result.row_count,
839  chunk_key,
840  column,
841  (num_elements - result.row_count) == 0, // Is the first block added to this chunk
842  multi_threading_params.disable_cache);
843  }
844 }
std::vector< int > ChunkKey
Definition: types.h:36
void update_stats(Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
void cache_blocks(std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table ( Catalog_Namespace::Catalog catalog,
const std::string &  table_name,
const bool  evict_cached_entries 
)

Definition at line 103 of file ForeignTableRefresh.cpp.

References CHECK, shared::contains(), StorageType::FOREIGN_TABLE, dist::is_leaf_node(), Catalog_Namespace::kAggregatorOnlySystemTables, and refresh_foreign_table_unlocked().

Referenced by RefreshForeignTablesCommand::execute(), and foreign_storage::ForeignTableRefreshScheduler::start().

105  {
106  if (dist::is_leaf_node() &&
107  shared::contains(Catalog_Namespace::kAggregatorOnlySystemTables, table_name)) {
108  // Skip aggregator only system tables on leaf nodes.
109  return;
110  }
111  auto table_lock =
112  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
114  catalog, table_name, false));
115 
116  const TableDescriptor* td = (*table_lock)();
117  if (td->storageType != StorageType::FOREIGN_TABLE) {
118  throw std::runtime_error{
119  table_name +
120  " is not a foreign table. Refreshes are applicable to only foreign tables."};
121  }
122 
123  auto foreign_table = dynamic_cast<const ForeignTable*>(td);
124  CHECK(foreign_table);
125  refresh_foreign_table_unlocked(catalog, *foreign_table, evict_cached_entries);
126 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
bool is_leaf_node()
Definition: distributed.cpp:29
#define CHECK(condition)
Definition: Logger.h:291
static const std::array< std::string, 4 > kAggregatorOnlySystemTables
Definition: Catalog.h:131
void refresh_foreign_table_unlocked(Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
static constexpr char const * FOREIGN_TABLE

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table_unlocked ( Catalog_Namespace::Catalog catalog,
const ForeignTable &  td,
const bool  evict_cached_entries 
)

Definition at line 35 of file ForeignTableRefresh.cpp.

References CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, foreign_storage::anonymous_namespace{ForeignTableRefresh.cpp}::clear_cpu_and_gpu_cache(), Executor::clearExternalCaches(), Catalog_Namespace::DBMetadata::dbId, Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), PostEvictionRefreshException::getOriginalException(), logger::INFO, foreign_storage::ForeignTable::isAppendMode(), LOG, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, TableDescriptor::tableName, and Catalog_Namespace::Catalog::updateForeignTableRefreshTimes().

Referenced by refresh_foreign_table().

37  {
38  LOG(INFO) << "Starting refresh for table: " << td.tableName;
39  auto& data_mgr = catalog.getDataMgr();
40  ChunkKey table_key{catalog.getCurrentDB().dbId, td.tableId};
41  Executor::clearExternalCaches(true, &td, catalog.getDatabaseId());
42  catalog.removeFragmenterForTable(td.tableId);
43 
44  auto fsm = data_mgr.getPersistentStorageMgr()->getForeignStorageMgr();
45  CHECK(fsm);
46  if (auto cfm = dynamic_cast<CachingForeignStorageMgr*>(fsm)) {
47  if (!cfm->hasStoredDataWrapper(table_key[CHUNK_KEY_DB_IDX],
48  table_key[CHUNK_KEY_TABLE_IDX])) {
49  // If there is no wrapper stored on disk, then we have not populated the metadata
50  // for this table and we are free to skip the refresh.
51  catalog.updateForeignTableRefreshTimes(td.tableId);
52  return;
53  }
54  }
55 
56  std::map<ChunkKey, std::shared_ptr<ChunkMetadata>> old_chunk_metadata_by_chunk_key;
57  if (td.isAppendMode() && !evict_cached_entries) {
58  ChunkMetadataVector metadata_vec;
59  data_mgr.getChunkMetadataVecForKeyPrefix(metadata_vec, table_key);
60  int last_fragment_id = 0;
61  for (const auto& [key, metadata] : metadata_vec) {
62  if (key[CHUNK_KEY_FRAGMENT_IDX] > last_fragment_id) {
63  last_fragment_id = key[CHUNK_KEY_FRAGMENT_IDX];
64  }
65  old_chunk_metadata_by_chunk_key[key] = metadata;
66  }
67  for (const auto& [key, metadata] : metadata_vec) {
68  if (key[CHUNK_KEY_FRAGMENT_IDX] == last_fragment_id) {
69  clear_cpu_and_gpu_cache(data_mgr, key);
70  }
71  }
72  } else {
73  clear_cpu_and_gpu_cache(data_mgr, table_key);
74  }
75 
76  try {
77  fsm->refreshTable(table_key, evict_cached_entries);
78  catalog.updateForeignTableRefreshTimes(td.tableId);
79  } catch (PostEvictionRefreshException& e) {
80  catalog.updateForeignTableRefreshTimes(td.tableId);
81  clear_cpu_and_gpu_cache(data_mgr, table_key);
82  throw e.getOriginalException();
83  } catch (...) {
84  clear_cpu_and_gpu_cache(data_mgr, table_key);
85  throw;
86  }
87 
88  // Delete cached rolled off/updated chunks.
89  if (!old_chunk_metadata_by_chunk_key.empty()) {
90  ChunkMetadataVector new_metadata_vec;
91  data_mgr.getChunkMetadataVecForKeyPrefix(new_metadata_vec, table_key);
92  for (const auto& [key, metadata] : new_metadata_vec) {
93  auto it = old_chunk_metadata_by_chunk_key.find(key);
94  if (it != old_chunk_metadata_by_chunk_key.end() &&
95  it->second->numElements != metadata->numElements) {
96  clear_cpu_and_gpu_cache(data_mgr, key);
97  }
98  }
99  }
100  LOG(INFO) << "Completed refresh for table: " << td.tableName;
101 }
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
#define LOG(tag)
Definition: Logger.h:285
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::runtime_error getOriginalException()
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:326
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4260
void clear_cpu_and_gpu_cache(Data_Namespace::DataMgr &data_mgr, const ChunkKey &key_prefix)
#define CHECK(condition)
Definition: Logger.h:291
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438
void updateForeignTableRefreshTimes(const int32_t table_id)
Definition: Catalog.cpp:5805

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::request_pool_non_empty ( MetadataScanMultiThreadingParams &  multi_threading_params)

Definition at line 932 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests().

932  {
933  std::unique_lock<std::mutex> request_pool_lock(
934  multi_threading_params.request_pool_mutex);
935  return !multi_threading_params.request_pool.empty();
936 }

+ Here is the caller graph for this function:

void foreign_storage::reset_multithreading_params ( foreign_storage::MetadataScanMultiThreadingParams multi_threading_params)

Definition at line 1073 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, and foreign_storage::MetadataScanMultiThreadingParams::request_pool.

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

1074  {
1075  multi_threading_params.request_pool = {};
1076  multi_threading_params.cached_chunks = {};
1077  multi_threading_params.pending_requests = {};
1078  multi_threading_params.deferred_requests = {};
1079  multi_threading_params.chunk_encoder_buffers.clear();
1080 }
std::map< ChunkKey, std::unique_ptr< ForeignStorageBuffer > > chunk_encoder_buffers

+ Here is the caller graph for this function:

void foreign_storage::resize_buffer_if_needed ( std::unique_ptr< char[]> &  buffer,
size_t &  buffer_size,
const size_t  alloc_size 
)

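Reallocates the given buffer when its current size (buffer_size) is less than the requested allocation size (alloc_size), and updates buffer_size to match. A new buffer is allocated; the existing contents are not copied. buffer_size must not be greater than alloc_size.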

Definition at line 1063 of file AbstractTextFileDataWrapper.cpp.

References CHECK_LE.

Referenced by dispatch_scan_requests().

1065  {
1066  CHECK_LE(buffer_size, alloc_size);
1067  if (buffer_size < alloc_size) {
1068  buffer = std::make_unique<char[]>(alloc_size);
1069  buffer_size = alloc_size;
1070  }
1071 }
#define CHECK_LE(x, y)
Definition: Logger.h:304

+ Here is the caller graph for this function:

std::vector< Aws::S3::Model::Object > foreign_storage::s3_objects_filter_sort_files ( const std::vector< Aws::S3::Model::Object > &  file_paths,
const shared::FilePathOptions options 
)

Definition at line 22 of file S3FilePathUtil.cpp.

References shared::FilePathOptions::filter_regex, shared::PATHNAME_ORDER_TYPE, foreign_storage::anonymous_namespace{S3FilePathUtil.cpp}::s3_objects_regex_file_filter(), and shared::FilePathOptions::sort_by.

24  {
25  auto result_files =
26  options.filter_regex.has_value()
27  ? s3_objects_regex_file_filter(options.filter_regex.value(), file_paths)
28  : file_paths;
29  // initial lexicographical order ensures a deterministic ordering for files not matching
30  // sort_regex
31  shared::FilePathOptions temp_options;
32  temp_options.sort_by = shared::PATHNAME_ORDER_TYPE;
33  auto initial_file_order = FileOrderS3(temp_options);
34  auto lexi_comp = initial_file_order.getFileComparator();
35  std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
36 
37  auto file_order = FileOrderS3(options);
38  auto comp = file_order.getFileComparator();
39  std::stable_sort(result_files.begin(), result_files.end(), comp);
40  return result_files;
41 }
std::optional< std::string > filter_regex
std::vector< Aws::S3::Model::Object > s3_objects_regex_file_filter(const std::string &pattern, const std::vector< Aws::S3::Model::Object > &objects_list)
const std::string PATHNAME_ORDER_TYPE
std::optional< std::string > sort_by

+ Here is the call graph for this function:

void foreign_storage::scan_metadata ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser 
)

Consumes and processes metadata scan requests from a pending requests queue and updates existing metadata objects based on newly scanned metadata.

Definition at line 863 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::MetadataScanMultiThreadingParams::continue_processing, get_next_scan_request(), foreign_storage::TextFileBufferParser::parseBuffer(), partition_by_fragment(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, process_data_blocks(), and run_benchmark_import::result.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

865  {
866  std::map<int, const ColumnDescriptor*> column_by_id{};
867  while (true) {
868  auto request_opt = get_next_scan_request(multi_threading_params);
869  if (!request_opt.has_value()) {
870  break;
871  }
872  auto& request = request_opt.value();
873  try {
874  if (column_by_id.empty()) {
875  for (const auto column : request.getColumns()) {
876  column_by_id[column->columnId] = column;
877  }
878  }
879  auto partitions = partition_by_fragment(
880  request.first_row_index, request.getMaxFragRows(), request.buffer_row_count);
881  request.begin_pos = 0;
882  size_t row_index = request.first_row_index;
883  for (const auto partition : partitions) {
884  request.process_row_count = partition;
885  for (const auto& import_buffer : request.import_buffers) {
886  if (import_buffer != nullptr) {
887  import_buffer->clear();
888  }
889  }
890  auto result = parser.parseBuffer(request, true);
891  int fragment_id = row_index / request.getMaxFragRows();
892  process_data_blocks(multi_threading_params,
893  fragment_id,
894  request,
895  result,
896  column_by_id,
897  fragment_id_to_file_regions_map);
898  row_index += result.row_count;
899  request.begin_pos = result.row_offsets.back() - request.file_offset;
900  }
901  } catch (...) {
902  // Re-add request to pool so we don't block any other threads
903  {
904  std::lock_guard<std::mutex> pending_requests_lock(
905  multi_threading_params.pending_requests_mutex);
906  multi_threading_params.continue_processing = false;
907  }
908  add_request_to_pool(multi_threading_params, request);
909  throw;
910  }
911  add_request_to_pool(multi_threading_params, request);
912  }
913 }
std::vector< size_t > partition_by_fragment(const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void process_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::set_comp ( const ChunkKey left,
const ChunkKey right 
)
void foreign_storage::set_node_name ( std::map< std::string, import_export::TypedImportBuffer * > &  import_buffers)

Definition at line 55 of file InternalSystemDataWrapper.cpp.

References g_distributed_leaf_idx, dist::is_leaf_node(), and to_string().

Referenced by foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_summary(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

56  {
57  if (import_buffers.find("node") != import_buffers.end()) {
58  if (dist::is_leaf_node()) {
59  std::string leaf_string{"Leaf " + to_string(g_distributed_leaf_idx)};
60  import_buffers["node"]->addDictStringWithTruncation(leaf_string);
61  } else {
62  import_buffers["node"]->addDictStringWithTruncation("Server");
63  }
64  }
65 }
std::string to_string(char const *&&v)
bool is_leaf_node()
Definition: distributed.cpp:29
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const FileRegion &  file_region,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 26 of file CsvShared.cpp.

References json_utils::add_value_to_object(), foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

28  {
29  json_val.SetObject();
30  json_utils::add_value_to_object(
31  json_val, file_region.first_row_file_offset, "first_row_file_offset", allocator);
32  json_utils::add_value_to_object(
33  json_val, file_region.first_row_index, "first_row_index", allocator);
34  json_utils::add_value_to_object(
35  json_val, file_region.region_size, "region_size", allocator);
36  json_utils::add_value_to_object(
37  json_val, file_region.row_count, "row_count", allocator);
38  if (file_region.filename.size()) {
39  json_utils::add_value_to_object(
40  json_val, file_region.filename, "filename", allocator);
41  }
42 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: JsonUtils.h:255

+ Here is the call graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const RowGroupInterval &  value,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 660 of file ParquetDataWrapper.cpp.

References json_utils::add_value_to_object(), foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, and foreign_storage::RowGroupInterval::start_index.

662  {
663  json_val.SetObject();
664  json_utils::add_value_to_object(json_val, value.file_path, "file_path", allocator);
665  json_utils::add_value_to_object(json_val, value.start_index, "start_index", allocator);
666  json_utils::add_value_to_object(json_val, value.end_index, "end_index", allocator);
667 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: JsonUtils.h:255

+ Here is the call graph for this function:

void foreign_storage::throw_file_access_error ( const std::string &  file_path,
const std::string &  message 
)
inline

Definition at line 89 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

90  {
91  std::string error_message{"Unable to access file \"" + file_path + "\". " + message};
92  throw ForeignStorageException{error_message};
93 }

+ Here is the caller graph for this function:

void foreign_storage::throw_file_not_found_error ( const std::string &  file_path)
inline

Definition at line 95 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

95  {
96  throw ForeignStorageException{"File or directory \"" + file_path +
97  "\" does not exist."};
98 }

+ Here is the caller graph for this function:

void foreign_storage::throw_number_of_columns_mismatch_error ( size_t  num_table_cols,
size_t  num_file_cols,
const std::string &  file_path 
)
inline

Definition at line 80 of file ForeignStorageException.h.

References to_string().

Referenced by foreign_storage::RegexFileBufferParser::regexMatchColumns(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_expected_column_count(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().

82  {
83  throw ForeignStorageException{"Mismatched number of logical columns: (expected " +
84  std::to_string(num_table_cols) + " columns, has " +
85  std::to_string(num_file_cols) + "): in file '" +
86  file_path + "'"};
87 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::throw_parquet_metadata_out_of_bounds_error ( const std::string &  min_value,
const std::string &  max_value,
const std::string &  encountered_value 
)
inline

Definition at line 46 of file ParquetMetadataValidator.h.

Referenced by foreign_storage::TimestampBoundsValidator< T >::validateValue(), foreign_storage::IntegralFixedLengthBoundsValidator< T >::validateValue(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue(), and foreign_storage::FloatPointValidator< T >::validateValue().

49  {
50  std::stringstream error_message;
51  error_message << "Parquet column contains values that are outside the range of the "
52  "HeavyDB column "
53  "type. Consider using a wider column type. Min allowed value: "
54  << min_value << ". Max allowed value: " << max_value
55  << ". Encountered value: " << encountered_value << ".";
56  throw std::runtime_error(error_message.str());
57 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_file_error ( const std::string &  file_path)
inline

Definition at line 73 of file ForeignStorageException.h.

Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

73  {
74  throw ForeignStorageException{
75  "Refresh of foreign table created with \"APPEND\" update type failed as "
76  "file \"" +
77  file_path + "\" was removed."};
78 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_file_error ( const std::string &  file_path)
inline

Definition at line 66 of file ForeignStorageException.h.

Referenced by foreign_storage::SingleTextFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

66  {
67  throw ForeignStorageException{
68  "Refresh of foreign table created with \"APPEND\" update type failed as file "
69  "reduced in size: \"" +
70  file_path + "\""};
71 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_result_set_error ( const std::string &  select_statement)
inline

Definition at line 58 of file ForeignStorageException.h.

58  {
59  throw ForeignStorageException{
60  "Refresh of foreign table created with \"APPEND\" update type failed as result set "
61  "of select statement "
62  "reduced in size: \"" +
63  select_statement + "\""};
64 }
void foreign_storage::throw_s3_compressed_extension ( const std::string &  file_path,
const std::string &  ext_type 
)
inline

Definition at line 107 of file ForeignStorageException.h.

108  {
109  throw ForeignStorageException{
110  "File \"" + file_path + "\" has extension type \"" + ext_type +
111  "\", compressed file formats are not supported by S3 Foreign Tables."};
112 }
void foreign_storage::throw_s3_compressed_mime_type ( const std::string &  file_path,
const std::string &  mime_type 
)
inline

Definition at line 100 of file ForeignStorageException.h.

101  {
102  throw ForeignStorageException{
103  "File \"" + file_path + "\" has mime type \"" + mime_type +
104  "\", compressed file formats are not supported by S3 Foreign Tables."};
105 }
void foreign_storage::throw_unexpected_number_of_items ( const size_t &  num_expected,
const size_t &  num_loaded,
const std::string &  item_type 
)
inline

Definition at line 40 of file ForeignStorageException.h.

References to_string().

Referenced by parse_file_regions(), and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_unexpected_number_of_items().

42  {
43  throw ForeignStorageException(
44  "Unexpected number of " + item_type +
45  " while loading from foreign data source: expected " +
46  std::to_string(num_expected) + " , obtained " + std::to_string(num_loaded) + " " +
47  item_type +
48  ". Please use the \"REFRESH FOREIGN TABLES\" command on the foreign table "
49  "if data source has been updated.");
50 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::update_delete_buffer ( const ParseBufferRequest &  request,
const ParseBufferResult &  result,
const foreign_storage::IterativeFileScanParameters file_scan_param,
const size_t  start_position_in_fragment 
)

Definition at line 695 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::IterativeFileScanParameters::delete_buffer, foreign_storage::IterativeFileScanParameters::delete_buffer_mutex, foreign_storage::ParseBufferRequest::processed_row_count, foreign_storage::ParseBufferResult::rejected_rows, and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer().

Referenced by populate_chunks().

699  {
700  if (file_scan_param.delete_buffer) {
701  std::unique_lock delete_buffer_lock(file_scan_param.delete_buffer_mutex);
702  auto& delete_buffer = file_scan_param.delete_buffer;
703  auto chunk_offset = start_position_in_fragment;
704  auto chunk_element_count = chunk_offset + request.processed_row_count;
705 
706  // ensure delete buffer is sized appropriately
707  resize_delete_buffer(delete_buffer, chunk_element_count);
708 
709  auto delete_buffer_data = delete_buffer->getMemoryPtr();
710  for (const auto rejected_row_index : result.rejected_rows) {
711  CHECK(rejected_row_index + chunk_offset < delete_buffer->size());
712  delete_buffer_data[rejected_row_index + chunk_offset] = true;
713  }
714  }
715 }
void resize_delete_buffer(AbstractBuffer *delete_buffer, const size_t chunk_element_count)
std::unique_lock< T > unique_lock
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::update_stats ( Encoder encoder,
const SQLTypeInfo column_type,
DataBlockPtr  data_block,
const size_t  row_count 
)

Updates the statistics metadata encapsulated in the encoder given new data in a data block.

Definition at line 580 of file AbstractTextFileDataWrapper.cpp.

References DataBlockPtr::arraysPtr, SQLTypeInfo::is_array(), SQLTypeInfo::is_varlen(), DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and Encoder::updateStats().

Referenced by process_data_blocks(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata(), and StorageIOFacility::yieldUpdateCallback().

583  {
584  if (column_type.is_array()) {
585  encoder->updateStats(data_block.arraysPtr, 0, row_count);
586  } else if (!column_type.is_varlen()) {
587  encoder->updateStats(data_block.numbersPtr, row_count);
588  } else {
589  encoder->updateStats(data_block.stringsPtr, 0, row_count);
590  }
591 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:234
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:235
bool is_varlen() const
Definition: sqltypes.h:629
int8_t * numbersPtr
Definition: sqltypes.h:233
virtual void updateStats(const int64_t val, const bool is_null)=0
bool is_array() const
Definition: sqltypes.h:583

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< parquet::Statistics > foreign_storage::validate_and_get_column_metadata_statistics ( const parquet::ColumnChunkMetaData *  column_metadata)

Definition at line 86 of file ParquetShared.cpp.

References CHECK, and report::stats.

Referenced by foreign_storage::ParquetEncoder::getRowGroupMetadata(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_list_column_metadata_statistics().

87  {
88  CHECK(column_metadata->is_stats_set());
89  std::shared_ptr<parquet::Statistics> stats = column_metadata->statistics();
90  return stats;
91 }
dictionary stats
Definition: report.py:116
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

void foreign_storage::validate_equal_column_descriptor ( const parquet::ColumnDescriptor *  reference_descriptor,
const parquet::ColumnDescriptor *  new_descriptor,
const std::string &  reference_file_path,
const std::string &  new_file_path 
)

Definition at line 60 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

64  {
65  if (!reference_descriptor->Equals(*new_descriptor)) {
66  throw std::runtime_error{"Parquet file \"" + new_file_path +
67  "\" has a different schema. Please ensure that all Parquet "
68  "files use the same schema. Reference Parquet file: " +
69  reference_file_path +
70  ", column name: " + reference_descriptor->name() +
71  ". New Parquet file: " + new_file_path +
72  ", column name: " + new_descriptor->name() + "."};
73  }
74 }

+ Here is the caller graph for this function:

void foreign_storage::validate_non_foreign_table_write ( const TableDescriptor table_descriptor)
inline

Definition at line 22 of file FsiUtils.h.

References StorageType::FOREIGN_TABLE, and TableDescriptor::storageType.

Referenced by Parser::InsertStmt::analyze(), Parser::TruncateTableStmt::execute(), Parser::InsertValuesStmt::execute(), Parser::InsertIntoTableAsSelectStmt::populateData(), and RelModify::RelModify().

22  {
23  if (table_descriptor && table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
24  throw std::runtime_error{
25  "DELETE, INSERT, TRUNCATE, OR UPDATE commands are not supported for foreign "
26  "tables."};
27  }
28 }
std::string storageType
static constexpr char const * FOREIGN_TABLE

+ Here is the caller graph for this function:

void foreign_storage::validate_regex_parser_options ( const import_export::CopyParams copy_params)

Definition at line 116 of file ForeignDataWrapperFactory.cpp.

References import_export::CopyParams::line_regex.

Referenced by anonymous_namespace{ForeignDataImporter.cpp}::validate_copy_params().

116  {
117  if (copy_params.line_regex.empty()) {
118  throw std::runtime_error{"Regex parser options must contain a line regex."};
119  }
120 }

+ Here is the caller graph for this function:

Variable Documentation