OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage Namespace Reference

Namespaces

 anonymous_namespace{AbstractFileStorageDataWrapper.cpp}
 
 anonymous_namespace{AbstractTextFileDataWrapper.cpp}
 
 anonymous_namespace{CachingForeignStorageMgr.cpp}
 
 anonymous_namespace{CsvFileBufferParser.cpp}
 
 anonymous_namespace{FileReader.cpp}
 
 anonymous_namespace{ForeignDataWrapperFactory.cpp}
 
 anonymous_namespace{ForeignStorageCache.cpp}
 
 anonymous_namespace{ForeignTableRefresh.cpp}
 
 anonymous_namespace{InternalCatalogDataWrapper.cpp}
 
 anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalStorageStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalSystemDataWrapper.cpp}
 
 anonymous_namespace{LazyParquetChunkLoader.cpp}
 
 anonymous_namespace{LogFileBufferParser.cpp}
 
 anonymous_namespace{ParquetDataWrapper.cpp}
 
 anonymous_namespace{RefreshTimeCalculator.cpp}
 
 anonymous_namespace{RegexFileBufferParser.cpp}
 
 anonymous_namespace{RegexParserDataWrapper.cpp}
 
 anonymous_namespace{S3FilePathUtil.cpp}
 
 anonymous_namespace{TextFileBufferParser.cpp}
 
 Csv
 
 json_utils
 

Classes

struct  ForeignServer
 
struct  ForeignTable
 
struct  OptionsContainer
 
struct  UserMappingType
 
struct  UserMapping
 
class  RefreshTimeCalculator
 
class  AbstractFileStorageDataWrapper
 
struct  ParseFileRegionResult
 
struct  MetadataScanMultiThreadingParams
 
struct  IterativeFileScanParameters
 
class  AbstractTextFileDataWrapper
 
class  CachingForeignStorageMgr
 
class  CsvDataWrapper
 
class  CsvFileBufferParser
 
struct  DataPreview
 
class  FileReader
 
class  SingleFileReader
 
class  SingleTextFileReader
 
class  ArchiveWrapper
 
class  CompressedFileReader
 
class  MultiFileReader
 
class  LocalMultiFileReader
 
struct  FileRegion
 
class  ForeignDataWrapper
 
struct  DataWrapperType
 Encapsulates an enumeration of foreign data wrapper type strings. More...
 
class  ForeignDataWrapperFactory
 
class  ForeignStorageBuffer
 
class  ForeignStorageCache
 
class  ForeignStorageException
 
class  MetadataScanInfeasibleFragmentSizeException
 
class  RequestedFragmentIdOutOfBoundsException
 
class  ChunkSizeValidator
 
class  MockForeignDataWrapper
 
class  ForeignStorageMgr
 
class  ForeignTableSchema
 
class  GeospatialEncoder
 
class  InternalCatalogDataWrapper
 
class  InternalLogsDataWrapper
 
class  InternalMemoryStatsDataWrapper
 
struct  StorageDetails
 
class  InternalStorageStatsDataWrapper
 
class  InternalSystemDataWrapper
 
struct  ColumnType
 
struct  FragmentType
 
struct  Interval
 
struct  ParquetBatchData
 
class  ParquetRowGroupReader
 
struct  PreviewContext
 
class  LazyParquetChunkLoader
 
class  LogFileBufferParser
 
class  OdbcGeospatialEncoder
 
class  ParquetArrayDetectEncoder
 
class  ParquetArrayEncoder
 
class  ParquetArrayImportEncoder
 
class  ParquetDataWrapper
 
class  ParquetDateInDaysFromTimestampEncoder
 
class  ParquetDateInSecondsEncoder
 
class  ParquetDecimalEncoder
 
class  ParquetDetectStringEncoder
 
class  ParquetEncoder
 
class  ParquetImportEncoder
 
class  ParquetScalarEncoder
 
class  ParquetFixedLengthArrayEncoder
 
class  ParquetFixedLengthEncoder
 
class  ParquetUnsignedFixedLengthEncoder
 
class  ParquetGeospatialEncoder
 
class  ParquetGeospatialImportEncoder
 
class  RowGroupIntervalTracker
 
class  ParquetImportBatchResult
 
class  AbstractRowGroupIntervalTracker
 
class  ParquetImporter
 
class  ParquetInPlaceEncoder
 
class  TypedParquetInPlaceEncoder
 
class  ParquetMetadataValidator
 
class  TimestampBoundsValidator
 
class  IntegralFixedLengthBoundsValidator
 
class  BaseDateBoundsValidator
 
class  FloatPointValidator
 
struct  RowGroupInterval
 
struct  RowGroupMetadata
 
class  FileReaderMap
 
class  ParquetStringEncoder
 
class  ParquetStringImportEncoder
 
class  ParquetStringNoneEncoder
 
class  ParquetTimeEncoder
 
class  ParquetTimestampEncoder
 
class  ParquetVariableLengthArrayEncoder
 
class  PassThroughBuffer
 
class  RegexFileBufferParser
 
class  RegexParserDataWrapper
 
class  FileOrderS3
 
struct  ParseBufferRequest
 
struct  ParseBufferResult
 
class  TextFileBufferParser
 
class  TypedParquetDetectBuffer
 
class  TypedParquetStorageBuffer
 
class  ForeignTableRefreshScheduler
 

Typedefs

using OptionsMap = std::map< std::string, std::string, std::less<>>
 
using SampleRows = std::vector< std::vector< std::string >>
 
using FirstLineByFilePath = std::map< std::string, std::string >
 
using FileRegions = std::vector< FileRegion >
 
using ChunkToBufferMap = std::map< ChunkKey, AbstractBuffer * >
 
using RenderGroupAnalyzerMap = std::map< int, std::unique_ptr< import_export::RenderGroupAnalyzer >>
 
using read_lock = heavyai::shared_lock< heavyai::shared_mutex >
 
using write_lock = heavyai::unique_lock< heavyai::shared_mutex >
 
using FilePathAndRowGroup = std::pair< std::string, int32_t >
 
using RejectedRowIndices = std::set< int64_t >
 
using InvalidRowGroupIndices = std::set< int64_t >
 
template<typename T >
using DateInSecondsBoundsValidator = BaseDateBoundsValidator< T, true >
 
template<typename T >
using DateInDaysBoundsValidator = BaseDateBoundsValidator< T, false >
 
using UniqueReaderPtr = std::unique_ptr< parquet::arrow::FileReader >
 
using ReaderPtr = parquet::arrow::FileReader *
 
template<typename V , typename T , T conversion_denominator, typename NullType = V>
using ParquetDateInSecondsFromTimestampEncoder = ParquetTimestampEncoder< V, T, conversion_denominator, NullType >
 
using S3ObjectComparator = std::function< bool(const Aws::S3::Model::Object &, const Aws::S3::Model::Object &)>
 

Functions

size_t get_num_threads (const ForeignTable &table)
 
ParseFileRegionResult parse_file_regions (const FileRegions &file_regions, const size_t start_index, const size_t end_index, FileReader &file_reader, ParseBufferRequest &parse_file_request, const std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, const TextFileBufferParser &parser)
 
size_t num_rows_to_process (const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
 
std::vector< size_t > partition_by_fragment (const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
 
std::optional< ParseBufferRequest > get_next_scan_request (MetadataScanMultiThreadingParams &multi_threading_params)
 
void add_file_region (std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
 
void update_stats (Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
 
void cache_blocks (std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)
 
void append_data_block_to_chunk (const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const int column_id, const ColumnDescriptor *column, const size_t element_count_required)
 
std::pair< std::map< int,
DataBlockPtr >, std::map< int,
DataBlockPtr > > 
partition_data_blocks (const std::map< int, const ColumnDescriptor * > &column_by_id, const std::map< int, DataBlockPtr > &data_blocks)
 
void update_delete_buffer (const ParseBufferRequest &request, const ParseBufferResult &result, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t start_position_in_fragment)
 
void populate_chunks_using_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t expected_current_element_count)
 
void process_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
 
void add_request_to_pool (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void scan_metadata (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser)
 
ParseBufferRequest get_request_from_pool (MetadataScanMultiThreadingParams &multi_threading_params)
 
bool request_pool_non_empty (MetadataScanMultiThreadingParams &multi_threading_params)
 
void defer_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void dispatch_all_deferred_requests (MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void populate_chunks (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser, foreign_storage::IterativeFileScanParameters &file_scan_param)
 
void resize_buffer_if_needed (std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
 
void reset_multithreading_params (foreign_storage::MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_requests (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)
 
void dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)
 
void dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call)
 
void set_value (rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, FileRegion &file_region)
 
std::optional< SQLTypes > detect_geo_type (const SampleRows &sample_rows, size_t column_index)
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
 Create proxy fsi objects for use outside FSI. More...
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const TableDescriptor *table)
 Create proxy fsi objects for use outside FSI. NOTE: parameters mirror the function above. More...
 
void validate_regex_parser_options (const import_export::CopyParams &copy_params)
 
bool is_valid_source_type (const import_export::CopyParams &copy_params)
 
std::string bool_to_option_value (const bool value)
 
void throw_unexpected_number_of_items (const size_t &num_expected, const size_t &num_loaded, const std::string &item_type)
 
void throw_removed_row_in_result_set_error (const std::string &select_statement)
 
void throw_removed_row_in_file_error (const std::string &file_path)
 
void throw_removed_file_error (const std::string &file_path)
 
void throw_number_of_columns_mismatch_error (size_t num_table_cols, size_t num_file_cols, const std::string &file_path)
 
void throw_file_access_error (const std::string &file_path, const std::string &message)
 
void throw_file_not_found_error (const std::string &file_path)
 
void throw_s3_compressed_mime_type (const std::string &file_path, const std::string &mime_type)
 
void throw_s3_compressed_extension (const std::string &file_path, const std::string &ext_type)
 
bool is_append_table_chunk_key (const ChunkKey &chunk_key)
 
bool set_comp (const ChunkKey &left, const ChunkKey &right)
 
std::vector< ChunkKey > get_column_key_vec (const ChunkKey &destination_chunk_key)
 
std::set< ChunkKey > get_column_key_set (const ChunkKey &destination_chunk_key)
 
size_t get_max_chunk_size (const ChunkKey &key)
 
bool contains_fragment_key (const std::set< ChunkKey > &key_set, const ChunkKey &target_key)
 
bool is_table_enabled_on_node (const ChunkKey &key)
 
void refresh_foreign_table_unlocked (Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
 
void refresh_foreign_table (Catalog_Namespace::Catalog &catalog, const std::string &table_name, const bool evict_cached_entries)
 
void init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const SQLTypeInfo &type, size_t num_elements)
 
const
foreign_storage::ForeignTable & 
get_foreign_table_for_key (const ChunkKey &key)
 
bool is_system_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_replicated_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_shardable_key (const ChunkKey &key)
 
bool fragment_maps_to_leaf (const ChunkKey &key)
 
bool key_does_not_shard_to_leaf (const ChunkKey &key)
 
template<typename T >
auto partition_for_threads (const std::set< T > &items, size_t max_threads)
 
template<typename T >
auto partition_for_threads (const std::vector< T > &items, size_t max_threads)
 
template<typename Container >
std::vector< std::future< void > > create_futures_for_workers (const Container &items, size_t max_threads, std::function< void(const Container &)> lambda)
 
template<typename T >
ArrayDatum encode_as_array_datum (const std::vector< T > &data)
 
std::string get_db_name (int32_t db_id)
 
std::string get_table_name (int32_t db_id, int32_t table_id)
 
void set_node_name (std::map< std::string, import_export::TypedImportBuffer * > &import_buffers)
 
void set_value (rapidjson::Value &json_val, const RowGroupInterval &value, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, RowGroupInterval &value)
 
template<typename D , typename T >
bool check_bounds (const T &value)
 
template<typename D >
std::string datetime_to_string (const D &timestamp, const SQLTypeInfo &column_type)
 
void throw_parquet_metadata_out_of_bounds_error (const std::string &min_value, const std::string &max_value, const std::string &encountered_value)
 
UniqueReaderPtr open_parquet_table (const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
 
std::pair< int, int > get_parquet_table_size (const ReaderPtr &reader)
 
const parquet::ColumnDescriptor * get_column_descriptor (const parquet::arrow::FileReader *reader, const int logical_column_index)
 
parquet::Type::type get_physical_type (ReaderPtr &reader, const int logical_column_index)
 
void validate_equal_column_descriptor (const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
 
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor (const ColumnDescriptor *column)
 
std::shared_ptr
< parquet::Statistics > 
validate_and_get_column_metadata_statistics (const parquet::ColumnChunkMetaData *column_metadata)
 
std::vector
< Aws::S3::Model::Object > 
s3_objects_filter_sort_files (const std::vector< Aws::S3::Model::Object > &file_paths, const shared::FilePathOptions &options)
 
template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V get_null_value ()
 
template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > get_min_max_bounds ()
 
void populate_string_dictionary (int32_t table_id, int32_t col_id, int32_t db_id)
 
void validate_non_foreign_table_write (const TableDescriptor *table_descriptor)
 

Variables

constexpr const char * kDeletedValueIndicator {"<DELETED>"}
 

Typedef Documentation

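using foreign_storage::ChunkToBufferMap = typedef std::map<ChunkKey, AbstractBuffer*>

Definition at line 34 of file ForeignDataWrapper.h.

template<typename T >
using foreign_storage::DateInDaysBoundsValidator = typedef BaseDateBoundsValidator<T, false>

Definition at line 277 of file ParquetMetadataValidator.h.

template<typename T >
using foreign_storage::DateInSecondsBoundsValidator = typedef BaseDateBoundsValidator<T, true>

Definition at line 274 of file ParquetMetadataValidator.h.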

using foreign_storage::FilePathAndRowGroup = typedef std::pair<std::string, int32_t>

Definition at line 40 of file ParquetDataWrapper.h.

using foreign_storage::FileRegions = typedef std::vector<FileRegion>

Definition at line 60 of file FileRegions.h.

using foreign_storage::FirstLineByFilePath = typedef std::map<std::string, std::string>

Definition at line 37 of file FileReader.h.

using foreign_storage::InvalidRowGroupIndices = typedef std::set<int64_t>

Definition at line 123 of file ParquetEncoder.h.

using foreign_storage::OptionsMap = typedef std::map<std::string, std::string, std::less<>>

Definition at line 30 of file OptionsContainer.h.

template<typename V , typename T , T conversion_denominator, typename NullType = V>
using foreign_storage::ParquetDateInSecondsFromTimestampEncoder = typedef ParquetTimestampEncoder<V, T, conversion_denominator, NullType>

Definition at line 89 of file ParquetTimestampEncoder.h.

using foreign_storage::ReaderPtr = typedef parquet::arrow::FileReader*

Definition at line 33 of file ParquetShared.h.

using foreign_storage::RejectedRowIndices = typedef std::set<int64_t>

Definition at line 28 of file ParquetEncoder.h.

using foreign_storage::RenderGroupAnalyzerMap = typedef std::map<int, std::unique_ptr<import_export::RenderGroupAnalyzer>>

Definition at line 37 of file ForeignDataWrapper.h.

using foreign_storage::S3ObjectComparator = typedef std::function<bool(const Aws::S3::Model::Object&, const Aws::S3::Model::Object&)>

Definition at line 17 of file S3FilePathUtil.h.

using foreign_storage::SampleRows = typedef std::vector<std::vector<std::string>>

Definition at line 26 of file DataPreview.h.

using foreign_storage::UniqueReaderPtr = typedef std::unique_ptr<parquet::arrow::FileReader>

Definition at line 32 of file ParquetShared.h.

Function Documentation

void foreign_storage::add_file_region ( std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
int  fragment_id,
size_t  first_row_index,
const ParseBufferResult &  result,
const std::string &  file_path 
)

Creates a new file region based on metadata from parsed file buffers and adds the new region to the fragment id to file regions map.

Definition at line 552 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferResult::row_count, and foreign_storage::ParseBufferResult::row_offsets.

Referenced by populate_chunks_using_data_blocks(), and process_data_blocks().

556  {
557  fragment_id_to_file_regions_map[fragment_id].emplace_back(
558  // file naming is handled by FileReader
559  FileRegion(file_path,
560  result.row_offsets.front(),
561  first_row_index,
562  result.row_count,
563  result.row_offsets.back() - result.row_offsets.front()));
564 }

+ Here is the caller graph for this function:

void foreign_storage::add_request_to_pool ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Adds the request object for a processed request back to the request pool for reuse in subsequent requests.

Definition at line 840 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests(), populate_chunks(), and scan_metadata().

841  {
842  std::unique_lock<std::mutex> completed_requests_queue_lock(
843  multi_threading_params.request_pool_mutex);
844  multi_threading_params.request_pool.emplace(std::move(request));
845  completed_requests_queue_lock.unlock();
846  multi_threading_params.request_pool_condition.notify_all();
847 }

+ Here is the caller graph for this function:

void foreign_storage::append_data_block_to_chunk ( const foreign_storage::IterativeFileScanParameters &  file_scan_param,
DataBlockPtr  data_block,
size_t  row_count,
const int  column_id,
const ColumnDescriptor *  column,
const size_t  element_count_required 
)

Definition at line 639 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::IterativeFileScanParameters::column_id_to_chunk_map, shared::get_from_map(), foreign_storage::IterativeFileScanParameters::getChunkConditionalVariable(), and foreign_storage::IterativeFileScanParameters::getChunkMutex().

Referenced by populate_chunks_using_data_blocks().

645  {
646  auto chunk = shared::get_from_map(file_scan_param.column_id_to_chunk_map, column_id);
647 
648  auto& conditional_variable = file_scan_param.getChunkConditionalVariable(column_id);
649  {
650  std::unique_lock<std::mutex> chunk_lock(file_scan_param.getChunkMutex(column_id));
651  conditional_variable.wait(chunk_lock, [element_count_required, &chunk]() {
652  return chunk.getBuffer()->getEncoder()->getNumElems() == element_count_required;
653  });
654 
655  chunk.appendData(data_block, row_count, 0);
656  }
657 
658  conditional_variable
659  .notify_all(); // notify any threads waiting on the correct element count
660 }
std::condition_variable & getChunkConditionalVariable(const int col_id) const
std::map< int, Chunk_NS::Chunk > & column_id_to_chunk_map
std::mutex & getChunkMutex(const int col_id) const
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::bool_to_option_value ( const bool  value)

Definition at line 129 of file ForeignDataWrapperFactory.cpp.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy().

129  {
130  return value ? "TRUE" : "FALSE";
131 }

+ Here is the caller graph for this function:

void foreign_storage::cache_blocks ( std::map< ChunkKey, Chunk_NS::Chunk > &  cached_chunks,
DataBlockPtr  data_block,
size_t  row_count,
ChunkKey &  chunk_key,
const ColumnDescriptor *  column,
bool  is_first_block,
bool  disable_cache 
)

Definition at line 598 of file AbstractTextFileDataWrapper.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_cache_if_enabled(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), SQLTypeInfo::is_varlen_indeed(), and key_does_not_shard_to_leaf().

Referenced by process_data_blocks().

604  {
605  auto catalog =
606  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
607  CHECK(catalog);
608  auto cache = get_cache_if_enabled(catalog, disable_cache);
609  if (cache) {
610  // This extra filter needs to be here because this wrapper is the only one that
611  // accesses the cache directly and it should not be inserting chunks which are not
612  // mapped to the current leaf (in distributed mode).
613  if (key_does_not_shard_to_leaf(chunk_key)) {
614  return;
615  }
616 
617  ChunkKey index_key = {chunk_key[CHUNK_KEY_DB_IDX],
618  chunk_key[CHUNK_KEY_TABLE_IDX],
619  chunk_key[CHUNK_KEY_COLUMN_IDX],
620  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
621  2};
622  // Create actual data chunks to prepopulate cache
623  if (cached_chunks.find(chunk_key) == cached_chunks.end()) {
624  cached_chunks[chunk_key] = Chunk_NS::Chunk{column, false};
625  cached_chunks[chunk_key].setBuffer(
626  cache->getChunkBufferForPrecaching(chunk_key, is_first_block));
627  if (column->columnType.is_varlen_indeed()) {
628  cached_chunks[chunk_key].setIndexBuffer(
629  cache->getChunkBufferForPrecaching(index_key, is_first_block));
630  }
631  if (is_first_block) {
632  cached_chunks[chunk_key].initEncoder();
633  }
634  }
635  cached_chunks[chunk_key].appendData(data_block, row_count, 0);
636  }
637 }
std::vector< int > ChunkKey
Definition: types.h:36
foreign_storage::ForeignStorageCache * get_cache_if_enabled(std::shared_ptr< Catalog_Namespace::Catalog > &catalog, const bool disable_cache)
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:343
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:626
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename D , typename T >
bool foreign_storage::check_bounds ( const T &  value)
inline

Definition at line 32 of file ParquetMetadataValidator.h.

32  {
33  auto [min_value, max_value] = get_min_max_bounds<D>();
34  return value >= min_value && value <= max_value;
35 }

bool foreign_storage::contains_fragment_key ( const std::set< ChunkKey > &  key_set,
const ChunkKey &  target_key 
)

template<typename Container >
std::vector<std::future<void> > foreign_storage::create_futures_for_workers ( const Container &  items,
size_t  max_threads,
std::function< void(const Container &)>  lambda 
)

Definition at line 74 of file FsiChunkUtils.h.

References threading_serial::async(), and partition_for_threads().

Referenced by import_export::ForeignDataImporter::importGeneralS3(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

77  {
78  auto items_per_thread = partition_for_threads(items, max_threads);
79  std::vector<std::future<void>> futures;
80  for (const auto& items : items_per_thread) {
81  futures.emplace_back(std::async(std::launch::async, lambda, items));
82  }
83 
84  return futures;
85 }
auto partition_for_threads(const std::set< T > &items, size_t max_threads)
Definition: FsiChunkUtils.h:41
future< Result > async(Fn &&fn, Args &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const int  db_id,
const TableDescriptor *  table,
const int32_t  user_id 
)

Create proxy fsi objects for use outside FSI.

Parameters
copy_from_source- the source that will be copied
copy_params- CopyParams that specify parameters around use case
db_id- db id of database in use case
table- the table descriptor of the table in use case
user_id- the user id of user in use case
Returns
tuple of FSI objects that can be used for particular use case (for example import or detect)

Definition at line 59 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable().

Referenced by create_proxy_fsi_objects(), and import_export::ForeignDataImporter::importGeneral().

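63  {
64  auto server = foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(
65  db_id, user_id, copy_from_source, copy_params);
66 
67  CHECK(server);
68  server->validate();
69 
70  auto user_mapping =
71  foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable(
72  db_id, user_id, copy_from_source, copy_params, server.get());
73 
74  if (user_mapping) {
75  user_mapping->validate(server.get());
76  }
77 
78  auto foreign_table =
79  foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(
80  db_id, table, copy_from_source, copy_params, server.get());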
81 
82  CHECK(foreign_table);
83  foreign_table->validateOptionValues();
84 
85  return {std::move(server), std::move(user_mapping), std::move(foreign_table)};
86 }
static std::unique_ptr< ForeignServer > createForeignServerProxy(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params)
static std::unique_ptr< ForeignTable > createForeignTableProxy(const int db_id, const TableDescriptor *table, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
#define CHECK(condition)
Definition: Logger.h:291
static std::unique_ptr< UserMapping > createUserMappingProxyIfApplicable(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const TableDescriptor *  table 
)

Create proxy fsi objects for use outside FSI. NOTE: parameters mirror the function above.

Definition at line 91 of file ForeignDataWrapperFactory.cpp.

References create_proxy_fsi_objects().

93  {
94  return create_proxy_fsi_objects(copy_from_source, copy_params, -1, table, -1);
95 }
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > create_proxy_fsi_objects(const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
Create proxy fsi objects for use outside FSI.

+ Here is the call graph for this function:

template<typename D >
std::string foreign_storage::datetime_to_string ( const D &  timestamp,
const SQLTypeInfo &  column_type 
)
inline

Definition at line 38 of file ParquetMetadataValidator.h.

References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfo::is_date(), and SQLTypeInfo::is_timestamp().

Referenced by foreign_storage::TimestampBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::getMinMaxBoundsAsStrings(), foreign_storage::TimestampBoundsValidator< T >::validateValue(), and foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue().

39  {
40  CHECK(column_type.is_timestamp() || column_type.is_date());
41  Datum d;
42  d.bigintval = timestamp;
43  return DatumToString(d, column_type);
44 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:458
bool is_timestamp() const
Definition: sqltypes.h:1014
int64_t bigintval
Definition: Datum.h:72
#define CHECK(condition)
Definition: Logger.h:291
Definition: Datum.h:67
bool is_date() const
Definition: sqltypes.h:998

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::defer_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Definition at line 933 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, and foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex.

Referenced by populate_chunks().

934  {
935  std::unique_lock<std::mutex> deferred_requests_lock(
936  multi_threading_params.deferred_requests_mutex);
937  multi_threading_params.deferred_requests.emplace(std::move(request));
938 }

+ Here is the caller graph for this function:

std::optional< SQLTypes > foreign_storage::detect_geo_type ( const SampleRows &  sample_rows,
size_t  column_index 
)

Definition at line 22 of file DataPreview.cpp.

References CHECK_EQ, CHECK_LT, kLINESTRING, kMULTIPOLYGON, kNULLT, kPOINT, kPOLYGON, and UNREACHABLE.

Referenced by foreign_storage::LazyParquetChunkLoader::previewFiles().

23  {
24  std::optional<SQLTypes> tentative_geo_type{};
25  for (const auto& row : sample_rows) {
26  static std::regex geo_regex{
27  "\\s*(POINT|LINESTRING|POLYGON|MULTIPOLYGON)\\s*\\(.+\\)\\s*"};
28  std::smatch match;
29  CHECK_LT(column_index, row.size());
30  if (std::regex_match(row[column_index], match, geo_regex)) {
31  CHECK_EQ(match.size(), static_cast<size_t>(2));
32  SQLTypes geo_type{kNULLT};
33  const auto& geo_type_str = match[1];
34  if (geo_type_str == "POINT") {
35  geo_type = kPOINT;
36  } else if (geo_type_str == "LINESTRING") {
37  geo_type = kLINESTRING;
38  } else if (geo_type_str == "POLYGON") {
39  geo_type = kPOLYGON;
40  } else if (geo_type_str == "MULTIPOLYGON") {
41  geo_type = kMULTIPOLYGON;
42  } else {
43  UNREACHABLE() << "Unexpected geo type match: " << geo_type_str;
44  }
45  if (tentative_geo_type.has_value()) {
46  if (tentative_geo_type.value() != geo_type) {
47  return {}; // geo type does not match between rows, can not be imported
48  }
49  } else {
50  tentative_geo_type = geo_type;
51  }
52  }
53  }
54  return tentative_geo_type;
55 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
SQLTypes
Definition: sqltypes.h:55
#define UNREACHABLE()
Definition: Logger.h:337
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the caller graph for this function:

void foreign_storage::dispatch_all_deferred_requests ( MetadataScanMultiThreadingParams &  multi_threading_params)

Definition at line 943 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

944  {
945  std::unique_lock<std::mutex> deferred_requests_lock(
946  multi_threading_params.deferred_requests_mutex);
947  {
948  std::unique_lock<std::mutex> pending_requests_lock(
949  multi_threading_params.pending_requests_mutex);
950 
951  while (!multi_threading_params.deferred_requests.empty()) {
952  auto& request = multi_threading_params.deferred_requests.front();
953  multi_threading_params.pending_requests.emplace(std::move(request));
954  multi_threading_params.deferred_requests.pop();
955  }
956  multi_threading_params.pending_requests_condition.notify_all();
957  }
958 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Dispatches a new metadata scan request by adding the request to the pending requests queue to be consumed by a worker thread.

Definition at line 964 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

965  {
966  {
967  std::unique_lock<std::mutex> pending_requests_lock(
968  multi_threading_params.pending_requests_mutex);
969  multi_threading_params.pending_requests.emplace(std::move(request));
970  }
971  multi_threading_params.pending_requests_condition.notify_all();
972 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call,
int &  iterative_scan_last_fragment_id 
)

Reads from a text file iteratively and dispatches metadata scan requests that are processed by worker threads.

Definition at line 1076 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::alloc_size, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_all_deferred_requests(), dispatch_scan_request(), foreign_storage::TextFileBufferParser::findRowEndPosition(), foreign_storage::IterativeFileScanParameters::fragment_id, get_request_from_pool(), foreign_storage::FileReader::getCurrentFilePath(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, TableDescriptor::maxFragRows, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::no_deferred_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::FileReader::read(), request_pool_non_empty(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_alloc_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_data, and resize_buffer_if_needed().

Referenced by dispatch_scan_requests_with_exception_handling().

1090  {
1091  auto& alloc_size = iterative_residual_buffer.alloc_size;
1092  auto& residual_buffer = iterative_residual_buffer.residual_data;
1093  auto& residual_buffer_size = iterative_residual_buffer.residual_buffer_size;
1094  auto& residual_buffer_alloc_size = iterative_residual_buffer.residual_buffer_alloc_size;
1095 
1096  if (is_first_file_scan_call) {
1097  alloc_size = buffer_size;
1098  residual_buffer = std::make_unique<char[]>(alloc_size);
1099  residual_buffer_size = 0;
1100  residual_buffer_alloc_size = alloc_size;
1101  } else if (!no_deferred_requests(multi_threading_params)) {
1102  dispatch_all_deferred_requests(multi_threading_params);
1103  }
1104 
1105  // NOTE: During an iterative scan, it is possible for an entire fragment to
1106  // be parsed into requests, which sit in deferred requests; in order to avoid
1107  // stalling indefinitely while waiting on an available request, the check
1108  // below determines if this is the case or not.
1109  bool current_fragment_fully_read_during_iterative_scan =
1110  file_scan_param && file_scan_param->fragment_id < iterative_scan_last_fragment_id;
1111 
1112  // NOTE: The conditional below behaves as follows:
1113  //
1114  // * for non-iterative scans,
1115  // current_fragment_fully_read_during_iterative_scan is false, and therefore
1116  // only the first clause of the conditional is relevant
1117  //
1118  // * for iterative scans, if
1119  // current_fragment_fully_read_during_iterative_scan is true, then the
1120  // conditional is skipped, unless it is determined there are still available
1121  // requests to work with, in which case the loop is entered; this is an
1122  // optimization that ensures maximal concurrency of loading while processing
1123  // requests
1124  while (!file_reader.isScanFinished() &&
1125  (request_pool_non_empty(multi_threading_params) ||
1126  !current_fragment_fully_read_during_iterative_scan)) {
1127  {
1128  std::lock_guard<std::mutex> pending_requests_lock(
1129  multi_threading_params.pending_requests_mutex);
1130  if (!multi_threading_params.continue_processing) {
1131  break;
1132  }
1133  }
1134  auto request = get_request_from_pool(multi_threading_params);
1135  request.full_path = file_reader.getCurrentFilePath();
1136  resize_buffer_if_needed(request.buffer, request.buffer_alloc_size, alloc_size);
1137 
1138  if (residual_buffer_size > 0) {
1139  memcpy(request.buffer.get(), residual_buffer.get(), residual_buffer_size);
1140  }
1141  size_t size = residual_buffer_size;
1142  size += file_reader.read(request.buffer.get() + residual_buffer_size,
1143  alloc_size - residual_buffer_size);
1144 
1145  if (size == 0) {
1146  // In some cases at the end of a file we will read 0 bytes even when
1147  // file_reader.isScanFinished() is false. Also add request back to the pool to be
1148  // picked up again in the next iteration.
1149  add_request_to_pool(multi_threading_params, request);
1150  continue;
1151  } else if (size == 1 && request.buffer[0] == copy_params.line_delim) {
1152  // In some cases files with newlines at the end will be encoded with a second
1153  // newline that can end up being the only thing in the buffer. Also add request
1154  // back to the pool to be picked up again in the next iteration.
1155  current_file_offset++;
1156  add_request_to_pool(multi_threading_params, request);
1157  continue;
1158  }
1159  unsigned int num_rows_in_buffer = 0;
1160  request.end_pos = parser.findRowEndPosition(alloc_size,
1161  request.buffer,
1162  size,
1163  copy_params,
1164  first_row_index_in_buffer,
1165  num_rows_in_buffer,
1166  &file_reader);
1167  request.buffer_size = size;
1168  request.buffer_alloc_size = alloc_size;
1169  request.first_row_index = first_row_index_in_buffer;
1170  request.file_offset = current_file_offset;
1171  request.buffer_row_count = num_rows_in_buffer;
1172  request.processed_row_count = 0;
1173  request.begin_pos = 0;
1174 
1175  residual_buffer_size = size - request.end_pos;
1176  if (residual_buffer_size > 0) {
1177  resize_buffer_if_needed(residual_buffer, residual_buffer_alloc_size, alloc_size);
1178  memcpy(residual_buffer.get(),
1179  request.buffer.get() + request.end_pos,
1180  residual_buffer_size);
1181  }
1182 
1183  current_file_offset += request.end_pos;
1184  first_row_index_in_buffer += num_rows_in_buffer;
1185 
1186  if (num_rows_in_buffer > 0) {
1187  dispatch_scan_request(multi_threading_params, request);
1188  } else {
1189  add_request_to_pool(multi_threading_params, request);
1190  }
1191 
1192  if (file_scan_param) {
1193  const int32_t last_fragment_index =
1194  (first_row_index_in_buffer) / table->maxFragRows;
1195  if (last_fragment_index > file_scan_param->fragment_id) {
1196  iterative_scan_last_fragment_id = last_fragment_index;
1197  break;
1198  }
1199  }
1200  }
1201 
1202  std::unique_lock<std::mutex> pending_requests_queue_lock(
1203  multi_threading_params.pending_requests_mutex);
1204  multi_threading_params.pending_requests_condition.wait(
1205  pending_requests_queue_lock, [&multi_threading_params] {
1206  return multi_threading_params.pending_requests.empty() ||
1207  (multi_threading_params.continue_processing == false);
1208  });
1209  multi_threading_params.continue_processing = false;
1210  pending_requests_queue_lock.unlock();
1211  multi_threading_params.pending_requests_condition.notify_all();
1212 }
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
bool no_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
void dispatch_all_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
bool request_pool_non_empty(MetadataScanMultiThreadingParams &multi_threading_params)
ParseBufferRequest get_request_from_pool(MetadataScanMultiThreadingParams &multi_threading_params)
void resize_buffer_if_needed(std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
void dispatch_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests_with_exception_handling ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call,
int &  iterative_scan_last_fragment_id 
)

Definition at line 1214 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_scan_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests_with_exception_handling(), foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan(), and foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

1228  {
1229  try {
1230  dispatch_scan_requests(table,
1231  buffer_size,
1232  file_path,
1233  file_reader,
1234  copy_params,
1235  multi_threading_params,
1236  first_row_index_in_buffer,
1237  current_file_offset,
1238  parser,
1239  file_scan_param,
1240  iterative_residual_buffer,
1241  is_first_file_scan_call,
1242  iterative_scan_last_fragment_id);
1243  } catch (...) {
1244  {
1245  std::unique_lock<std::mutex> pending_requests_lock(
1246  multi_threading_params.pending_requests_mutex);
1247  multi_threading_params.continue_processing = false;
1248  }
1249  multi_threading_params.pending_requests_condition.notify_all();
1250  throw;
1251  }
1252 }
void dispatch_scan_requests(const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests_with_exception_handling ( const foreign_storage::ForeignTable table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer iterative_residual_buffer,
const bool  is_first_file_scan_call 
)

Definition at line 1254 of file AbstractTextFileDataWrapper.cpp.

References dispatch_scan_requests_with_exception_handling().

1267  {
1268  int dummy;
1269  dispatch_scan_requests_with_exception_handling(table,
1270  buffer_size,
1271  file_path,
1272  file_reader,
1273  copy_params,
1274  multi_threading_params,
1275  first_row_index_in_buffer,
1276  current_file_offset,
1277  parser,
1278  file_scan_param,
1279  iterative_residual_buffer,
1280  is_first_file_scan_call,
1281  dummy);
1282 }
void dispatch_scan_requests_with_exception_handling(const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id)

+ Here is the call graph for this function:

template<typename T >
ArrayDatum foreign_storage::encode_as_array_datum ( const std::vector< T > &  data)
inline

Definition at line 33 of file GeospatialEncoder.h.

References heavydb.dtypes::T.

Referenced by foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

33  {
34  const size_t num_bytes = data.size() * sizeof(T);
35  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
36  memcpy(buffer.get(), data.data(), num_bytes);
37  return ArrayDatum(num_bytes, buffer, false);
38 }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219

+ Here is the caller graph for this function:

bool foreign_storage::fragment_maps_to_leaf ( const ChunkKey key)

Definition at line 128 of file FsiChunkUtils.cpp.

References CHECK, g_distributed_leaf_idx, g_distributed_num_leaves, get_fragment(), dist::is_aggregator(), and dist::is_distributed().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

128  {
133 }
int get_fragment(const ChunkKey &key)
Definition: types.h:52
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
bool is_aggregator()
Definition: distributed.cpp:33
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
#define CHECK(condition)
Definition: Logger.h:291
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const parquet::ColumnDescriptor * foreign_storage::get_column_descriptor ( const parquet::arrow::FileReader *  reader,
const int  logical_column_index 
)

Definition at line 46 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

48  {
49  return reader->parquet_reader()->metadata()->schema()->Column(logical_column_index);
50 }

+ Here is the caller graph for this function:

std::set<ChunkKey> foreign_storage::get_column_key_set ( const ChunkKey destination_chunk_key)

Referenced by foreign_storage::CachingForeignStorageMgr::fetchBuffer(), foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit(), and foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize().

+ Here is the caller graph for this function:

std::vector<ChunkKey> foreign_storage::get_column_key_vec ( const ChunkKey destination_chunk_key)
std::string foreign_storage::get_db_name ( int32_t  db_id)

Definition at line 31 of file InternalSystemDataWrapper.cpp.

References Catalog_Namespace::DBMetadata::dbName, Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_dashboards(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_permissions(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_tables(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_users(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

31  {
32  Catalog_Namespace::DBMetadata db_metadata;
33  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
34  if (sys_catalog.getMetadataForDBById(db_id, db_metadata)) {
35  return db_metadata.dbName;
36  } else {
37  // Database has been deleted.
38  return kDeletedValueIndicator;
39  }
40 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:343

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const foreign_storage::ForeignTable & foreign_storage::get_foreign_table_for_key ( const ChunkKey key)

Definition at line 101 of file FsiChunkUtils.cpp.

References CHECK, get_table_prefix(), Catalog_Namespace::SysCatalog::getCatalog(), and Catalog_Namespace::SysCatalog::instance().

Referenced by is_append_table_chunk_key(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

101  {
102  auto [db_id, tb_id] = get_table_prefix(key);
103  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
104  CHECK(catalog);
105  auto table = catalog->getForeignTable(tb_id);
106  CHECK(table);
107  return *table;
108 }
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::get_max_chunk_size ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getBufferSize().

+ Here is the caller graph for this function:

template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > foreign_storage::get_min_max_bounds ( )
inline

Definition at line 31 of file SharedMetadataValidator.h.

31  {
32  static_assert(std::is_signed<D>::value,
33  "'get_min_max_bounds' is only valid for signed types");
34  return {get_null_value<D>() + 1, std::numeric_limits<D>::max()};
35 }
std::optional<ParseBufferRequest> foreign_storage::get_next_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets the next metadata scan request object from the pending requests queue. A null optional is returned if there are no further requests to be processed.

Definition at line 529 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by populate_chunks(), and scan_metadata().

530  {
531  std::unique_lock<std::mutex> pending_requests_lock(
532  multi_threading_params.pending_requests_mutex);
533  multi_threading_params.pending_requests_condition.wait(
534  pending_requests_lock, [&multi_threading_params] {
535  return !multi_threading_params.pending_requests.empty() ||
536  !multi_threading_params.continue_processing;
537  });
538  if (multi_threading_params.pending_requests.empty()) {
539  return {};
540  }
541  auto request = std::move(multi_threading_params.pending_requests.front());
542  multi_threading_params.pending_requests.pop();
543  pending_requests_lock.unlock();
544  multi_threading_params.pending_requests_condition.notify_all();
545  return std::move(request);
546 }

+ Here is the caller graph for this function:

template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V foreign_storage::get_null_value ( )
inline

Definition at line 21 of file SharedMetadataValidator.h.

21  {
22  return inline_int_null_value<V>();
23 }
size_t foreign_storage::get_num_threads ( const ForeignTable &  table)

Definition at line 17 of file AbstractFileStorageDataWrapper.cpp.

References foreign_storage::OptionsContainer::getOption(), import_export::num_import_threads(), and foreign_storage::AbstractFileStorageDataWrapper::THREADS_KEY.

Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

17  {
18  auto num_threads = 0;
19  if (auto opt = table.getOption(AbstractFileStorageDataWrapper::THREADS_KEY);
20  opt.has_value()) {
21  num_threads = std::stoi(opt.value());
22  }
23  return import_export::num_import_threads(num_threads);
24 }
size_t num_import_threads(const int32_t copy_params_threads)
Definition: thread_count.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< int, int > foreign_storage::get_parquet_table_size ( const ReaderPtr &  reader)

Definition at line 39 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), foreign_storage::LazyParquetChunkLoader::loadRowGroups(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

39  {
40  auto file_metadata = reader->parquet_reader()->metadata();
41  const auto num_row_groups = file_metadata->num_row_groups();
42  const auto num_columns = file_metadata->num_columns();
43  return std::make_pair(num_row_groups, num_columns);
44 }

+ Here is the caller graph for this function:

parquet::Type::type foreign_storage::get_physical_type ( ReaderPtr &  reader,
const int  logical_column_index 
)

Definition at line 52 of file ParquetShared.cpp.

Referenced by anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

52  {
53  return reader->parquet_reader()
54  ->metadata()
55  ->schema()
56  ->Column(logical_column_index)
57  ->physical_type();
58 }

+ Here is the caller graph for this function:

std::shared_ptr< ChunkMetadata > foreign_storage::get_placeholder_metadata ( const SQLTypeInfo type,
size_t  num_elements 
)

Definition at line 77 of file FsiChunkUtils.cpp.

References SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::InternalSystemDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::updateRolledOffChunks().

78  {
79  ForeignStorageBuffer empty_buffer;
80  // Use default encoder metadata as in parquet wrapper
81  empty_buffer.initEncoder(type);
82 
83  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(type);
84  chunk_metadata->numElements = num_elements;
85 
86  if (!type.is_varlen_indeed()) {
87  chunk_metadata->numBytes = type.get_size() * num_elements;
88  }
89  // min/max not set by default for arrays, so get from elem type encoder
90  if (type.is_array()) {
91  ForeignStorageBuffer scalar_buffer;
92  scalar_buffer.initEncoder(type.get_elem_type());
93  auto scalar_metadata = scalar_buffer.getEncoder()->getMetadata(type.get_elem_type());
94  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
95  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
96  }
97 
98  return chunk_metadata;
99 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231
bool is_varlen_indeed() const
Definition: sqltypes.h:626
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:963
bool is_array() const
Definition: sqltypes.h:588

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ParseBufferRequest foreign_storage::get_request_from_pool ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets a request from the metadata scan request pool.

Definition at line 908 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests().

909  {
910  std::unique_lock<std::mutex> request_pool_lock(
911  multi_threading_params.request_pool_mutex);
912  multi_threading_params.request_pool_condition.wait(
913  request_pool_lock,
914  [&multi_threading_params] { return !multi_threading_params.request_pool.empty(); });
915  auto request = std::move(multi_threading_params.request_pool.front());
916  multi_threading_params.request_pool.pop();
917  request_pool_lock.unlock();
918  CHECK(request.buffer);
919  return request;
920 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::unique_ptr< ColumnDescriptor > foreign_storage::get_sub_type_column_descriptor ( const ColumnDescriptor column)

Definition at line 76 of file ParquetShared.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, ColumnDescriptor::db_id, SQLTypeInfo::get_elem_type(), SQLTypeInfo::set_size(), and ColumnDescriptor::tableId.

Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_array_mapping().

77  {
78  auto column_type = column->columnType.get_elem_type();
79  if (column_type.get_size() == -1 && column_type.is_dict_encoded_string()) {
80  column_type.set_size(4); // override default size of -1
81  }
82  return std::make_unique<ColumnDescriptor>(
83  column->tableId, column->columnId, column->columnName, column_type, column->db_id);
84 }
void set_size(int s)
Definition: sqltypes.h:501
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:963
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::get_table_name ( int32_t  db_id,
int32_t  table_id 
)

Definition at line 42 of file InternalSystemDataWrapper.cpp.

References CHECK, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

42  {
43  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
44  CHECK(catalog);
45  auto table_name = catalog->getTableName(table_id);
46  if (table_name.has_value()) {
47  return table_name.value();
48  } else {
49  // It is possible for the table to be concurrently deleted while querying the system
50  // table.
51  return kDeletedValueIndicator;
52  }
53 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
FileRegion &  file_region 
)

Definition at line 44 of file CsvShared.cpp.

References CHECK, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::json_utils::get_value_from_object(), foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

44  {
45  CHECK(json_val.IsObject());
47  json_val, file_region.first_row_file_offset, "first_row_file_offset");
49  json_val, file_region.first_row_index, "first_row_index");
50  json_utils::get_value_from_object(json_val, file_region.region_size, "region_size");
51  json_utils::get_value_from_object(json_val, file_region.row_count, "row_count");
52  if (json_val.HasMember("filename")) {
53  json_utils::get_value_from_object(json_val, file_region.filename, "filename");
54  }
55 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:172
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
RowGroupInterval &  value 
)

Definition at line 671 of file ParquetDataWrapper.cpp.

References CHECK, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, foreign_storage::json_utils::get_value_from_object(), and foreign_storage::RowGroupInterval::start_index.

671  {
672  CHECK(json_val.IsObject());
673  json_utils::get_value_from_object(json_val, value.file_path, "file_path");
674  json_utils::get_value_from_object(json_val, value.start_index, "start_index");
675  json_utils::get_value_from_object(json_val, value.end_index, "end_index");
676 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:172
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::init_chunk_for_column ( const ChunkKey chunk_key,
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers,
Chunk_NS::Chunk chunk 
)

Definition at line 21 of file FsiChunkUtils.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Chunk_NS::Chunk::setPinnable(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMapForColumns().

25  {
26  auto catalog =
28  CHECK(catalog);
29 
30  ChunkKey data_chunk_key = chunk_key;
31  AbstractBuffer* data_buffer = nullptr;
32  AbstractBuffer* index_buffer = nullptr;
33  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
34  chunk_key[CHUNK_KEY_COLUMN_IDX]);
35 
36  if (column->columnType.is_varlen_indeed()) {
37  data_chunk_key.push_back(1);
38  ChunkKey index_chunk_key = chunk_key;
39  index_chunk_key.push_back(2);
40 
41  CHECK(buffers.find(data_chunk_key) != buffers.end());
42  CHECK(buffers.find(index_chunk_key) != buffers.end());
43 
44  data_buffer = buffers.find(data_chunk_key)->second;
45  index_buffer = buffers.find(index_chunk_key)->second;
46  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
47  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
48 
49  size_t index_offset_size{0};
50  if (column->columnType.is_string() || column->columnType.is_geometry()) {
51  index_offset_size = sizeof(StringOffsetT);
52  } else if (column->columnType.is_array()) {
53  index_offset_size = sizeof(ArrayOffsetT);
54  } else {
55  UNREACHABLE();
56  }
57  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
58  index_buffer->reserve(index_offset_size *
59  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
60  }
61  } else {
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68  }
69 
70  chunk.setPinnable(false);
71  chunk.setColumnDesc(column);
72  chunk.setBuffer(data_buffer);
73  chunk.setIndexBuffer(index_buffer);
74  chunk.initEncoder();
75 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:36
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:152
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define UNREACHABLE()
Definition: Logger.h:337
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
int32_t StringOffsetT
Definition: sqltypes.h:1258
static SysCatalog & instance()
Definition: SysCatalog.h:343
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1259
void initEncoder()
Definition: Chunk.cpp:290
#define CHECK(condition)
Definition: Logger.h:291
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_append_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 118 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and foreign_storage::ForeignTable::isAppendMode().

Referenced by foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

118  {
119  return get_foreign_table_for_key(chunk_key).isAppendMode();
120 }
bool isAppendMode() const
Checks if the table is in append mode.
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_replicated_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 114 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and table_is_replicated().

Referenced by is_shardable_key().

114  {
116 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)
bool table_is_replicated(const TableDescriptor *td)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_shardable_key ( const ChunkKey key)

Definition at line 122 of file FsiChunkUtils.cpp.

References dist::is_aggregator(), dist::is_distributed(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

122  {
123  return (dist::is_distributed() && !dist::is_aggregator() &&
125 }
bool is_system_table_chunk_key(const ChunkKey &chunk_key)
bool is_replicated_table_chunk_key(const ChunkKey &chunk_key)
bool is_aggregator()
Definition: distributed.cpp:33
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_system_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 110 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and TableDescriptor::is_system_table.

Referenced by is_shardable_key().

110  {
111  return get_foreign_table_for_key(chunk_key).is_system_table;
112 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_table_enabled_on_node ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper().

+ Here is the caller graph for this function:

bool foreign_storage::is_valid_source_type ( const import_export::CopyParams copy_params)

Verifies whether the source_type of the given copy parameters is valid.

Definition at line 120 of file ForeignDataWrapperFactory.cpp.

References import_export::kDelimitedFile, import_export::kParquetFile, import_export::kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and import_export::ForeignDataImporter::importGeneral().

+ Here is the caller graph for this function:

bool foreign_storage::key_does_not_shard_to_leaf ( const ChunkKey key)

Definition at line 135 of file FsiChunkUtils.cpp.

References fragment_maps_to_leaf(), and is_shardable_key().

Referenced by cache_blocks(), populate_string_dictionary(), and anonymous_namespace{RelAlgExecutor.cpp}::set_parallelism_hints().

135  {
136  return (is_shardable_key(key) && !fragment_maps_to_leaf(key));
137 }
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::num_rows_to_process ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  rows_remaining 
)

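Returns the number of rows to process given the current position defined by start_row_index, the max fragment size, and the number of rows left to process: the smaller of rows_remaining and the number of row slots still free in the current fragment.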

Definition at line 483 of file AbstractTextFileDataWrapper.cpp.

Referenced by QueryExecutionContext::launchCpuCode(), and populate_chunks().

485  {
486  size_t start_position_in_fragment = start_row_index % max_fragment_size;
487  return std::min<size_t>(rows_remaining, max_fragment_size - start_position_in_fragment);
488 }

+ Here is the caller graph for this function:

UniqueReaderPtr foreign_storage::open_parquet_table ( const std::string &  file_path,
std::shared_ptr< arrow::fs::FileSystem > &  file_system 
)

Definition at line 26 of file ParquetShared.cpp.

Referenced by foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::FileReaderMap::insert(), and foreign_storage::LazyParquetChunkLoader::loadRowGroups().

27  {
28  UniqueReaderPtr reader;
29  auto file_result = file_system->OpenInputFile(file_path);
30  if (!file_result.ok()) {
31  throw std::runtime_error{"Unable to access " + file_system->type_name() + " file: " +
32  file_path + ". " + file_result.status().message()};
33  }
34  auto infile = file_result.ValueOrDie();
35  PARQUET_THROW_NOT_OK(OpenFile(infile, arrow::default_memory_pool(), &reader));
36  return reader;
37 }
std::unique_ptr< parquet::arrow::FileReader > UniqueReaderPtr
Definition: ParquetShared.h:32

+ Here is the caller graph for this function:

ParseFileRegionResult foreign_storage::parse_file_regions ( const FileRegions &  file_regions,
const size_t  start_index,
const size_t  end_index,
FileReader &  file_reader,
ParseBufferRequest &  parse_file_request,
const std::map< int, Chunk_NS::Chunk > &  column_id_to_chunk_map,
const TextFileBufferParser &  parser 
)

Parses a set of file regions given a handle to the file and the range of indexes (start_index through end_index, inclusive) of the file regions to be parsed.

Definition at line 216 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, foreign_storage::ParseBufferRequest::buffer_size, CHECK, CHECK_EQ, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, DEBUG_TIMER, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseFileRegionResult::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getTableName(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::FileReader::readRegion(), foreign_storage::ParseBufferResult::rejected_rows, run_benchmark_import::result, foreign_storage::ParseBufferResult::row_count, and throw_unexpected_number_of_items().

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunks().

223  {
224  auto timer = DEBUG_TIMER(__func__);
225  ParseFileRegionResult load_file_region_result{};
226  load_file_region_result.file_offset = file_regions[start_index].first_row_file_offset;
227  load_file_region_result.row_count = 0;
228 
229  ParseBufferResult result;
230  for (size_t i = start_index; i <= end_index; i++) {
231  CHECK(file_regions[i].region_size <= parse_file_request.buffer_size);
232  auto read_size = file_reader.readRegion(parse_file_request.buffer.get(),
233  file_regions[i].first_row_file_offset,
234  file_regions[i].region_size);
235  if (file_regions[i].region_size != read_size) {
236  throw_unexpected_number_of_items(file_regions[i].region_size,
237  read_size,
238  "bytes",
239  parse_file_request.getTableName());
240  }
241  parse_file_request.begin_pos = 0;
242  parse_file_request.end_pos = file_regions[i].region_size;
243  parse_file_request.first_row_index = file_regions[i].first_row_index;
244  parse_file_request.file_offset = file_regions[i].first_row_file_offset;
245  parse_file_request.process_row_count = file_regions[i].row_count;
246 
247  result = parser.parseBuffer(parse_file_request, i == end_index);
248  CHECK_EQ(file_regions[i].row_count, result.row_count);
249  for (const auto& rejected_row_index : result.rejected_rows) {
250  load_file_region_result.rejected_row_indices.insert(
251  load_file_region_result.row_count + rejected_row_index);
252  }
253  load_file_region_result.row_count += result.row_count;
254  }
255  load_file_region_result.column_id_to_data_blocks_map =
256  result.column_id_to_data_blocks_map;
257  return load_file_region_result;
258 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void throw_unexpected_number_of_items(const size_t num_expected, const size_t num_loaded, const std::string &item_type, const std::string &foreign_table_name)
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<size_t> foreign_storage::partition_by_fragment ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  buffer_row_count 
)

Given a start row index, maximum fragment size, and number of rows in a buffer, this function returns a vector indicating how the rows in the buffer should be partitioned in order to fill up available fragment slots while staying within the capacity of fragments.

Definition at line 496 of file AbstractTextFileDataWrapper.cpp.

References CHECK.

Referenced by scan_metadata().

498  {
499  CHECK(buffer_row_count > 0);
500  std::vector<size_t> partitions{};
501  size_t remaining_rows_in_last_fragment;
502  if (start_row_index % max_fragment_size == 0) {
503  remaining_rows_in_last_fragment = 0;
504  } else {
505  remaining_rows_in_last_fragment =
506  max_fragment_size - (start_row_index % max_fragment_size);
507  }
508  if (buffer_row_count <= remaining_rows_in_last_fragment) {
509  partitions.emplace_back(buffer_row_count);
510  } else {
511  if (remaining_rows_in_last_fragment > 0) {
512  partitions.emplace_back(remaining_rows_in_last_fragment);
513  }
514  size_t remaining_buffer_row_count =
515  buffer_row_count - remaining_rows_in_last_fragment;
516  while (remaining_buffer_row_count > 0) {
517  partitions.emplace_back(
518  std::min<size_t>(remaining_buffer_row_count, max_fragment_size));
519  remaining_buffer_row_count -= partitions.back();
520  }
521  }
522  return partitions;
523 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::pair<std::map<int, DataBlockPtr>, std::map<int, DataBlockPtr> > foreign_storage::partition_data_blocks ( const std::map< int, const ColumnDescriptor * > &  column_by_id,
const std::map< int, DataBlockPtr > &  data_blocks 
)

Partitions data blocks so that dictionary encoded string columns are kept disjoint from all other columns.

Returns
A pair of DataBlockPtr maps, with the first containing the columns that are string dictionary encoded and the second containing the complement (all remaining columns).

Definition at line 669 of file AbstractTextFileDataWrapper.cpp.

References shared::get_from_map().

Referenced by populate_chunks_using_data_blocks().

671  {
672  std::map<int, DataBlockPtr> dict_encoded_data_blocks;
673  std::map<int, DataBlockPtr> none_dict_encoded_data_blocks;
674  for (auto& [column_id, data_block] : data_blocks) {
675  const auto column = shared::get_from_map(column_by_id, column_id);
676  if (column->columnType.is_dict_encoded_string()) {
677  dict_encoded_data_blocks[column_id] = data_block;
678  } else {
679  none_dict_encoded_data_blocks[column_id] = data_block;
680  }
681  }
682  return {dict_encoded_data_blocks, none_dict_encoded_data_blocks};
683 }
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::set< T > &  items,
size_t  max_threads 
)

Definition at line 41 of file FsiChunkUtils.h.

Referenced by create_futures_for_workers(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

41  {
42  const size_t items_per_thread = (items.size() + (max_threads - 1)) / max_threads;
43  std::list<std::set<T>> items_by_thread;
44  auto i = 0U;
45  for (auto item : items) {
46  if (i++ % items_per_thread == 0) {
47  items_by_thread.emplace_back(std::set<T>{});
48  }
49  items_by_thread.back().emplace(item);
50  }
51  return items_by_thread;
52 }

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::vector< T > &  items,
size_t  max_threads 
)

Definition at line 60 of file FsiChunkUtils.h.

60  {
61  const size_t items_per_thread = (items.size() + (max_threads - 1)) / max_threads;
62  std::list<std::vector<T>> items_by_thread;
63  auto i = 0U;
64  for (auto item : items) {
65  if (i++ % items_per_thread == 0) {
66  items_by_thread.emplace_back(std::vector<T>{});
67  }
68  items_by_thread.back().emplace_back(item);
69  }
70  return items_by_thread;
71 }
void foreign_storage::populate_chunks ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser,
foreign_storage::IterativeFileScanParameters file_scan_param 
)

Consumes and processes scan requests from a pending requests queue and populates chunks during an iterative file scan.

Definition at line 978 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer_row_count, CHECK_LE, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, defer_scan_request(), foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, get_next_scan_request(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getMaxFragRows(), foreign_storage::ParseBufferRequest::import_buffers, num_rows_to_process(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, populate_chunks_using_data_blocks(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::ParseBufferRequest::processed_row_count, run_benchmark_import::result, and update_delete_buffer().

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

981  {
982  std::map<int, const ColumnDescriptor*> column_by_id{};
983  while (true) {
984  auto request_opt = get_next_scan_request(multi_threading_params);
985  if (!request_opt.has_value()) {
986  break;
987  }
988  ParseBufferRequest& request = request_opt.value();
989  try {
990  if (column_by_id.empty()) {
991  for (const auto column : request.getColumns()) {
992  column_by_id[column->columnId] = column;
993  }
994  }
995  CHECK_LE(request.processed_row_count, request.buffer_row_count);
996  for (size_t num_rows_left_to_process =
997  request.buffer_row_count - request.processed_row_count;
998  num_rows_left_to_process > 0;
999  num_rows_left_to_process =
1000  request.buffer_row_count - request.processed_row_count) {
1001  // NOTE: `request.begin_pos` state is required to be set correctly by this point
1002  // in execution
1003  size_t row_index = request.first_row_index + request.processed_row_count;
1004  int fragment_id = row_index / request.getMaxFragRows();
1005  if (fragment_id >
1006  file_scan_param.fragment_id) { // processing must continue next iteration
1007  defer_scan_request(multi_threading_params, request);
1008  return;
1009  }
1010  request.process_row_count = num_rows_to_process(
1011  row_index, request.getMaxFragRows(), num_rows_left_to_process);
1012  for (const auto& import_buffer : request.import_buffers) {
1013  if (import_buffer != nullptr) {
1014  import_buffer->clear();
1015  }
1016  }
1017  auto result = parser.parseBuffer(request, true, true, true);
1018  size_t start_position_in_fragment = row_index % request.getMaxFragRows();
1019  populate_chunks_using_data_blocks(multi_threading_params,
1020  fragment_id,
1021  request,
1022  result,
1023  column_by_id,
1024  fragment_id_to_file_regions_map,
1025  file_scan_param,
1026  start_position_in_fragment);
1027 
1028  request.processed_row_count += result.row_count;
1029  request.begin_pos = result.row_offsets.back() - request.file_offset;
1030 
1032  request, result, file_scan_param, start_position_in_fragment);
1033  }
1034 
1035  } catch (...) {
1036  // Re-add request to pool so we dont block any other threads
1037  {
1038  std::lock_guard<std::mutex> pending_requests_lock(
1039  multi_threading_params.pending_requests_mutex);
1040  multi_threading_params.continue_processing = false;
1041  }
1042  add_request_to_pool(multi_threading_params, request);
1043  throw;
1044  }
1045  add_request_to_pool(multi_threading_params, request);
1046  }
1047 }
void populate_chunks_using_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t expected_current_element_count)
size_t num_rows_to_process(const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void update_delete_buffer(const ParseBufferRequest &request, const ParseBufferResult &result, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t start_position_in_fragment)
void defer_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
#define CHECK_LE(x, y)
Definition: Logger.h:304
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_chunks_using_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const foreign_storage::IterativeFileScanParameters file_scan_param,
const size_t  expected_current_element_count 
)

Definition at line 707 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), append_data_block_to_chunk(), threading_serial::async(), CHECK_EQ, CHECK_GT, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::import_buffers, kENCODING_DICT, partition_data_blocks(), and foreign_storage::ParseBufferResult::row_count.

Referenced by populate_chunks().

715  {
716  std::unique_lock<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
717  // File regions should be added in same order as appendData
718  add_file_region(fragment_id_to_file_regions_map,
719  fragment_id,
720  request.first_row_index,
721  result,
722  request.getFilePath());
723  CHECK_EQ(fragment_id, file_scan_param.fragment_id);
724 
725  // start string encoding asynchronously
726  std::vector<std::pair<const size_t, std::future<int8_t*>>>
727  encoded_data_block_ptrs_futures;
728 
729  for (const auto& import_buffer : request.import_buffers) {
730  if (import_buffer == nullptr) {
731  continue;
732  }
733 
734  if (import_buffer->getTypeInfo().is_dict_encoded_string()) {
735  auto string_payload_ptr = import_buffer->getStringBuffer();
736  CHECK_EQ(kENCODING_DICT, import_buffer->getTypeInfo().get_compression());
737 
738  auto column_id = import_buffer->getColumnDesc()->columnId;
739  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
740  column_id, std::async(std::launch::async, [&import_buffer, string_payload_ptr] {
741  import_buffer->addDictEncodedString(*string_payload_ptr);
742  return import_buffer->getStringDictBuffer();
743  })));
744  }
745  }
746 
747  auto process_subset_of_data_blocks =
748  [&](const std::map<int, DataBlockPtr>& data_blocks) {
749  for (auto& [column_id, data_block] : data_blocks) {
750  const auto column = column_by_id[column_id];
751  lock.unlock(); // unlock the fragment based lock in order to achieve better
752  // performance
753  append_data_block_to_chunk(file_scan_param,
754  data_block,
755  result.row_count,
756  column_id,
757  column,
758  expected_current_element_count);
759  lock.lock();
760  }
761  };
762 
763  auto [dict_encoded_data_blocks, none_dict_encoded_data_blocks] =
764  partition_data_blocks(column_by_id, result.column_id_to_data_blocks_map);
765 
766  process_subset_of_data_blocks(
767  none_dict_encoded_data_blocks); // skip dict string columns
768 
769  // wait for the async requests we made for string dictionary
770  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
771  encoded_ptr_future.second.wait();
772  }
773  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
774  CHECK_GT(dict_encoded_data_blocks.count(encoded_ptr_future.first), 0UL);
775  dict_encoded_data_blocks[encoded_ptr_future.first].numbersPtr =
776  encoded_ptr_future.second.get();
777  }
778 
779  process_subset_of_data_blocks(
780  dict_encoded_data_blocks); // process only dict string columns
781 }
void append_data_block_to_chunk(const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const int column_id, const ColumnDescriptor *column, const size_t element_count_required)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::pair< std::map< int, DataBlockPtr >, std::map< int, DataBlockPtr > > partition_data_blocks(const std::map< int, const ColumnDescriptor * > &column_by_id, const std::map< int, DataBlockPtr > &data_blocks)
#define CHECK_GT(x, y)
Definition: Logger.h:305
future< Result > async(Fn &&fn, Args &&...args)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_string_dictionary ( int32_t  table_id,
int32_t  col_id,
int32_t  db_id 
)

Definition at line 205 of file Execute.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::getChunk(), Catalog_Namespace::SysCatalog::instance(), anonymous_namespace{Execute.cpp}::is_empty_table(), and key_does_not_shard_to_leaf().

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::prepare_string_dictionaries().

205  {
206  const auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
207  CHECK(catalog);
208  if (const auto foreign_table = dynamic_cast<const ForeignTable*>(
209  catalog->getMetadataForTable(table_id, false))) {
210  const auto col_desc = catalog->getMetadataForColumn(table_id, col_id);
211  if (col_desc->columnType.is_dict_encoded_type()) {
212  auto& fragmenter = foreign_table->fragmenter;
213  CHECK(fragmenter != nullptr);
214  if (is_empty_table(fragmenter.get())) {
215  return;
216  }
217  for (const auto& frag : fragmenter->getFragmentsForQuery().fragments) {
218  ChunkKey chunk_key = {db_id, table_id, col_id, frag.fragmentId};
219  // If the key is sharded across leaves, only populate fragments that are sharded
220  // to this leaf.
221  if (key_does_not_shard_to_leaf(chunk_key)) {
222  continue;
223  }
224 
225  const ChunkMetadataMap& metadata_map = frag.getChunkMetadataMap();
226  CHECK(metadata_map.find(col_id) != metadata_map.end());
227  if (auto& meta = metadata_map.at(col_id); meta->isPlaceholder()) {
228  // When this goes out of scope it will stay in CPU cache but become
229  // evictable
230  auto chunk = Chunk_NS::Chunk::getChunk(col_desc,
231  &(catalog->getDataMgr()),
232  chunk_key,
234  0,
235  0,
236  0);
237  }
238  }
239  }
240  }
241 }
std::vector< int > ChunkKey
Definition: types.h:36
bool is_empty_table(Fragmenter_Namespace::AbstractFragmenter *fragmenter)
Definition: Execute.cpp:195
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::process_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map 
)

Updates the metadata encapsulated in encoders for all table columns, given new data blocks obtained from parsing a new set of rows in a file buffer. If a cache is available, also appends the data blocks to chunks in the cache.

Definition at line 788 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), cache_blocks(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::row_count, and update_stats().

Referenced by scan_metadata().

793  {
794  std::lock_guard<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
795  // File regions should be added in same order as appendData
796  add_file_region(fragment_id_to_file_regions_map,
797  fragment_id,
798  request.first_row_index,
799  result,
800  request.getFilePath());
801 
802  for (auto& [column_id, data_block] : result.column_id_to_data_blocks_map) {
803  ChunkKey chunk_key{request.db_id, request.getTableId(), column_id, fragment_id};
804  const auto column = column_by_id[column_id];
805  if (column->columnType.is_varlen_indeed()) {
806  chunk_key.emplace_back(1);
807  }
808  if (multi_threading_params.chunk_encoder_buffers.find(chunk_key) ==
809  multi_threading_params.chunk_encoder_buffers.end()) {
810  multi_threading_params.chunk_encoder_buffers[chunk_key] =
811  std::make_unique<ForeignStorageBuffer>();
812  multi_threading_params.chunk_encoder_buffers[chunk_key]->initEncoder(
813  column->columnType);
814  }
815  update_stats(multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder(),
816  column->columnType,
817  data_block,
818  result.row_count);
819  size_t num_elements = multi_threading_params.chunk_encoder_buffers[chunk_key]
820  ->getEncoder()
821  ->getNumElems() +
822  result.row_count;
823  multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder()->setNumElems(
824  num_elements);
825  cache_blocks(
826  multi_threading_params.cached_chunks,
827  data_block,
828  result.row_count,
829  chunk_key,
830  column,
831  (num_elements - result.row_count) == 0, // Is the first block added to this chunk
832  multi_threading_params.disable_cache);
833  }
834 }
std::vector< int > ChunkKey
Definition: types.h:36
void update_stats(Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
void cache_blocks(std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table ( Catalog_Namespace::Catalog catalog,
const std::string &  table_name,
const bool  evict_cached_entries 
)

Definition at line 103 of file ForeignTableRefresh.cpp.

References CHECK, shared::contains(), StorageType::FOREIGN_TABLE, dist::is_leaf_node(), Catalog_Namespace::kAggregatorOnlySystemTables, and refresh_foreign_table_unlocked().

Referenced by RefreshForeignTablesCommand::execute(), and foreign_storage::ForeignTableRefreshScheduler::start().

105  {
106  if (dist::is_leaf_node() &&
108  // Skip aggregator only system tables on leaf nodes.
109  return;
110  }
111  auto table_lock =
112  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
114  catalog, table_name, false));
115 
116  const TableDescriptor* td = (*table_lock)();
117  if (td->storageType != StorageType::FOREIGN_TABLE) {
118  throw std::runtime_error{
119  table_name +
120  " is not a foreign table. Refreshes are applicable to only foreign tables."};
121  }
122 
123  auto foreign_table = dynamic_cast<const ForeignTable*>(td);
124  CHECK(foreign_table);
125  refresh_foreign_table_unlocked(catalog, *foreign_table, evict_cached_entries);
126 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
bool is_leaf_node()
Definition: distributed.cpp:29
#define CHECK(condition)
Definition: Logger.h:291
static const std::array< std::string, 4 > kAggregatorOnlySystemTables
Definition: Catalog.h:120
void refresh_foreign_table_unlocked(Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
static constexpr char const * FOREIGN_TABLE

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table_unlocked ( Catalog_Namespace::Catalog catalog,
const ForeignTable &  td,
const bool  evict_cached_entries 
)

Definition at line 35 of file ForeignTableRefresh.cpp.

References CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, foreign_storage::anonymous_namespace{ForeignTableRefresh.cpp}::clear_cpu_and_gpu_cache(), Catalog_Namespace::DBMetadata::dbId, Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), PostEvictionRefreshException::getOriginalException(), logger::INFO, CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable(), foreign_storage::ForeignTable::isAppendMode(), LOG, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, TableDescriptor::tableName, and Catalog_Namespace::Catalog::updateForeignTableRefreshTimes().

Referenced by refresh_foreign_table().

37  {
38  LOG(INFO) << "Starting refresh for table: " << td.tableName;
39  auto& data_mgr = catalog.getDataMgr();
40  ChunkKey table_key{catalog.getCurrentDB().dbId, td.tableId};
41  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
42  catalog.removeFragmenterForTable(td.tableId);
43 
44  auto fsm = data_mgr.getPersistentStorageMgr()->getForeignStorageMgr();
45  CHECK(fsm);
46  if (auto cfm = dynamic_cast<CachingForeignStorageMgr*>(fsm)) {
47  if (!cfm->hasStoredDataWrapper(table_key[CHUNK_KEY_DB_IDX],
48  table_key[CHUNK_KEY_TABLE_IDX])) {
49  // If there is no wrapper stored on disk, then we have not populated the metadata
50  // for this table and we are free to skip the refresh.
51  catalog.updateForeignTableRefreshTimes(td.tableId);
52  return;
53  }
54  }
55 
56  std::map<ChunkKey, std::shared_ptr<ChunkMetadata>> old_chunk_metadata_by_chunk_key;
57  if (td.isAppendMode() && !evict_cached_entries) {
58  ChunkMetadataVector metadata_vec;
59  data_mgr.getChunkMetadataVecForKeyPrefix(metadata_vec, table_key);
60  int last_fragment_id = 0;
61  for (const auto& [key, metadata] : metadata_vec) {
62  if (key[CHUNK_KEY_FRAGMENT_IDX] > last_fragment_id) {
63  last_fragment_id = key[CHUNK_KEY_FRAGMENT_IDX];
64  }
65  old_chunk_metadata_by_chunk_key[key] = metadata;
66  }
67  for (const auto& [key, metadata] : metadata_vec) {
68  if (key[CHUNK_KEY_FRAGMENT_IDX] == last_fragment_id) {
69  clear_cpu_and_gpu_cache(data_mgr, key);
70  }
71  }
72  } else {
73  clear_cpu_and_gpu_cache(data_mgr, table_key);
74  }
75 
76  try {
77  fsm->refreshTable(table_key, evict_cached_entries);
78  catalog.updateForeignTableRefreshTimes(td.tableId);
79  } catch (PostEvictionRefreshException& e) {
80  catalog.updateForeignTableRefreshTimes(td.tableId);
81  clear_cpu_and_gpu_cache(data_mgr, table_key);
82  throw e.getOriginalException();
83  } catch (...) {
84  clear_cpu_and_gpu_cache(data_mgr, table_key);
85  throw;
86  }
87 
88  // Delete cached rolled off/updated chunks.
89  if (!old_chunk_metadata_by_chunk_key.empty()) {
90  ChunkMetadataVector new_metadata_vec;
91  data_mgr.getChunkMetadataVecForKeyPrefix(new_metadata_vec, table_key);
92  for (const auto& [key, metadata] : new_metadata_vec) {
93  auto it = old_chunk_metadata_by_chunk_key.find(key);
94  if (it != old_chunk_metadata_by_chunk_key.end() &&
95  it->second->numElements != metadata->numElements) {
96  clear_cpu_and_gpu_cache(data_mgr, key);
97  }
98  }
99  }
100  LOG(INFO) << "Completed refresh for table: " << td.tableName;
101 }
static void invalidateCachesByTable(size_t table_key)
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:249
#define LOG(tag)
Definition: Logger.h:285
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::runtime_error getOriginalException()
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:248
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4064
void clear_cpu_and_gpu_cache(Data_Namespace::DataMgr &data_mgr, const ChunkKey &key_prefix)
#define CHECK(condition)
Definition: Logger.h:291
void updateForeignTableRefreshTimes(const int32_t table_id)
Definition: Catalog.cpp:5468

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::request_pool_non_empty ( MetadataScanMultiThreadingParams &  multi_threading_params)

Definition at line 922 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests().

922  {
923  std::unique_lock<std::mutex> request_pool_lock(
924  multi_threading_params.request_pool_mutex);
925  return !multi_threading_params.request_pool.empty();
926 }

+ Here is the caller graph for this function:

void foreign_storage::reset_multithreading_params ( foreign_storage::MetadataScanMultiThreadingParams multi_threading_params)

Definition at line 1063 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, and foreign_storage::MetadataScanMultiThreadingParams::request_pool.

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

1064  {
1065  multi_threading_params.request_pool = {};
1066  multi_threading_params.cached_chunks = {};
1067  multi_threading_params.pending_requests = {};
1068  multi_threading_params.deferred_requests = {};
1069  multi_threading_params.chunk_encoder_buffers.clear();
1070 }
std::map< ChunkKey, std::unique_ptr< ForeignStorageBuffer > > chunk_encoder_buffers

+ Here is the caller graph for this function:

void foreign_storage::resize_buffer_if_needed ( std::unique_ptr< char[]> &  buffer,
size_t &  buffer_size,
const size_t  alloc_size 
)

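Reallocates the given buffer to the current buffer allocation size if the buffer size is less than that allocation size. The buffer is replaced with a newly allocated one, so its previous contents are not preserved.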

Definition at line 1053 of file AbstractTextFileDataWrapper.cpp.

References CHECK_LE.

Referenced by dispatch_scan_requests().

1055  {
1056  CHECK_LE(buffer_size, alloc_size);
1057  if (buffer_size < alloc_size) {
1058  buffer = std::make_unique<char[]>(alloc_size);
1059  buffer_size = alloc_size;
1060  }
1061 }
#define CHECK_LE(x, y)
Definition: Logger.h:304

+ Here is the caller graph for this function:

std::vector< Aws::S3::Model::Object > foreign_storage::s3_objects_filter_sort_files ( const std::vector< Aws::S3::Model::Object > &  file_paths,
const shared::FilePathOptions options 
)

Definition at line 22 of file S3FilePathUtil.cpp.

References shared::FilePathOptions::filter_regex, shared::PATHNAME_ORDER_TYPE, foreign_storage::anonymous_namespace{S3FilePathUtil.cpp}::s3_objects_regex_file_filter(), and shared::FilePathOptions::sort_by.

24  {
25  auto result_files =
26  options.filter_regex.has_value()
27  ? s3_objects_regex_file_filter(options.filter_regex.value(), file_paths)
28  : file_paths;
29  // initial lexicographical order ensures a deterministic ordering for files not matching
30  // sort_regex
31  shared::FilePathOptions temp_options;
32  temp_options.sort_by = shared::PATHNAME_ORDER_TYPE;
33  auto initial_file_order = FileOrderS3(temp_options);
34  auto lexi_comp = initial_file_order.getFileComparator();
35  std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
36 
37  auto file_order = FileOrderS3(options);
38  auto comp = file_order.getFileComparator();
39  std::stable_sort(result_files.begin(), result_files.end(), comp);
40  return result_files;
41 }
std::optional< std::string > filter_regex
std::vector< Aws::S3::Model::Object > s3_objects_regex_file_filter(const std::string &pattern, const std::vector< Aws::S3::Model::Object > &objects_list)
const std::string PATHNAME_ORDER_TYPE
std::optional< std::string > sort_by

+ Here is the call graph for this function:

void foreign_storage::scan_metadata ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser 
)

Consumes and processes metadata scan requests from a pending requests queue and updates existing metadata objects based on newly scanned metadata.

Definition at line 853 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::MetadataScanMultiThreadingParams::continue_processing, get_next_scan_request(), foreign_storage::TextFileBufferParser::parseBuffer(), partition_by_fragment(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, process_data_blocks(), and run_benchmark_import::result.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

855  {
856  std::map<int, const ColumnDescriptor*> column_by_id{};
857  while (true) {
858  auto request_opt = get_next_scan_request(multi_threading_params);
859  if (!request_opt.has_value()) {
860  break;
861  }
862  auto& request = request_opt.value();
863  try {
864  if (column_by_id.empty()) {
865  for (const auto column : request.getColumns()) {
866  column_by_id[column->columnId] = column;
867  }
868  }
869  auto partitions = partition_by_fragment(
870  request.first_row_index, request.getMaxFragRows(), request.buffer_row_count);
871  request.begin_pos = 0;
872  size_t row_index = request.first_row_index;
873  for (const auto partition : partitions) {
874  request.process_row_count = partition;
875  for (const auto& import_buffer : request.import_buffers) {
876  if (import_buffer != nullptr) {
877  import_buffer->clear();
878  }
879  }
880  auto result = parser.parseBuffer(request, true);
881  int fragment_id = row_index / request.getMaxFragRows();
882  process_data_blocks(multi_threading_params,
883  fragment_id,
884  request,
885  result,
886  column_by_id,
887  fragment_id_to_file_regions_map);
888  row_index += result.row_count;
889  request.begin_pos = result.row_offsets.back() - request.file_offset;
890  }
891  } catch (...) {
892  // Re-add request to pool so we don't block any other threads
893  {
894  std::lock_guard<std::mutex> pending_requests_lock(
895  multi_threading_params.pending_requests_mutex);
896  multi_threading_params.continue_processing = false;
897  }
898  add_request_to_pool(multi_threading_params, request);
899  throw;
900  }
901  add_request_to_pool(multi_threading_params, request);
902  }
903 }
std::vector< size_t > partition_by_fragment(const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void process_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::set_comp ( const ChunkKey left,
const ChunkKey right 
)
void foreign_storage::set_node_name ( std::map< std::string, import_export::TypedImportBuffer * > &  import_buffers)

Definition at line 55 of file InternalSystemDataWrapper.cpp.

References g_distributed_leaf_idx, dist::is_leaf_node(), and to_string().

Referenced by foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_summary(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

56  {
57  if (import_buffers.find("node") != import_buffers.end()) {
58  if (dist::is_leaf_node()) {
59  std::string leaf_string{"Leaf " + to_string(g_distributed_leaf_idx)};
60  import_buffers["node"]->addString(leaf_string);
61  } else {
62  import_buffers["node"]->addString("Server");
63  }
64  }
65 }
std::string to_string(char const *&&v)
bool is_leaf_node()
Definition: distributed.cpp:29
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const FileRegion &  file_region,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 26 of file CsvShared.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

28  {
29  json_val.SetObject();
31  json_val, file_region.first_row_file_offset, "first_row_file_offset", allocator);
33  json_val, file_region.first_row_index, "first_row_index", allocator);
35  json_val, file_region.region_size, "region_size", allocator);
37  json_val, file_region.row_count, "row_count", allocator);
38  if (file_region.filename.size()) {
40  json_val, file_region.filename, "filename", allocator);
41  }
42 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:157

+ Here is the call graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const RowGroupInterval &  value,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 662 of file ParquetDataWrapper.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, and foreign_storage::RowGroupInterval::start_index.

664  {
665  json_val.SetObject();
666  json_utils::add_value_to_object(json_val, value.file_path, "file_path", allocator);
667  json_utils::add_value_to_object(json_val, value.start_index, "start_index", allocator);
668  json_utils::add_value_to_object(json_val, value.end_index, "end_index", allocator);
669 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:157

+ Here is the call graph for this function:

void foreign_storage::throw_file_access_error ( const std::string &  file_path,
const std::string &  message 
)
inline

Definition at line 89 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

90  {
91  std::string error_message{"Unable to access file \"" + file_path + "\". " + message};
92  throw ForeignStorageException{error_message};
93 }

+ Here is the caller graph for this function:

void foreign_storage::throw_file_not_found_error ( const std::string &  file_path)
inline

Definition at line 95 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

95  {
96  throw ForeignStorageException{"File or directory \"" + file_path +
97  "\" does not exist."};
98 }

+ Here is the caller graph for this function:

void foreign_storage::throw_number_of_columns_mismatch_error ( size_t  num_table_cols,
size_t  num_file_cols,
const std::string &  file_path 
)
inline

Definition at line 80 of file ForeignStorageException.h.

References to_string().

Referenced by foreign_storage::RegexFileBufferParser::regexMatchColumns(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_expected_column_count(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().

82  {
83  throw ForeignStorageException{"Mismatched number of logical columns: (expected " +
84  std::to_string(num_table_cols) + " columns, has " +
85  std::to_string(num_file_cols) + "): in file '" +
86  file_path + "'"};
87 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::throw_parquet_metadata_out_of_bounds_error ( const std::string &  min_value,
const std::string &  max_value,
const std::string &  encountered_value 
)
inline

Definition at line 46 of file ParquetMetadataValidator.h.

Referenced by foreign_storage::TimestampBoundsValidator< T >::validateValue(), foreign_storage::IntegralFixedLengthBoundsValidator< T >::validateValue(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue(), and foreign_storage::FloatPointValidator< T >::validateValue().

49  {
50  std::stringstream error_message;
51  error_message << "Parquet column contains values that are outside the range of the "
52  "HeavyDB column "
53  "type. Consider using a wider column type. Min allowed value: "
54  << min_value << ". Max allowed value: " << max_value
55  << ". Encountered value: " << encountered_value << ".";
56  throw std::runtime_error(error_message.str());
57 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_file_error ( const std::string &  file_path)
inline

Definition at line 73 of file ForeignStorageException.h.

Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

73  {
74  throw ForeignStorageException{
75  "Refresh of foreign table created with \"APPEND\" update type failed as "
76  "file \"" +
77  file_path + "\" was removed."};
78 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_file_error ( const std::string &  file_path)
inline

Definition at line 66 of file ForeignStorageException.h.

Referenced by foreign_storage::SingleTextFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

66  {
67  throw ForeignStorageException{
68  "Refresh of foreign table created with \"APPEND\" update type failed as file "
69  "reduced in size: \"" +
70  file_path + "\""};
71 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_result_set_error ( const std::string &  select_statement)
inline

Definition at line 58 of file ForeignStorageException.h.

58  {
59  throw ForeignStorageException{
60  "Refresh of foreign table created with \"APPEND\" update type failed as result set "
61  "of select statement "
62  "reduced in size: \"" +
63  select_statement + "\""};
64 }
void foreign_storage::throw_s3_compressed_extension ( const std::string &  file_path,
const std::string &  ext_type 
)
inline

Definition at line 107 of file ForeignStorageException.h.

108  {
109  throw ForeignStorageException{
110  "File \"" + file_path + "\" has extension type \"" + ext_type +
111  "\", compressed file formats are not supported by S3 Foreign Tables."};
112 }
void foreign_storage::throw_s3_compressed_mime_type ( const std::string &  file_path,
const std::string &  mime_type 
)
inline

Definition at line 100 of file ForeignStorageException.h.

101  {
102  throw ForeignStorageException{
103  "File \"" + file_path + "\" has mime type \"" + mime_type +
104  "\", compressed file formats are not supported by S3 Foreign Tables."};
105 }
void foreign_storage::throw_unexpected_number_of_items ( const size_t &  num_expected,
const size_t &  num_loaded,
const std::string &  item_type 
)
inline

Definition at line 40 of file ForeignStorageException.h.

References to_string().

Referenced by parse_file_regions(), and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_unexpected_number_of_items().

42  {
43  throw ForeignStorageException(
44  "Unexpected number of " + item_type +
45  " while loading from foreign data source: expected " +
46  std::to_string(num_expected) + " , obtained " + std::to_string(num_loaded) + " " +
47  item_type +
48  ". Please use the \"REFRESH FOREIGN TABLES\" command on the foreign table "
49  "if data source has been updated.");
50 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::update_delete_buffer ( const ParseBufferRequest &  request,
const ParseBufferResult &  result,
const foreign_storage::IterativeFileScanParameters file_scan_param,
const size_t  start_position_in_fragment 
)

Definition at line 685 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::IterativeFileScanParameters::delete_buffer, foreign_storage::IterativeFileScanParameters::delete_buffer_mutex, foreign_storage::ParseBufferRequest::processed_row_count, foreign_storage::ParseBufferResult::rejected_rows, and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer().

Referenced by populate_chunks().

689  {
690  if (file_scan_param.delete_buffer) {
691  std::unique_lock delete_buffer_lock(file_scan_param.delete_buffer_mutex);
692  auto& delete_buffer = file_scan_param.delete_buffer;
693  auto chunk_offset = start_position_in_fragment;
694  auto chunk_element_count = chunk_offset + request.processed_row_count;
695 
696  // ensure delete buffer is sized appropriately
697  resize_delete_buffer(delete_buffer, chunk_element_count);
698 
699  auto delete_buffer_data = delete_buffer->getMemoryPtr();
700  for (const auto rejected_row_index : result.rejected_rows) {
701  CHECK(rejected_row_index + chunk_offset < delete_buffer->size());
702  delete_buffer_data[rejected_row_index + chunk_offset] = true;
703  }
704  }
705 }
void resize_delete_buffer(AbstractBuffer *delete_buffer, const size_t chunk_element_count)
std::unique_lock< T > unique_lock
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::update_stats ( Encoder encoder,
const SQLTypeInfo column_type,
DataBlockPtr  data_block,
const size_t  row_count 
)

Updates the statistics metadata encapsulated in the encoder given new data in a data block.

Definition at line 570 of file AbstractTextFileDataWrapper.cpp.

References DataBlockPtr::arraysPtr, SQLTypeInfo::is_array(), SQLTypeInfo::is_varlen(), DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and Encoder::updateStats().

Referenced by process_data_blocks(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata(), and StorageIOFacility::yieldUpdateCallback().

573  {
574  if (column_type.is_array()) {
575  encoder->updateStats(data_block.arraysPtr, 0, row_count);
576  } else if (!column_type.is_varlen()) {
577  encoder->updateStats(data_block.numbersPtr, row_count);
578  } else {
579  encoder->updateStats(data_block.stringsPtr, 0, row_count);
580  }
581 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:224
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:225
bool is_varlen() const
Definition: sqltypes.h:620
int8_t * numbersPtr
Definition: sqltypes.h:223
virtual void updateStats(const int64_t val, const bool is_null)=0
bool is_array() const
Definition: sqltypes.h:588

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< parquet::Statistics > foreign_storage::validate_and_get_column_metadata_statistics ( const parquet::ColumnChunkMetaData *  column_metadata)

Definition at line 86 of file ParquetShared.cpp.

References CHECK.

Referenced by foreign_storage::ParquetEncoder::getRowGroupMetadata(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_definition_levels().

87  {
88  CHECK(column_metadata->is_stats_set());
89  std::shared_ptr<parquet::Statistics> stats = column_metadata->statistics();
90  return stats;
91 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

void foreign_storage::validate_equal_column_descriptor ( const parquet::ColumnDescriptor *  reference_descriptor,
const parquet::ColumnDescriptor *  new_descriptor,
const std::string &  reference_file_path,
const std::string &  new_file_path 
)

Definition at line 60 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

64  {
65  if (!reference_descriptor->Equals(*new_descriptor)) {
66  throw std::runtime_error{"Parquet file \"" + new_file_path +
67  "\" has a different schema. Please ensure that all Parquet "
68  "files use the same schema. Reference Parquet file: " +
69  reference_file_path +
70  ", column name: " + reference_descriptor->name() +
71  ". New Parquet file: " + new_file_path +
72  ", column name: " + new_descriptor->name() + "."};
73  }
74 }

+ Here is the caller graph for this function:

void foreign_storage::validate_non_foreign_table_write ( const TableDescriptor table_descriptor)
inline

Definition at line 22 of file FsiUtils.h.

References StorageType::FOREIGN_TABLE, and TableDescriptor::storageType.

Referenced by Parser::InsertStmt::analyze(), Parser::TruncateTableStmt::execute(), Parser::InsertValuesStmt::execute(), Parser::InsertIntoTableAsSelectStmt::populateData(), and RelModify::RelModify().

22  {
23  if (table_descriptor && table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
24  throw std::runtime_error{
25  "DELETE, INSERT, TRUNCATE, OR UPDATE commands are not supported for foreign "
26  "tables."};
27  }
28 }
std::string storageType
static constexpr char const * FOREIGN_TABLE

+ Here is the caller graph for this function:

void foreign_storage::validate_regex_parser_options ( const import_export::CopyParams copy_params)

Definition at line 114 of file ForeignDataWrapperFactory.cpp.

References import_export::CopyParams::line_regex.

Referenced by anonymous_namespace{ForeignDataImporter.cpp}::validate_copy_params().

114  {
115  if (copy_params.line_regex.empty()) {
116  throw std::runtime_error{"Regex parser options must contain a line regex."};
117  }
118 }

+ Here is the caller graph for this function:

Variable Documentation