OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage Namespace Reference

Namespaces

 anonymous_namespace{AbstractFileStorageDataWrapper.cpp}
 
 anonymous_namespace{AbstractTextFileDataWrapper.cpp}
 
 anonymous_namespace{CachingForeignStorageMgr.cpp}
 
 anonymous_namespace{CsvFileBufferParser.cpp}
 
 anonymous_namespace{FileReader.cpp}
 
 anonymous_namespace{ForeignDataWrapperFactory.cpp}
 
 anonymous_namespace{ForeignStorageCache.cpp}
 
 anonymous_namespace{ForeignTableRefresh.cpp}
 
 anonymous_namespace{InternalCatalogDataWrapper.cpp}
 
 anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalStorageStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalSystemDataWrapper.cpp}
 
 anonymous_namespace{LazyParquetChunkLoader.cpp}
 
 anonymous_namespace{LogFileBufferParser.cpp}
 
 anonymous_namespace{ParquetDataWrapper.cpp}
 
 anonymous_namespace{RefreshTimeCalculator.cpp}
 
 anonymous_namespace{RegexFileBufferParser.cpp}
 
 anonymous_namespace{RegexParserDataWrapper.cpp}
 
 anonymous_namespace{S3FilePathUtil.cpp}
 
 anonymous_namespace{TextFileBufferParser.cpp}
 
 Csv
 
 json_utils
 

Classes

struct  ForeignServer
 
struct  ForeignTable
 
struct  OptionsContainer
 
struct  UserMappingType
 
struct  UserMapping
 
class  RefreshTimeCalculator
 
class  AbstractFileStorageDataWrapper
 
struct  ParseFileRegionResult
 
struct  MetadataScanMultiThreadingParams
 
struct  IterativeFileScanParameters
 
class  AbstractTextFileDataWrapper
 
class  CachingForeignStorageMgr
 
class  CsvDataWrapper
 
class  CsvFileBufferParser
 
struct  DataPreview
 
class  FileReader
 
class  SingleFileReader
 
class  SingleTextFileReader
 
class  ArchiveWrapper
 
class  CompressedFileReader
 
class  MultiFileReader
 
class  LocalMultiFileReader
 
struct  FileRegion
 
class  ForeignDataWrapper
 
struct  DataWrapperType
 Encapsulates an enumeration of foreign data wrapper type strings. More...
 
class  ForeignDataWrapperFactory
 
class  ForeignStorageBuffer
 
class  ForeignStorageCache
 
class  ForeignStorageException
 
class  MetadataScanInfeasibleFragmentSizeException
 
class  RequestedFragmentIdOutOfBoundsException
 
class  ChunkSizeValidator
 
class  MockForeignDataWrapper
 
class  ForeignStorageMgr
 
class  ForeignTableSchema
 
class  GeospatialEncoder
 
class  InternalCatalogDataWrapper
 
class  InternalLogsDataWrapper
 
class  InternalMemoryStatsDataWrapper
 
struct  StorageDetails
 
class  InternalStorageStatsDataWrapper
 
class  InternalSystemDataWrapper
 
struct  ColumnType
 
struct  FragmentType
 
struct  Interval
 
struct  ParquetBatchData
 
class  ParquetRowGroupReader
 
struct  PreviewContext
 
class  LazyParquetChunkLoader
 
class  LogFileBufferParser
 
class  OdbcGeospatialEncoder
 
class  ParquetArrayDetectEncoder
 
class  ParquetArrayEncoder
 
class  ParquetArrayImportEncoder
 
class  ParquetDataWrapper
 
class  ParquetDateInDaysFromTimestampEncoder
 
class  ParquetDateInSecondsEncoder
 
class  ParquetDecimalEncoder
 
class  ParquetDetectStringEncoder
 
class  ParquetEncoder
 
class  ParquetImportEncoder
 
class  ParquetScalarEncoder
 
class  ParquetFixedLengthArrayEncoder
 
class  ParquetFixedLengthEncoder
 
class  ParquetUnsignedFixedLengthEncoder
 
class  ParquetGeospatialEncoder
 
class  ParquetGeospatialImportEncoder
 
class  RowGroupIntervalTracker
 
class  ParquetImportBatchResult
 
class  AbstractRowGroupIntervalTracker
 
class  ParquetImporter
 
class  ParquetInPlaceEncoder
 
class  TypedParquetInPlaceEncoder
 
class  ParquetMetadataValidator
 
class  TimestampBoundsValidator
 
class  IntegralFixedLengthBoundsValidator
 
class  BaseDateBoundsValidator
 
class  FloatPointValidator
 
struct  RowGroupInterval
 
struct  RowGroupMetadata
 
class  FileReaderMap
 
class  ParquetStringEncoder
 
class  ParquetStringImportEncoder
 
class  ParquetStringNoneEncoder
 
class  ParquetTimeEncoder
 
class  ParquetTimestampEncoder
 
class  ParquetVariableLengthArrayEncoder
 
class  PassThroughBuffer
 
class  RegexFileBufferParser
 
class  RegexParserDataWrapper
 
class  FileOrderS3
 
struct  ParseBufferRequest
 
struct  ParseBufferResult
 
class  TextFileBufferParser
 
class  TypedParquetDetectBuffer
 
class  TypedParquetStorageBuffer
 
class  ForeignTableRefreshScheduler
 

Typedefs

using OptionsMap = std::map< std::string, std::string, std::less<>>
 
using SampleRows = std::vector< std::vector< std::string >>
 
using FirstLineByFilePath = std::map< std::string, std::string >
 
using FileRegions = std::vector< FileRegion >
 
using ChunkToBufferMap = std::map< ChunkKey, AbstractBuffer * >
 
using RenderGroupAnalyzerMap = std::map< int, std::unique_ptr< import_export::RenderGroupAnalyzer >>
 
using read_lock = heavyai::shared_lock< heavyai::shared_mutex >
 
using write_lock = heavyai::unique_lock< heavyai::shared_mutex >
 
using FilePathAndRowGroup = std::pair< std::string, int32_t >
 
using RejectedRowIndices = std::set< int64_t >
 
using InvalidRowGroupIndices = std::set< int64_t >
 
template<typename T >
using DateInSecondsBoundsValidator = BaseDateBoundsValidator< T, true >
 
template<typename T >
using DateInDaysBoundsValidator = BaseDateBoundsValidator< T, false >
 
using UniqueReaderPtr = std::unique_ptr< parquet::arrow::FileReader >
 
using ReaderPtr = parquet::arrow::FileReader *
 
template<typename V , typename T , T conversion_denominator, typename NullType = V>
using ParquetDateInSecondsFromTimestampEncoder = ParquetTimestampEncoder< V, T, conversion_denominator, NullType >
 
using S3ObjectComparator = std::function< bool(const Aws::S3::Model::Object &, const Aws::S3::Model::Object &)>
 

Functions

size_t get_num_threads (const ForeignTable &table)
 
ParseFileRegionResult parse_file_regions (const FileRegions &file_regions, const size_t start_index, const size_t end_index, FileReader &file_reader, ParseBufferRequest &parse_file_request, const std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, const TextFileBufferParser &parser)
 
size_t num_rows_to_process (const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
 
std::vector< size_t > partition_by_fragment (const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
 
std::optional< ParseBufferRequest > get_next_scan_request (MetadataScanMultiThreadingParams &multi_threading_params)
 
void add_file_region (std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
 
void update_stats (Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
 
void cache_blocks (std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)
 
void append_data_block_to_chunk (const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const ChunkKey &chunk_key, const ColumnDescriptor *column, const std::set< size_t > &rejected_row_indices, const size_t element_count_required)
 
void populate_chunks_using_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param)
 
void process_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
 
void add_request_to_pool (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void scan_metadata (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser)
 
ParseBufferRequest get_request_from_pool (MetadataScanMultiThreadingParams &multi_threading_params)
 
void defer_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void dispatch_all_deferred_requests (MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void populate_chunks (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser, foreign_storage::IterativeFileScanParameters &file_scan_param)
 
void resize_buffer_if_needed (std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
 
void reset_multithreading_params (foreign_storage::MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_scan_requests (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call)
 
void dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call)
 
void set_value (rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, FileRegion &file_region)
 
std::optional< SQLTypes > detect_geo_type (const SampleRows &sample_rows, size_t column_index)
 
bool is_s3_uri (const std::string &file_path)
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
 Create proxy fsi objects for use outside FSI. More...
 
std::tuple< std::unique_ptr
< foreign_storage::ForeignServer >
, std::unique_ptr
< foreign_storage::UserMapping >
, std::unique_ptr
< foreign_storage::ForeignTable > > 
create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const TableDescriptor *table)
 Create proxy fsi objects for use outside FSI. NOTE: parameters mirror function above. More...
 
void validate_regex_parser_options (const import_export::CopyParams &copy_params)
 
bool is_valid_source_type (const import_export::CopyParams &copy_params)
 
std::string bool_to_option_value (const bool value)
 
void throw_unexpected_number_of_items (const size_t &num_expected, const size_t &num_loaded, const std::string &item_type)
 
void throw_removed_row_in_result_set_error (const std::string &select_statement)
 
void throw_removed_row_in_file_error (const std::string &file_path)
 
void throw_removed_file_error (const std::string &file_path)
 
void throw_number_of_columns_mismatch_error (size_t num_table_cols, size_t num_file_cols, const std::string &file_path)
 
void throw_file_access_error (const std::string &file_path, const std::string &message)
 
void throw_file_not_found_error (const std::string &file_path)
 
void throw_s3_compressed_mime_type (const std::string &file_path, const std::string &mime_type)
 
void throw_s3_compressed_extension (const std::string &file_path, const std::string &ext_type)
 
bool is_append_table_chunk_key (const ChunkKey &chunk_key)
 
bool set_comp (const ChunkKey &left, const ChunkKey &right)
 
std::vector< ChunkKey > get_column_key_vec (const ChunkKey &destination_chunk_key)
 
std::set< ChunkKey > get_column_key_set (const ChunkKey &destination_chunk_key)
 
size_t get_max_chunk_size (const ChunkKey &key)
 
bool contains_fragment_key (const std::set< ChunkKey > &key_set, const ChunkKey &target_key)
 
bool is_table_enabled_on_node (const ChunkKey &key)
 
void refresh_foreign_table_unlocked (Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
 
void refresh_foreign_table (Catalog_Namespace::Catalog &catalog, const std::string &table_name, const bool evict_cached_entries)
 
void init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const SQLTypeInfo &type, size_t num_elements)
 
const
foreign_storage::ForeignTable & 
get_foreign_table_for_key (const ChunkKey &key)
 
bool is_system_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_replicated_table_chunk_key (const ChunkKey &chunk_key)
 
bool is_shardable_key (const ChunkKey &key)
 
bool fragment_maps_to_leaf (const ChunkKey &key)
 
bool key_does_not_shard_to_leaf (const ChunkKey &key)
 
template<typename T >
auto partition_for_threads (const std::set< T > &items, size_t max_threads)
 
template<typename T >
auto partition_for_threads (const std::vector< T > &items, size_t max_threads)
 
template<typename Container >
std::vector< std::future< void > > create_futures_for_workers (const Container &items, size_t max_threads, std::function< void(const Container &)> lambda)
 
template<typename T >
ArrayDatum encode_as_array_datum (const std::vector< T > &data)
 
std::string get_db_name (int32_t db_id)
 
std::string get_table_name (int32_t db_id, int32_t table_id)
 
void set_node_name (std::map< std::string, import_export::TypedImportBuffer * > &import_buffers)
 
void set_value (rapidjson::Value &json_val, const RowGroupInterval &value, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, RowGroupInterval &value)
 
template<typename D , typename T >
bool check_bounds (const T &value)
 
template<typename D >
std::string datetime_to_string (const D &timestamp, const SQLTypeInfo &column_type)
 
void throw_parquet_metadata_out_of_bounds_error (const std::string &min_value, const std::string &max_value, const std::string &encountered_value)
 
UniqueReaderPtr open_parquet_table (const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
 
std::pair< int, int > get_parquet_table_size (const ReaderPtr &reader)
 
const parquet::ColumnDescriptor * get_column_descriptor (const parquet::arrow::FileReader *reader, const int logical_column_index)
 
parquet::Type::type get_physical_type (ReaderPtr &reader, const int logical_column_index)
 
void validate_equal_column_descriptor (const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
 
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor (const ColumnDescriptor *column)
 
std::shared_ptr
< parquet::Statistics > 
validate_and_get_column_metadata_statistics (const parquet::ColumnChunkMetaData *column_metadata)
 
std::vector
< Aws::S3::Model::Object > 
s3_objects_filter_sort_files (const std::vector< Aws::S3::Model::Object > &file_paths, const shared::FilePathOptions &options)
 
template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V get_null_value ()
 
template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > get_min_max_bounds ()
 
void populate_string_dictionary (const int32_t table_id, const int32_t col_id, const Catalog_Namespace::Catalog &catalog)
 
void validate_non_foreign_table_write (const TableDescriptor *table_descriptor)
 

Variables

constexpr const char * kDeletedValueIndicator {"<DELETED>"}
 

Typedef Documentation

using foreign_storage::ChunkToBufferMap = typedef std::map<ChunkKey, AbstractBuffer*>

Definition at line 34 of file ForeignDataWrapper.h.

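template<typename T >
using foreign_storage::DateInDaysBoundsValidator = typedef BaseDateBoundsValidator<T, false>

Definition at line 277 of file ParquetMetadataValidator.h.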

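template<typename T >
using foreign_storage::DateInSecondsBoundsValidator = typedef BaseDateBoundsValidator<T, true>

Definition at line 274 of file ParquetMetadataValidator.h.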

using foreign_storage::FilePathAndRowGroup = typedef std::pair<std::string, int32_t>

Definition at line 40 of file ParquetDataWrapper.h.

using foreign_storage::FileRegions = typedef std::vector<FileRegion>

Definition at line 60 of file FileRegions.h.

using foreign_storage::FirstLineByFilePath = typedef std::map<std::string, std::string>

Definition at line 37 of file FileReader.h.

using foreign_storage::InvalidRowGroupIndices = typedef std::set<int64_t>

Definition at line 123 of file ParquetEncoder.h.

using foreign_storage::OptionsMap = typedef std::map<std::string, std::string, std::less<>>

Definition at line 30 of file OptionsContainer.h.

template<typename V , typename T , T conversion_denominator, typename NullType = V>
using foreign_storage::ParquetDateInSecondsFromTimestampEncoder = typedef ParquetTimestampEncoder<V, T, conversion_denominator, NullType>

Definition at line 89 of file ParquetTimestampEncoder.h.

using foreign_storage::ReaderPtr = typedef parquet::arrow::FileReader*

Definition at line 33 of file ParquetShared.h.

using foreign_storage::RejectedRowIndices = typedef std::set<int64_t>

Definition at line 28 of file ParquetEncoder.h.

using foreign_storage::RenderGroupAnalyzerMap = typedef std::map<int, std::unique_ptr<import_export::RenderGroupAnalyzer>>

Definition at line 37 of file ForeignDataWrapper.h.

using foreign_storage::S3ObjectComparator = typedef std::function<bool(const Aws::S3::Model::Object&, const Aws::S3::Model::Object&)>

Definition at line 17 of file S3FilePathUtil.h.

using foreign_storage::SampleRows = typedef std::vector<std::vector<std::string>>

Definition at line 26 of file DataPreview.h.

using foreign_storage::UniqueReaderPtr = typedef std::unique_ptr<parquet::arrow::FileReader>

Definition at line 32 of file ParquetShared.h.

Function Documentation

void foreign_storage::add_file_region ( std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
int  fragment_id,
size_t  first_row_index,
const ParseBufferResult &  result,
const std::string &  file_path 
)

Creates a new file region based on metadata from parsed file buffers and adds the new region to the fragment id to file regions map.

Definition at line 549 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferResult::row_count, and foreign_storage::ParseBufferResult::row_offsets.

Referenced by populate_chunks_using_data_blocks(), and process_data_blocks().

553  {
554  fragment_id_to_file_regions_map[fragment_id].emplace_back(
555  // file naming is handled by FileReader
556  FileRegion(file_path,
557  result.row_offsets.front(),
558  first_row_index,
559  result.row_count,
560  result.row_offsets.back() - result.row_offsets.front()));
561 }

+ Here is the caller graph for this function:

void foreign_storage::add_request_to_pool ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Adds the request object for a processed request back to the request pool for reuse in subsequent requests.

Definition at line 786 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests(), populate_chunks(), and scan_metadata().

787  {
788  std::unique_lock<std::mutex> completed_requests_queue_lock(
789  multi_threading_params.request_pool_mutex);
790  multi_threading_params.request_pool.emplace(std::move(request));
791  completed_requests_queue_lock.unlock();
792  multi_threading_params.request_pool_condition.notify_all();
793 }

+ Here is the caller graph for this function:

void foreign_storage::append_data_block_to_chunk ( const foreign_storage::IterativeFileScanParameters &  file_scan_param,
DataBlockPtr  data_block,
size_t  row_count,
const ChunkKey &  chunk_key,
const ColumnDescriptor *  column,
const std::set< size_t > &  rejected_row_indices,
const size_t  element_count_required 
)

Definition at line 636 of file AbstractTextFileDataWrapper.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, foreign_storage::IterativeFileScanParameters::column_id_to_chunk_map, foreign_storage::IterativeFileScanParameters::delete_buffer, foreign_storage::IterativeFileScanParameters::delete_buffer_mutex, shared::get_from_map(), foreign_storage::IterativeFileScanParameters::getChunkConditionalVariable(), foreign_storage::IterativeFileScanParameters::getChunkMutex(), and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer().

Referenced by populate_chunks_using_data_blocks().

643  {
644  auto chunk = shared::get_from_map(file_scan_param.column_id_to_chunk_map,
645  chunk_key[CHUNK_KEY_COLUMN_IDX]);
646 
647  auto& conditional_variable =
648  file_scan_param.getChunkConditionalVariable(chunk_key[CHUNK_KEY_COLUMN_IDX]);
649  {
650  std::unique_lock<std::mutex> chunk_lock(
651  file_scan_param.getChunkMutex(chunk_key[CHUNK_KEY_COLUMN_IDX]));
652  conditional_variable.wait(chunk_lock, [element_count_required, &chunk]() {
653  return chunk.getBuffer()->getEncoder()->getNumElems() == element_count_required;
654  });
655 
656  chunk.appendData(data_block, row_count, 0);
657  }
658 
659  conditional_variable
660  .notify_all(); // notify any threads waiting on the correct element count
661 
662  if (file_scan_param.delete_buffer) {
663  std::unique_lock delete_buffer_lock(file_scan_param.delete_buffer_mutex);
664  auto& delete_buffer = file_scan_param.delete_buffer;
665  auto chunk_offset = element_count_required;
666  auto chunk_element_count = chunk_offset + row_count;
667 
668  // ensure delete buffer is sized appropriately
669  resize_delete_buffer(delete_buffer, chunk_element_count);
670 
671  auto delete_buffer_data = delete_buffer->getMemoryPtr();
672  for (const auto rejected_row_index : rejected_row_indices) {
673  CHECK(rejected_row_index + chunk_offset < delete_buffer->size());
674  delete_buffer_data[rejected_row_index + chunk_offset] = true;
675  }
676  }
677 }
void resize_delete_buffer(AbstractBuffer *delete_buffer, const size_t chunk_element_count)
std::condition_variable & getChunkConditionalVariable(const int col_id) const
std::map< int, Chunk_NS::Chunk > & column_id_to_chunk_map
std::unique_lock< T > unique_lock
std::mutex & getChunkMutex(const int col_id) const
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61
#define CHECK(condition)
Definition: Logger.h:222
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::bool_to_option_value ( const bool  value)

Definition at line 133 of file ForeignDataWrapperFactory.cpp.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy().

133  {
134  return value ? "TRUE" : "FALSE";
135 }

+ Here is the caller graph for this function:

void foreign_storage::cache_blocks ( std::map< ChunkKey, Chunk_NS::Chunk > &  cached_chunks,
DataBlockPtr  data_block,
size_t  row_count,
ChunkKey &  chunk_key,
const ColumnDescriptor *  column,
bool  is_first_block,
bool  disable_cache 
)

Definition at line 595 of file AbstractTextFileDataWrapper.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_cache_if_enabled(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), SQLTypeInfo::is_varlen_indeed(), and key_does_not_shard_to_leaf().

Referenced by process_data_blocks().

601  {
602  auto catalog =
603  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
604  CHECK(catalog);
605  auto cache = get_cache_if_enabled(catalog, disable_cache);
606  if (cache) {
607  // This extra filter needs to be here because this wrapper is the only one that
608  // accesses the cache directly and it should not be inserting chunks which are not
609  // mapped to the current leaf (in distributed mode).
610  if (key_does_not_shard_to_leaf(chunk_key)) {
611  return;
612  }
613 
614  ChunkKey index_key = {chunk_key[CHUNK_KEY_DB_IDX],
615  chunk_key[CHUNK_KEY_TABLE_IDX],
616  chunk_key[CHUNK_KEY_COLUMN_IDX],
617  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
618  2};
619  // Create actual data chunks to prepopulate cache
620  if (cached_chunks.find(chunk_key) == cached_chunks.end()) {
621  cached_chunks[chunk_key] = Chunk_NS::Chunk{column, false};
622  cached_chunks[chunk_key].setBuffer(
623  cache->getChunkBufferForPrecaching(chunk_key, is_first_block));
624  if (column->columnType.is_varlen_indeed()) {
625  cached_chunks[chunk_key].setIndexBuffer(
626  cache->getChunkBufferForPrecaching(index_key, is_first_block));
627  }
628  if (is_first_block) {
629  cached_chunks[chunk_key].initEncoder();
630  }
631  }
632  cached_chunks[chunk_key].appendData(data_block, row_count, 0);
633  }
634 }
std::vector< int > ChunkKey
Definition: types.h:36
foreign_storage::ForeignStorageCache * get_cache_if_enabled(std::shared_ptr< Catalog_Namespace::Catalog > &catalog, const bool disable_cache)
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:341
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:222
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:621
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename D , typename T >
bool foreign_storage::check_bounds ( const T &  value)
inline

Definition at line 32 of file ParquetMetadataValidator.h.

32  {
33  auto [min_value, max_value] = get_min_max_bounds<D>();
34  return value >= min_value && value <= max_value;
35 }
bool foreign_storage::contains_fragment_key ( const std::set< ChunkKey > &  key_set,
const ChunkKey &  target_key 
)
template<typename Container >
std::vector<std::future<void> > foreign_storage::create_futures_for_workers ( const Container &  items,
size_t  max_threads,
std::function< void(const Container &)>  lambda 
)

Definition at line 74 of file FsiChunkUtils.h.

References threading_serial::async(), and partition_for_threads().

Referenced by import_export::ForeignDataImporter::importGeneralS3(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

77  {
78  auto items_per_thread = partition_for_threads(items, max_threads);
79  std::vector<std::future<void>> futures;
80  for (const auto& items : items_per_thread) {
81  futures.emplace_back(std::async(std::launch::async, lambda, items));
82  }
83 
84  return futures;
85 }
auto partition_for_threads(const std::set< T > &items, size_t max_threads)
Definition: FsiChunkUtils.h:41
future< Result > async(Fn &&fn, Args &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const int  db_id,
const TableDescriptor *  table,
const int32_t  user_id 
)

Create proxy fsi objects for use outside FSI.

Parameters
copy_from_source - the source that will be copied
copy_params - CopyParams that specify parameters around use case
db_id - db id of database in use case
table - the table descriptor of the table in use case
user_id - the user id of user in use case
Returns
tuple of FSI objects that can be used for particular use case (for example import or detect)

Definition at line 63 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable().

Referenced by create_proxy_fsi_objects(), and import_export::ForeignDataImporter::importGeneral().

67  {
68  auto server = foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(
69  db_id, user_id, copy_from_source, copy_params);
70 
71  CHECK(server);
72  server->validate();
73 
74  auto user_mapping =
75  foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable(
76  db_id, user_id, copy_from_source, copy_params, server.get());
77 
78  if (user_mapping) {
79  user_mapping->validate(server.get());
80  }
81 
82  auto foreign_table =
83  foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(
84  db_id, table, copy_from_source, copy_params, server.get());
85 
86  CHECK(foreign_table);
87  foreign_table->validateOptionValues();
88 
89  return {std::move(server), std::move(user_mapping), std::move(foreign_table)};
90 }
static std::unique_ptr< ForeignServer > createForeignServerProxy(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params)
static std::unique_ptr< ForeignTable > createForeignTableProxy(const int db_id, const TableDescriptor *table, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
#define CHECK(condition)
Definition: Logger.h:222
static std::unique_ptr< UserMapping > createUserMappingProxyIfApplicable(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects ( const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params,
const TableDescriptor *  table 
)

Create proxy fsi objects for use outside FSI. NOTE: parameters mirror function above.

Definition at line 95 of file ForeignDataWrapperFactory.cpp.

References create_proxy_fsi_objects().

97  {
98  return create_proxy_fsi_objects(copy_from_source, copy_params, -1, table, -1);
99 }
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > create_proxy_fsi_objects(const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
Create proxy fsi objects for use outside FSI.

+ Here is the call graph for this function:

template<typename D >
std::string foreign_storage::datetime_to_string ( const D &  timestamp,
const SQLTypeInfo column_type 
)
inline

Definition at line 38 of file ParquetMetadataValidator.h.

References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfo::is_date(), and SQLTypeInfo::is_timestamp().

Referenced by foreign_storage::TimestampBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::getMinMaxBoundsAsStrings(), foreign_storage::TimestampBoundsValidator< T >::validateValue(), and foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue().

39  {
40  CHECK(column_type.is_timestamp() || column_type.is_date());
41  Datum d;
42  d.bigintval = timestamp;
43  return DatumToString(d, column_type);
44 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:458
bool is_timestamp() const
Definition: sqltypes.h:995
int64_t bigintval
Definition: Datum.h:49
#define CHECK(condition)
Definition: Logger.h:222
Definition: Datum.h:44
bool is_date() const
Definition: sqltypes.h:983

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::defer_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Definition at line 873 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, and foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex.

Referenced by populate_chunks().

874  {
875  std::unique_lock<std::mutex> deferred_requests_lock(
876  multi_threading_params.deferred_requests_mutex);
877  multi_threading_params.deferred_requests.emplace(std::move(request));
878 }

+ Here is the caller graph for this function:

std::optional< SQLTypes > foreign_storage::detect_geo_type ( const SampleRows &  sample_rows,
size_t  column_index 
)

Definition at line 22 of file DataPreview.cpp.

References CHECK_EQ, CHECK_LT, kLINESTRING, kMULTIPOLYGON, kNULLT, kPOINT, kPOLYGON, and UNREACHABLE.

Referenced by foreign_storage::LazyParquetChunkLoader::previewFiles().

23  {
24  std::optional<SQLTypes> tentative_geo_type{};
25  for (const auto& row : sample_rows) {
26  static std::regex geo_regex{
27  "\\s*(POINT|LINESTRING|POLYGON|MULTIPOLYGON)\\s*\\(.+\\)\\s*"};
28  std::smatch match;
29  CHECK_LT(column_index, row.size());
30  if (std::regex_match(row[column_index], match, geo_regex)) {
31  CHECK_EQ(match.size(), static_cast<size_t>(2));
32  SQLTypes geo_type{kNULLT};
33  const auto& geo_type_str = match[1];
34  if (geo_type_str == "POINT") {
35  geo_type = kPOINT;
36  } else if (geo_type_str == "LINESTRING") {
37  geo_type = kLINESTRING;
38  } else if (geo_type_str == "POLYGON") {
39  geo_type = kPOLYGON;
40  } else if (geo_type_str == "MULTIPOLYGON") {
41  geo_type = kMULTIPOLYGON;
42  } else {
43  UNREACHABLE() << "Unexpected geo type match: " << geo_type_str;
44  }
45  if (tentative_geo_type.has_value()) {
46  if (tentative_geo_type.value() != geo_type) {
47  return {}; // geo type does not match between rows, can not be imported
48  }
49  } else {
50  tentative_geo_type = geo_type;
51  }
52  }
53  }
54  return tentative_geo_type;
55 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
SQLTypes
Definition: sqltypes.h:53
#define UNREACHABLE()
Definition: Logger.h:266
#define CHECK_LT(x, y)
Definition: Logger.h:232

+ Here is the caller graph for this function:

void foreign_storage::dispatch_all_deferred_requests ( MetadataScanMultiThreadingParams &  multi_threading_params)

Definition at line 883 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

884  {
885  std::unique_lock<std::mutex> deferred_requests_lock(
886  multi_threading_params.deferred_requests_mutex);
887  {
888  std::unique_lock<std::mutex> pending_requests_lock(
889  multi_threading_params.pending_requests_mutex);
890 
891  while (!multi_threading_params.deferred_requests.empty()) {
892  auto& request = multi_threading_params.deferred_requests.front();
893  multi_threading_params.pending_requests.emplace(std::move(request));
894  multi_threading_params.deferred_requests.pop();
895  }
896  multi_threading_params.pending_requests_condition.notify_all();
897  }
898 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Dispatches a new metadata scan request by adding the request to the pending requests queue to be consumed by a worker thread.

Definition at line 904 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_scan_requests().

905  {
906  {
907  std::unique_lock<std::mutex> pending_requests_lock(
908  multi_threading_params.pending_requests_mutex);
909  multi_threading_params.pending_requests.emplace(std::move(request));
910  }
911  multi_threading_params.pending_requests_condition.notify_all();
912 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call 
)

Reads from a text file iteratively and dispatches metadata scan requests that are processed by worker threads.

Definition at line 1010 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::alloc_size, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_all_deferred_requests(), dispatch_scan_request(), foreign_storage::TextFileBufferParser::findRowEndPosition(), foreign_storage::IterativeFileScanParameters::fragment_id, get_request_from_pool(), foreign_storage::FileReader::getCurrentFilePath(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, TableDescriptor::maxFragRows, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::no_deferred_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::FileReader::read(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_alloc_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_data, and resize_buffer_if_needed().

Referenced by dispatch_scan_requests_with_exception_handling().

1023  {
1024  auto& alloc_size = iterative_residual_buffer.alloc_size;
1025  auto& residual_buffer = iterative_residual_buffer.residual_data;
1026  auto& residual_buffer_size = iterative_residual_buffer.residual_buffer_size;
1027  auto& residual_buffer_alloc_size = iterative_residual_buffer.residual_buffer_alloc_size;
1028 
1029  if (is_first_file_scan_call) {
1030  alloc_size = buffer_size;
1031  residual_buffer = std::make_unique<char[]>(alloc_size);
1032  residual_buffer_size = 0;
1033  residual_buffer_alloc_size = alloc_size;
1034  } else if (!no_deferred_requests(multi_threading_params)) {
1035  dispatch_all_deferred_requests(multi_threading_params);
1036  }
1037 
1038  while (!file_reader.isScanFinished()) {
1039  {
1040  std::lock_guard<std::mutex> pending_requests_lock(
1041  multi_threading_params.pending_requests_mutex);
1042  if (!multi_threading_params.continue_processing) {
1043  break;
1044  }
1045  }
1046  auto request = get_request_from_pool(multi_threading_params);
1047  request.full_path = file_reader.getCurrentFilePath();
1048  resize_buffer_if_needed(request.buffer, request.buffer_alloc_size, alloc_size);
1049 
1050  if (residual_buffer_size > 0) {
1051  memcpy(request.buffer.get(), residual_buffer.get(), residual_buffer_size);
1052  }
1053  size_t size = residual_buffer_size;
1054  size += file_reader.read(request.buffer.get() + residual_buffer_size,
1055  alloc_size - residual_buffer_size);
1056 
1057  if (size == 0) {
1058  // In some cases at the end of a file we will read 0 bytes even when
1059  // file_reader.isScanFinished() is false. Also add request back to the pool to be
1060  // picked up again in the next iteration.
1061  add_request_to_pool(multi_threading_params, request);
1062  continue;
1063  } else if (size == 1 && request.buffer[0] == copy_params.line_delim) {
1064  // In some cases files with newlines at the end will be encoded with a second
1065  // newline that can end up being the only thing in the buffer. Also add request
1066  // back to the pool to be picked up again in the next iteration.
1067  current_file_offset++;
1068  add_request_to_pool(multi_threading_params, request);
1069  continue;
1070  }
1071  unsigned int num_rows_in_buffer = 0;
1072  request.end_pos = parser.findRowEndPosition(alloc_size,
1073  request.buffer,
1074  size,
1075  copy_params,
1076  first_row_index_in_buffer,
1077  num_rows_in_buffer,
1078  &file_reader);
1079  request.buffer_size = size;
1080  request.buffer_alloc_size = alloc_size;
1081  request.first_row_index = first_row_index_in_buffer;
1082  request.file_offset = current_file_offset;
1083  request.buffer_row_count = num_rows_in_buffer;
1084  request.processed_row_count = 0;
1085  request.begin_pos = 0;
1086 
1087  residual_buffer_size = size - request.end_pos;
1088  if (residual_buffer_size > 0) {
1089  resize_buffer_if_needed(residual_buffer, residual_buffer_alloc_size, alloc_size);
1090  memcpy(residual_buffer.get(),
1091  request.buffer.get() + request.end_pos,
1092  residual_buffer_size);
1093  }
1094 
1095  current_file_offset += request.end_pos;
1096  first_row_index_in_buffer += num_rows_in_buffer;
1097 
1098  if (num_rows_in_buffer > 0) {
1099  dispatch_scan_request(multi_threading_params, request);
1100  } else {
1101  add_request_to_pool(multi_threading_params, request);
1102  }
1103 
1104  if (file_scan_param) {
1105  const int32_t last_fragment_index =
1106  (first_row_index_in_buffer) / table->maxFragRows;
1107  if (last_fragment_index > file_scan_param->fragment_id) {
1108  break;
1109  }
1110  }
1111  }
1112 
1113  std::unique_lock<std::mutex> pending_requests_queue_lock(
1114  multi_threading_params.pending_requests_mutex);
1115  multi_threading_params.pending_requests_condition.wait(
1116  pending_requests_queue_lock, [&multi_threading_params] {
1117  return multi_threading_params.pending_requests.empty() ||
1118  (multi_threading_params.continue_processing == false);
1119  });
1120  multi_threading_params.continue_processing = false;
1121  pending_requests_queue_lock.unlock();
1122  multi_threading_params.pending_requests_condition.notify_all();
1123 }
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
bool no_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
void dispatch_all_deferred_requests(MetadataScanMultiThreadingParams &multi_threading_params)
ParseBufferRequest get_request_from_pool(MetadataScanMultiThreadingParams &multi_threading_params)
void resize_buffer_if_needed(std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
void dispatch_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_scan_requests_with_exception_handling ( const foreign_storage::ForeignTable *  table,
const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser,
const foreign_storage::IterativeFileScanParameters *  file_scan_param,
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &  iterative_residual_buffer,
const bool  is_first_file_scan_call 
)

Definition at line 1125 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_scan_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan(), and foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

1138  {
1139  try {
1140  dispatch_scan_requests(table,
1141  buffer_size,
1142  file_path,
1143  file_reader,
1144  copy_params,
1145  multi_threading_params,
1146  first_row_index_in_buffer,
1147  current_file_offset,
1148  parser,
1149  file_scan_param,
1150  iterative_residual_buffer,
1151  is_first_file_scan_call);
1152  } catch (...) {
1153  {
1154  std::unique_lock<std::mutex> pending_requests_lock(
1155  multi_threading_params.pending_requests_mutex);
1156  multi_threading_params.continue_processing = false;
1157  }
1158  multi_threading_params.pending_requests_condition.notify_all();
1159  throw;
1160  }
1161 }
void dispatch_scan_requests(const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
ArrayDatum foreign_storage::encode_as_array_datum ( const std::vector< T > &  data)
inline

Definition at line 33 of file GeospatialEncoder.h.

References heavydb.dtypes::T.

Referenced by foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

33  {
34  const size_t num_bytes = data.size() * sizeof(T);
35  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
36  memcpy(buffer.get(), data.data(), num_bytes);
37  return ArrayDatum(num_bytes, buffer, false);
38 }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217

+ Here is the caller graph for this function:

bool foreign_storage::fragment_maps_to_leaf ( const ChunkKey key)

Definition at line 128 of file FsiChunkUtils.cpp.

References CHECK, g_distributed_leaf_idx, g_distributed_num_leaves, get_fragment(), dist::is_aggregator(), and dist::is_distributed().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

128  {
133 }
int get_fragment(const ChunkKey &key)
Definition: types.h:52
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
bool is_aggregator()
Definition: distributed.cpp:33
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
#define CHECK(condition)
Definition: Logger.h:222
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const parquet::ColumnDescriptor * foreign_storage::get_column_descriptor ( const parquet::arrow::FileReader *  reader,
const int  logical_column_index 
)

Definition at line 46 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

48  {
49  return reader->parquet_reader()->metadata()->schema()->Column(logical_column_index);
50 }

+ Here is the caller graph for this function:

std::set<ChunkKey> foreign_storage::get_column_key_set ( const ChunkKey destination_chunk_key)

Referenced by foreign_storage::CachingForeignStorageMgr::fetchBuffer(), foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit(), and foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize().

+ Here is the caller graph for this function:

std::vector<ChunkKey> foreign_storage::get_column_key_vec ( const ChunkKey destination_chunk_key)
std::string foreign_storage::get_db_name ( int32_t  db_id)

Definition at line 31 of file InternalSystemDataWrapper.cpp.

References Catalog_Namespace::DBMetadata::dbName, Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_dashboards(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_permissions(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_tables(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_users(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

31  {
33  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
34  if (sys_catalog.getMetadataForDBById(db_id, db_metadata)) {
35  return db_metadata.dbName;
36  } else {
37  // Database has been deleted.
39  }
40 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:341

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const foreign_storage::ForeignTable & foreign_storage::get_foreign_table_for_key ( const ChunkKey key)

Definition at line 101 of file FsiChunkUtils.cpp.

References CHECK, get_table_prefix(), Catalog_Namespace::SysCatalog::getCatalog(), and Catalog_Namespace::SysCatalog::instance().

Referenced by is_append_table_chunk_key(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

101  {
102  auto [db_id, tb_id] = get_table_prefix(key);
103  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
104  CHECK(catalog);
105  auto table = catalog->getForeignTable(tb_id);
106  CHECK(table);
107  return *table;
108 }
static SysCatalog & instance()
Definition: SysCatalog.h:341
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::get_max_chunk_size ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getBufferSize().

+ Here is the caller graph for this function:

template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > foreign_storage::get_min_max_bounds ( )
inline

Definition at line 31 of file SharedMetadataValidator.h.

31  {
32  static_assert(std::is_signed<D>::value,
33  "'get_min_max_bounds' is only valid for signed types");
34  return {get_null_value<D>() + 1, std::numeric_limits<D>::max()};
35 }
std::optional<ParseBufferRequest> foreign_storage::get_next_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets the next metadata scan request object from the pending requests queue. A null optional is returned if there are no further requests to be processed.

Definition at line 526 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by populate_chunks(), and scan_metadata().

527  {
528  std::unique_lock<std::mutex> pending_requests_lock(
529  multi_threading_params.pending_requests_mutex);
530  multi_threading_params.pending_requests_condition.wait(
531  pending_requests_lock, [&multi_threading_params] {
532  return !multi_threading_params.pending_requests.empty() ||
533  !multi_threading_params.continue_processing;
534  });
535  if (multi_threading_params.pending_requests.empty()) {
536  return {};
537  }
538  auto request = std::move(multi_threading_params.pending_requests.front());
539  multi_threading_params.pending_requests.pop();
540  pending_requests_lock.unlock();
541  multi_threading_params.pending_requests_condition.notify_all();
542  return std::move(request);
543 }

+ Here is the caller graph for this function:

template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V foreign_storage::get_null_value ( )
inline

Definition at line 21 of file SharedMetadataValidator.h.

21  {
22  return inline_int_null_value<V>();
23 }
size_t foreign_storage::get_num_threads ( const ForeignTable &  table)

Definition at line 17 of file AbstractFileStorageDataWrapper.cpp.

References foreign_storage::OptionsContainer::getOption(), import_export::num_import_threads(), and foreign_storage::AbstractFileStorageDataWrapper::THREADS_KEY.

Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().

17  {
18  auto num_threads = 0;
19  if (auto opt = table.getOption(AbstractFileStorageDataWrapper::THREADS_KEY);
20  opt.has_value()) {
21  num_threads = std::stoi(opt.value());
22  }
23  return import_export::num_import_threads(num_threads);
24 }
size_t num_import_threads(const int32_t copy_params_threads)
Definition: thread_count.h:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< int, int > foreign_storage::get_parquet_table_size ( const ReaderPtr &  reader)

Definition at line 39 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), foreign_storage::LazyParquetChunkLoader::loadRowGroups(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

39  {
40  auto file_metadata = reader->parquet_reader()->metadata();
41  const auto num_row_groups = file_metadata->num_row_groups();
42  const auto num_columns = file_metadata->num_columns();
43  return std::make_pair(num_row_groups, num_columns);
44 }

+ Here is the caller graph for this function:

parquet::Type::type foreign_storage::get_physical_type ( ReaderPtr &  reader,
const int  logical_column_index 
)

Definition at line 52 of file ParquetShared.cpp.

Referenced by anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

52  {
53  return reader->parquet_reader()
54  ->metadata()
55  ->schema()
56  ->Column(logical_column_index)
57  ->physical_type();
58 }

+ Here is the caller graph for this function:

std::shared_ptr< ChunkMetadata > foreign_storage::get_placeholder_metadata ( const SQLTypeInfo type,
size_t  num_elements 
)

Definition at line 77 of file FsiChunkUtils.cpp.

References SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::InternalSystemDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::updateRolledOffChunks().

78  {
79  ForeignStorageBuffer empty_buffer;
80  // Use default encoder metadata as in parquet wrapper
81  empty_buffer.initEncoder(type);
82 
83  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(type);
84  chunk_metadata->numElements = num_elements;
85 
86  if (!type.is_varlen_indeed()) {
87  chunk_metadata->numBytes = type.get_size() * num_elements;
88  }
89  // min/max not set by default for arrays, so get from elem type encoder
90  if (type.is_array()) {
91  ForeignStorageBuffer scalar_buffer;
92  scalar_buffer.initEncoder(type.get_elem_type());
93  auto scalar_metadata = scalar_buffer.getEncoder()->getMetadata(type.get_elem_type());
94  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
95  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
96  }
97 
98  return chunk_metadata;
99 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:389
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231
bool is_varlen_indeed() const
Definition: sqltypes.h:621
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:956
bool is_array() const
Definition: sqltypes.h:583

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ParseBufferRequest foreign_storage::get_request_from_pool ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets a request from the metadata scan request pool.

Definition at line 854 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_scan_requests().

855  {
856  std::unique_lock<std::mutex> request_pool_lock(
857  multi_threading_params.request_pool_mutex);
858  multi_threading_params.request_pool_condition.wait(
859  request_pool_lock,
860  [&multi_threading_params] { return !multi_threading_params.request_pool.empty(); });
861  auto request = std::move(multi_threading_params.request_pool.front());
862  multi_threading_params.request_pool.pop();
863  request_pool_lock.unlock();
864  CHECK(request.buffer);
865  return request;
866 }
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

std::unique_ptr< ColumnDescriptor > foreign_storage::get_sub_type_column_descriptor ( const ColumnDescriptor *  column)

Definition at line 76 of file ParquetShared.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::set_size(), and ColumnDescriptor::tableId.

Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_array_mapping().

77  {
78  auto column_type = column->columnType.get_elem_type();
79  if (column_type.get_size() == -1 && column_type.is_dict_encoded_string()) {
80  column_type.set_size(4); // override default size of -1
81  }
82  return std::make_unique<ColumnDescriptor>(
83  column->tableId, column->columnId, column->columnName, column_type);
84 }
void set_size(int s)
Definition: sqltypes.h:497
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:956
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string foreign_storage::get_table_name ( int32_t  db_id,
int32_t  table_id 
)

Definition at line 42 of file InternalSystemDataWrapper.cpp.

References CHECK, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.

Referenced by anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

42  {
44  CHECK(catalog);
45  auto table_name = catalog->getTableName(table_id);
46  if (table_name.has_value()) {
47  return table_name.value();
48  } else {
49  // It is possible for the table to be concurrently deleted while querying the system
50  // table.
52  }
53 }
constexpr const char * kDeletedValueIndicator
static SysCatalog & instance()
Definition: SysCatalog.h:341
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
FileRegion &  file_region 
)

Definition at line 44 of file CsvShared.cpp.

References CHECK, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::json_utils::get_value_from_object(), foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

44  {
45  CHECK(json_val.IsObject());
47  json_val, file_region.first_row_file_offset, "first_row_file_offset");
49  json_val, file_region.first_row_index, "first_row_index");
50  json_utils::get_value_from_object(json_val, file_region.region_size, "region_size");
51  json_utils::get_value_from_object(json_val, file_region.row_count, "row_count");
52  if (json_val.HasMember("filename")) {
53  json_utils::get_value_from_object(json_val, file_region.filename, "filename");
54  }
55 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:172
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
RowGroupInterval &  value 
)

Definition at line 671 of file ParquetDataWrapper.cpp.

References CHECK, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, foreign_storage::json_utils::get_value_from_object(), and foreign_storage::RowGroupInterval::start_index.

671  {
672  CHECK(json_val.IsObject());
673  json_utils::get_value_from_object(json_val, value.file_path, "file_path");
674  json_utils::get_value_from_object(json_val, value.start_index, "start_index");
675  json_utils::get_value_from_object(json_val, value.end_index, "end_index");
676 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:172
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

void foreign_storage::init_chunk_for_column ( const ChunkKey chunk_key,
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers,
Chunk_NS::Chunk chunk 
)

Definition at line 21 of file FsiChunkUtils.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Chunk_NS::Chunk::setPinnable(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMapForColumns().

25  {
26  auto catalog =
28  CHECK(catalog);
29 
30  ChunkKey data_chunk_key = chunk_key;
31  AbstractBuffer* data_buffer = nullptr;
32  AbstractBuffer* index_buffer = nullptr;
33  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
34  chunk_key[CHUNK_KEY_COLUMN_IDX]);
35 
36  if (column->columnType.is_varlen_indeed()) {
37  data_chunk_key.push_back(1);
38  ChunkKey index_chunk_key = chunk_key;
39  index_chunk_key.push_back(2);
40 
41  CHECK(buffers.find(data_chunk_key) != buffers.end());
42  CHECK(buffers.find(index_chunk_key) != buffers.end());
43 
44  data_buffer = buffers.find(data_chunk_key)->second;
45  index_buffer = buffers.find(index_chunk_key)->second;
46  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
47  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
48 
49  size_t index_offset_size{0};
50  if (column->columnType.is_string() || column->columnType.is_geometry()) {
51  index_offset_size = sizeof(StringOffsetT);
52  } else if (column->columnType.is_array()) {
53  index_offset_size = sizeof(ArrayOffsetT);
54  } else {
55  UNREACHABLE();
56  }
57  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
58  index_buffer->reserve(index_offset_size *
59  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
60  }
61  } else {
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68  }
69 
70  chunk.setPinnable(false);
71  chunk.setColumnDesc(column);
72  chunk.setBuffer(data_buffer);
73  chunk.setIndexBuffer(index_buffer);
74  chunk.initEncoder();
75 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:36
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:152
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define UNREACHABLE()
Definition: Logger.h:266
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
int32_t StringOffsetT
Definition: sqltypes.h:1224
static SysCatalog & instance()
Definition: SysCatalog.h:341
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1225
void initEncoder()
Definition: Chunk.cpp:290
#define CHECK(condition)
Definition: Logger.h:222
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_append_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 118 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and foreign_storage::ForeignTable::isAppendMode().

Referenced by foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

// Resolves the foreign table for chunk_key and returns its isAppendMode() result.
118  {
119  return get_foreign_table_for_key(chunk_key).isAppendMode();
120 }
bool isAppendMode() const
Checks if the table is in append mode.
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_replicated_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 114 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and table_is_replicated().

Referenced by is_shardable_key().

114  {
116 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)
bool table_is_replicated(const TableDescriptor *td)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_s3_uri ( const std::string &  file_path)

Definition at line 55 of file ForeignDataWrapperFactory.cpp.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), import_export::ForeignDataImporter::import(), and import_export::ForeignDataImporter::importGeneralS3().

/**
 * Checks whether a path is an S3 URI, i.e. whether it begins with the "s3://" scheme.
 *
 * The scheme has to sit at the very start of the string: a path that merely contains
 * "s3://" somewhere in the middle (for example a local directory name) is not an
 * S3 URI and is rejected.
 *
 * @param file_path path or URI to inspect
 * @return true if file_path starts with "s3://", false otherwise (including for an
 *         empty string)
 */
bool is_s3_uri(const std::string& file_path) {
  const std::string s3_prefix = "s3://";
  // compare() restricted to the leading s3_prefix.size() characters is a pure prefix
  // test; it also handles paths shorter than the prefix without going out of range.
  return file_path.compare(0, s3_prefix.size(), s3_prefix) == 0;
}

+ Here is the caller graph for this function:

bool foreign_storage::is_shardable_key ( const ChunkKey key)

Definition at line 122 of file FsiChunkUtils.cpp.

References dist::is_aggregator(), dist::is_distributed(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().

Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().

122  {
123  return (dist::is_distributed() && !dist::is_aggregator() &&
125 }
bool is_system_table_chunk_key(const ChunkKey &chunk_key)
bool is_replicated_table_chunk_key(const ChunkKey &chunk_key)
bool is_aggregator()
Definition: distributed.cpp:33
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_system_table_chunk_key ( const ChunkKey chunk_key)

Definition at line 110 of file FsiChunkUtils.cpp.

References get_foreign_table_for_key(), and TableDescriptor::is_system_table.

Referenced by is_shardable_key().

// Resolves the foreign table for chunk_key and returns its is_system_table flag.
110  {
111  return get_foreign_table_for_key(chunk_key).is_system_table;
112 }
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_table_enabled_on_node ( const ChunkKey key)

Referenced by foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper().

+ Here is the caller graph for this function:

bool foreign_storage::is_valid_source_type ( const import_export::CopyParams copy_params)

Verify if source_type is valid.

Definition at line 124 of file ForeignDataWrapperFactory.cpp.

References import_export::kDelimitedFile, import_export::kParquetFile, import_export::kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and import_export::ForeignDataImporter::importGeneral().

+ Here is the caller graph for this function:

bool foreign_storage::key_does_not_shard_to_leaf ( const ChunkKey key)

Definition at line 135 of file FsiChunkUtils.cpp.

References fragment_maps_to_leaf(), and is_shardable_key().

Referenced by cache_blocks(), populate_string_dictionary(), and anonymous_namespace{RelAlgExecutor.cpp}::set_parallelism_hints().

// True only when is_shardable_key(key) holds and fragment_maps_to_leaf(key) does not.
// fragment_maps_to_leaf is evaluated only for shardable keys (short-circuit &&).
135  {
136  return (is_shardable_key(key) && !fragment_maps_to_leaf(key));
137 }
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::num_rows_to_process ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  rows_remaining 
)

Number of rows to process given the current position defined by start_row_index, the max fragment size and the number of rows left to process.

Definition at line 480 of file AbstractTextFileDataWrapper.cpp.

Referenced by QueryExecutionContext::launchCpuCode(), and populate_chunks().

/**
 * Number of rows to process next, given the current position and the rows still
 * pending.
 *
 * The result is capped twice: by the free slots left in the fragment that
 * start_row_index falls into, and by rows_remaining, so the caller is never told to
 * process more rows than it actually has.
 *
 * @param start_row_index   index of the first row to be processed
 * @param max_fragment_size maximum number of rows per fragment (must be non-zero)
 * @param rows_remaining    number of rows left to process
 * @return min(rows_remaining, slots left in the current fragment)
 */
size_t num_rows_to_process(const size_t start_row_index,
                           const size_t max_fragment_size,
                           const size_t rows_remaining) {
  const size_t start_position_in_fragment = start_row_index % max_fragment_size;
  const size_t slots_left_in_fragment = max_fragment_size - start_position_in_fragment;
  return std::min<size_t>(rows_remaining, slots_left_in_fragment);
}

+ Here is the caller graph for this function:

UniqueReaderPtr foreign_storage::open_parquet_table ( const std::string &  file_path,
std::shared_ptr< arrow::fs::FileSystem > &  file_system 
)

Definition at line 26 of file ParquetShared.cpp.

Referenced by foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::FileReaderMap::insert(), and foreign_storage::LazyParquetChunkLoader::loadRowGroups().

// Opens file_path through file_system and returns a Parquet reader for it.
// Throws std::runtime_error (message includes the file system type name, the path and
// the status message) when the input file cannot be opened. A failure reported by
// OpenFile is raised through PARQUET_THROW_NOT_OK.
27  {
28  UniqueReaderPtr reader;
29  auto file_result = file_system->OpenInputFile(file_path);
30  if (!file_result.ok()) {
31  throw std::runtime_error{"Unable to access " + file_system->type_name() + " file: " +
32  file_path + ". " + file_result.status().message()};
33  }
// Safe to unwrap: the !ok() case has already thrown above.
34  auto infile = file_result.ValueOrDie();
35  PARQUET_THROW_NOT_OK(OpenFile(infile, arrow::default_memory_pool(), &reader));
36  return reader;
37 }
std::unique_ptr< parquet::arrow::FileReader > UniqueReaderPtr
Definition: ParquetShared.h:32

+ Here is the caller graph for this function:

ParseFileRegionResult foreign_storage::parse_file_regions ( const FileRegions &  file_regions,
const size_t  start_index,
const size_t  end_index,
FileReader &  file_reader,
ParseBufferRequest &  parse_file_request,
const std::map< int, Chunk_NS::Chunk > &  column_id_to_chunk_map,
const TextFileBufferParser &  parser 
)

Parses a set of file regions given a handle to the file and range of indexes for the file regions to be parsed.

Definition at line 213 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, foreign_storage::ParseBufferRequest::buffer_size, CHECK, CHECK_EQ, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, DEBUG_TIMER, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseFileRegionResult::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getTableName(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::FileReader::readRegion(), foreign_storage::ParseBufferResult::rejected_rows, run_benchmark_import::result, foreign_storage::ParseBufferResult::row_count, and throw_unexpected_number_of_items().

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunks().

// Reads and parses file_regions[start_index .. end_index] (both bounds inclusive), one
// region at a time, reusing parse_file_request and its buffer for every region.
// The returned result carries:
//   - file_offset: first_row_file_offset of the region at start_index
//   - row_count: sum of the row counts of all parsed regions
//   - rejected_row_indices: rejected rows, re-based onto the rows accumulated so far
//   - column_id_to_data_blocks_map: taken from the LAST parseBuffer call only
// A short read is reported through throw_unexpected_number_of_items.
220  {
221  auto timer = DEBUG_TIMER(__func__);
222  ParseFileRegionResult load_file_region_result{};
223  load_file_region_result.file_offset = file_regions[start_index].first_row_file_offset;
224  load_file_region_result.row_count = 0;
225 
226  ParseBufferResult result;
227  for (size_t i = start_index; i <= end_index; i++) {
// Each region has to fit into the request buffer, since it is read there in one go.
228  CHECK(file_regions[i].region_size <= parse_file_request.buffer_size);
229  auto read_size = file_reader.readRegion(parse_file_request.buffer.get(),
230  file_regions[i].first_row_file_offset,
231  file_regions[i].region_size);
232  if (file_regions[i].region_size != read_size) {
233  throw_unexpected_number_of_items(file_regions[i].region_size,
234  read_size,
235  "bytes",
236  parse_file_request.getTableName());
237  }
// Point the request at the freshly read region before parsing it.
238  parse_file_request.begin_pos = 0;
239  parse_file_request.end_pos = file_regions[i].region_size;
240  parse_file_request.first_row_index = file_regions[i].first_row_index;
241  parse_file_request.file_offset = file_regions[i].first_row_file_offset;
242  parse_file_request.process_row_count = file_regions[i].row_count;
243 
// NOTE(review): the second argument is true only for the last region; its exact
// meaning is defined by TextFileBufferParser::parseBuffer - confirm there.
244  result = parser.parseBuffer(parse_file_request, i == end_index);
245  CHECK_EQ(file_regions[i].row_count, result.row_count);
// rejected_rows are shifted by the rows already accumulated from earlier regions;
// row_count is only advanced after this loop.
246  for (const auto& rejected_row_index : result.rejected_rows) {
247  load_file_region_result.rejected_row_indices.insert(
248  load_file_region_result.row_count + rejected_row_index);
249  }
250  load_file_region_result.row_count += result.row_count;
251  }
// Only the data blocks of the final parseBuffer call are handed back.
252  load_file_region_result.column_id_to_data_blocks_map =
253  result.column_id_to_data_blocks_map;
254  return load_file_region_result;
255 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
void throw_unexpected_number_of_items(const size_t num_expected, const size_t num_loaded, const std::string &item_type, const std::string &foreign_table_name)
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<size_t> foreign_storage::partition_by_fragment ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  buffer_row_count 
)

Given a start row index, maximum fragment size, and number of rows in a buffer, this function returns a vector indicating how the rows in the buffer should be partitioned in order to fill up available fragment slots while staying within the capacity of fragments.

Definition at line 493 of file AbstractTextFileDataWrapper.cpp.

References CHECK.

Referenced by scan_metadata().

// Splits buffer_row_count rows into consecutive partition sizes so that no partition
// crosses a fragment boundary: the first partition tops up the fragment that
// start_row_index falls into (if that fragment is partially filled), and every
// following partition holds at most max_fragment_size rows.
// Requires buffer_row_count > 0 (enforced with CHECK).
// NOTE(review): max_fragment_size == 0 would make the modulo below undefined;
// presumably callers never pass 0 - confirm.
495  {
496  CHECK(buffer_row_count > 0);
497  std::vector<size_t> partitions{};
// Free slots left in the fragment containing start_row_index; 0 when start_row_index
// sits exactly on a fragment boundary.
498  size_t remaining_rows_in_last_fragment;
499  if (start_row_index % max_fragment_size == 0) {
500  remaining_rows_in_last_fragment = 0;
501  } else {
502  remaining_rows_in_last_fragment =
503  max_fragment_size - (start_row_index % max_fragment_size);
504  }
// Everything fits into the partially filled fragment: a single partition suffices.
505  if (buffer_row_count <= remaining_rows_in_last_fragment) {
506  partitions.emplace_back(buffer_row_count);
507  } else {
508  if (remaining_rows_in_last_fragment > 0) {
509  partitions.emplace_back(remaining_rows_in_last_fragment);
510  }
511  size_t remaining_buffer_row_count =
512  buffer_row_count - remaining_rows_in_last_fragment;
// Carve the rest into full fragments, plus one smaller tail partition if needed.
513  while (remaining_buffer_row_count > 0) {
514  partitions.emplace_back(
515  std::min<size_t>(remaining_buffer_row_count, max_fragment_size));
516  remaining_buffer_row_count -= partitions.back();
517  }
518  }
519  return partitions;
520 }
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::set< T > &  items,
size_t  max_threads 
)

Definition at line 41 of file FsiChunkUtils.h.

Referenced by create_futures_for_workers(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

/**
 * Splits a set of items into consecutive groups, one per worker thread.
 *
 * Every group receives ceil(items.size() / max_threads) items except the last one,
 * which takes whatever is left. Fewer than max_threads groups are returned when there
 * are not enough items to go around, and an empty set yields no groups at all.
 *
 * @param items       items to distribute, visited in set order
 * @param max_threads upper bound on the number of groups; 0 is treated as 1 so the
 *                    per-group size computation never divides by zero
 * @return list of groups, in the same relative order as the input
 */
template <typename T>
auto partition_for_threads(const std::set<T>& items, size_t max_threads) {
  const size_t thread_count = max_threads == 0 ? 1 : max_threads;
  const size_t items_per_thread = (items.size() + (thread_count - 1)) / thread_count;
  std::list<std::set<T>> items_by_thread;
  size_t item_index = 0;
  // Bind by const reference: copying each element only to re-insert it is wasted work.
  for (const auto& item : items) {
    // Start a new group every items_per_thread items (never zero inside this loop,
    // because the loop only runs when items is non-empty).
    if (item_index++ % items_per_thread == 0) {
      items_by_thread.emplace_back();
    }
    items_by_thread.back().emplace(item);
  }
  return items_by_thread;
}

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::vector< T > &  items,
size_t  max_threads 
)

Definition at line 60 of file FsiChunkUtils.h.

/**
 * Splits a vector of items into consecutive groups, one per worker thread.
 *
 * Every group receives ceil(items.size() / max_threads) items except the last one,
 * which takes whatever is left. Fewer than max_threads groups are returned when there
 * are not enough items to go around, and an empty vector yields no groups at all.
 *
 * @param items       items to distribute, visited in vector order
 * @param max_threads upper bound on the number of groups; 0 is treated as 1 so the
 *                    per-group size computation never divides by zero
 * @return list of groups, in the same relative order as the input
 */
template <typename T>
auto partition_for_threads(const std::vector<T>& items, size_t max_threads) {
  const size_t thread_count = max_threads == 0 ? 1 : max_threads;
  const size_t items_per_thread = (items.size() + (thread_count - 1)) / thread_count;
  std::list<std::vector<T>> items_by_thread;
  size_t item_index = 0;
  // Bind by const reference: copying each element only to re-insert it is wasted work.
  for (const auto& item : items) {
    // Start a new group every items_per_thread items (never zero inside this loop,
    // because the loop only runs when items is non-empty).
    if (item_index++ % items_per_thread == 0) {
      items_by_thread.emplace_back();
    }
    items_by_thread.back().emplace_back(item);
  }
  return items_by_thread;
}
void foreign_storage::populate_chunks ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser,
foreign_storage::IterativeFileScanParameters file_scan_param 
)

Consumes and processes scan requests from a pending requests queue and populates chunks during an iterative file scan.

Definition at line 918 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer_row_count, CHECK_LE, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, defer_scan_request(), foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, get_next_scan_request(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getMaxFragRows(), foreign_storage::ParseBufferRequest::import_buffers, num_rows_to_process(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, populate_chunks_using_data_blocks(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::ParseBufferRequest::processed_row_count, and run_benchmark_import::result.

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

// Worker loop: keeps taking requests from get_next_scan_request until it returns no
// value. For each request the rows not yet processed are parsed in fragment-sized
// steps and every parse result is handed to populate_chunks_using_data_blocks.
// If the next row belongs to a fragment beyond file_scan_param.fragment_id, the
// request is passed to defer_scan_request and the function returns immediately.
// On any exception continue_processing is set to false (under
// pending_requests_mutex), the request is returned to the pool and the exception is
// rethrown.
921  {
// Column id -> descriptor lookup, filled once from the first request that is seen.
922  std::map<int, const ColumnDescriptor*> column_by_id{};
923  while (true) {
924  auto request_opt = get_next_scan_request(multi_threading_params);
925  if (!request_opt.has_value()) {
926  break;
927  }
928  ParseBufferRequest& request = request_opt.value();
929  try {
930  if (column_by_id.empty()) {
931  for (const auto column : request.getColumns()) {
932  column_by_id[column->columnId] = column;
933  }
934  }
935  CHECK_LE(request.processed_row_count, request.buffer_row_count);
// Loop until every row of the buffer has been processed; the remaining count is
// recomputed from processed_row_count after each pass.
936  for (size_t num_rows_left_to_process =
937  request.buffer_row_count - request.processed_row_count;
938  num_rows_left_to_process > 0;
939  num_rows_left_to_process =
940  request.buffer_row_count - request.processed_row_count) {
941  // NOTE: `request.begin_pos` state is required to be set correctly by this point
942  // in execution
943  size_t row_index = request.first_row_index + request.processed_row_count;
944  int fragment_id = row_index / request.getMaxFragRows();
945  if (fragment_id >
946  file_scan_param.fragment_id) { // processing must continue next iteration
947  defer_scan_request(multi_threading_params, request);
948  return;
949  }
950  request.process_row_count = num_rows_to_process(
951  row_index, request.getMaxFragRows(), num_rows_left_to_process);
// Import buffers are reused between passes, so they are cleared before each parse.
952  for (const auto& import_buffer : request.import_buffers) {
953  if (import_buffer != nullptr) {
954  import_buffer->clear();
955  }
956  }
957  auto result = parser.parseBuffer(request, true);
958  populate_chunks_using_data_blocks(multi_threading_params,
959  fragment_id,
960  request,
961  result,
962  column_by_id,
963  fragment_id_to_file_regions_map,
964  file_scan_param);
// Advance the request state: rows consumed, and the buffer position derived from the
// last row offset relative to the request's file offset.
965  request.processed_row_count += result.row_count;
966  request.begin_pos = result.row_offsets.back() - request.file_offset;
967  }
968 
969  } catch (...) {
970  // Re-add request to pool so we dont block any other threads
971  {
972  std::lock_guard<std::mutex> pending_requests_lock(
973  multi_threading_params.pending_requests_mutex);
974  multi_threading_params.continue_processing = false;
975  }
976  add_request_to_pool(multi_threading_params, request);
977  throw;
978  }
// Normal completion of a request: hand it back to the pool for reuse.
979  add_request_to_pool(multi_threading_params, request);
980  }
981 }
size_t num_rows_to_process(const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void populate_chunks_using_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param)
void defer_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
#define CHECK_LE(x, y)
Definition: Logger.h:233
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_chunks_using_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const foreign_storage::IterativeFileScanParameters file_scan_param 
)

Definition at line 679 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), append_data_block_to_chunk(), CHECK_EQ, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::rejected_rows, and foreign_storage::ParseBufferResult::row_count.

Referenced by populate_chunks().

// Records the file region covered by `result` and then, for every column data block
// in result.column_id_to_data_blocks_map, raises the element count stored on that
// chunk's encoder by result.row_count and appends the block through
// append_data_block_to_chunk.
// chunk_encoder_buffers_mutex is held for the bookkeeping but released around each
// append_data_block_to_chunk call and re-acquired afterwards.
// fragment_id must equal file_scan_param.fragment_id (enforced with CHECK_EQ).
686  {
687  std::unique_lock<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
688  // File regions should be added in same order as appendData
689  add_file_region(fragment_id_to_file_regions_map,
690  fragment_id,
691  request.first_row_index,
692  result,
693  request.getFilePath());
694  CHECK_EQ(fragment_id, file_scan_param.fragment_id);
695 
696  for (auto& [column_id, data_block] : result.column_id_to_data_blocks_map) {
697  ChunkKey chunk_key{request.db_id, request.getTableId(), column_id, fragment_id};
698  const auto column = column_by_id[column_id];
// Variable-length columns get an extra key component of 1 appended to the chunk key.
699  if (column->columnType.is_varlen_indeed()) {
700  chunk_key.emplace_back(1);
701  }
// Create the encoder buffer for this chunk key on first use.
702  if (multi_threading_params.chunk_encoder_buffers.find(chunk_key) ==
703  multi_threading_params.chunk_encoder_buffers.end()) {
704  multi_threading_params.chunk_encoder_buffers[chunk_key] =
705  std::make_unique<ForeignStorageBuffer>();
706  multi_threading_params.chunk_encoder_buffers[chunk_key]->initEncoder(
707  column->columnType);
708  }
// Capture the element count BEFORE it is increased; this pre-update value is what
// gets passed to append_data_block_to_chunk as its last argument.
709  size_t current_element_count =
710  multi_threading_params.chunk_encoder_buffers[chunk_key]
711  ->getEncoder()
712  ->getNumElems();
713  size_t num_elements = current_element_count + result.row_count;
714  multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder()->setNumElems(
715  num_elements);
716  lock.unlock(); // unlock the fragment based lock in order to achieve better
717  // performance
718  append_data_block_to_chunk(file_scan_param,
719  data_block,
720  result.row_count,
721  chunk_key,
722  column,
723  result.rejected_rows,
724  current_element_count);
// Re-acquire before the next iteration touches chunk_encoder_buffers again.
725  lock.lock();
726  }
727 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< int > ChunkKey
Definition: types.h:36
void append_data_block_to_chunk(const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const ChunkKey &chunk_key, const ColumnDescriptor *column, const std::set< size_t > &rejected_row_indices, const size_t element_count_required)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::populate_string_dictionary ( const int32_t  table_id,
const int32_t  col_id,
const Catalog_Namespace::Catalog catalog 
)

Definition at line 205 of file Execute.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, Chunk_NS::Chunk::getChunk(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), Catalog_Namespace::Catalog::getMetadataForTable(), anonymous_namespace{Execute.cpp}::is_empty_table(), and key_does_not_shard_to_leaf().

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::prepare_string_dictionaries().

207  {
208  if (const auto foreign_table = dynamic_cast<const ForeignTable*>(
209  catalog.getMetadataForTable(table_id, false))) {
210  const auto col_desc = catalog.getMetadataForColumn(table_id, col_id);
211  if (col_desc->columnType.is_dict_encoded_type()) {
212  auto& fragmenter = foreign_table->fragmenter;
213  CHECK(fragmenter != nullptr);
214  if (is_empty_table(fragmenter.get())) {
215  return;
216  }
217  for (const auto& frag : fragmenter->getFragmentsForQuery().fragments) {
218  ChunkKey chunk_key = {catalog.getDatabaseId(), table_id, col_id, frag.fragmentId};
219  // If the key is sharded across leaves, only populate fragments that are sharded
220  // to this leaf.
221  if (key_does_not_shard_to_leaf(chunk_key)) {
222  continue;
223  }
224 
225  const ChunkMetadataMap& metadata_map = frag.getChunkMetadataMap();
226  CHECK(metadata_map.find(col_id) != metadata_map.end());
227  if (auto& meta = metadata_map.at(col_id); meta->isPlaceholder()) {
228  // When this goes out of scope it will stay in CPU cache but become
229  // evictable
230  auto chunk = Chunk_NS::Chunk::getChunk(col_desc,
231  &(catalog.getDataMgr()),
232  chunk_key,
234  0,
235  0,
236  0);
237  }
238  }
239  }
240  }
241 }
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:243
bool is_empty_table(Fragmenter_Namespace::AbstractFragmenter *fragmenter)
Definition: Execute.cpp:195
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
bool key_does_not_shard_to_leaf(const ChunkKey &key)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
int getDatabaseId() const
Definition: Catalog.h:298
#define CHECK(condition)
Definition: Logger.h:222
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::process_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map 
)

Updates metadata encapsulated in encoders for all table columns given new data blocks obtained from parsing a new set of rows in a file buffer. If a cache is available, also appends the data blocks to chunks in the cache.

Definition at line 734 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), cache_blocks(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::row_count, and update_stats().

Referenced by scan_metadata().

// Records the file region covered by `result` and then, for every column data block
// in result.column_id_to_data_blocks_map, updates the chunk's encoder (statistics via
// update_stats and the element count) and forwards the block to cache_blocks.
// chunk_encoder_buffers_mutex is held for the whole call.
739  {
740  std::lock_guard<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
741  // File regions should be added in same order as appendData
742  add_file_region(fragment_id_to_file_regions_map,
743  fragment_id,
744  request.first_row_index,
745  result,
746  request.getFilePath());
747 
748  for (auto& [column_id, data_block] : result.column_id_to_data_blocks_map) {
749  ChunkKey chunk_key{request.db_id, request.getTableId(), column_id, fragment_id};
750  const auto column = column_by_id[column_id];
// Variable-length columns get an extra key component of 1 appended to the chunk key.
751  if (column->columnType.is_varlen_indeed()) {
752  chunk_key.emplace_back(1);
753  }
// Create the encoder buffer for this chunk key on first use.
754  if (multi_threading_params.chunk_encoder_buffers.find(chunk_key) ==
755  multi_threading_params.chunk_encoder_buffers.end()) {
756  multi_threading_params.chunk_encoder_buffers[chunk_key] =
757  std::make_unique<ForeignStorageBuffer>();
758  multi_threading_params.chunk_encoder_buffers[chunk_key]->initEncoder(
759  column->columnType);
760  }
761  update_stats(multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder(),
762  column->columnType,
763  data_block,
764  result.row_count);
// New element count = count currently stored on the encoder + rows just parsed.
765  size_t num_elements = multi_threading_params.chunk_encoder_buffers[chunk_key]
766  ->getEncoder()
767  ->getNumElems() +
768  result.row_count;
769  multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder()->setNumElems(
770  num_elements);
771  cache_blocks(
772  multi_threading_params.cached_chunks,
773  data_block,
774  result.row_count,
775  chunk_key,
776  column,
777  (num_elements - result.row_count) == 0, // Is the first block added to this chunk
778  multi_threading_params.disable_cache);
779  }
780 }
std::vector< int > ChunkKey
Definition: types.h:36
void update_stats(Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
void cache_blocks(std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table ( Catalog_Namespace::Catalog catalog,
const std::string &  table_name,
const bool  evict_cached_entries 
)

Definition at line 103 of file ForeignTableRefresh.cpp.

References CHECK, shared::contains(), StorageType::FOREIGN_TABLE, dist::is_leaf_node(), Catalog_Namespace::kAggregatorOnlySystemTables, and refresh_foreign_table_unlocked().

Referenced by RefreshForeignTablesCommand::execute(), and foreign_storage::ForeignTableRefreshScheduler::start().

105  {
106  if (dist::is_leaf_node() &&
108  // Skip aggregator only system tables on leaf nodes.
109  return;
110  }
111  auto table_lock =
112  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
114  catalog, table_name, false));
115 
116  const TableDescriptor* td = (*table_lock)();
117  if (td->storageType != StorageType::FOREIGN_TABLE) {
118  throw std::runtime_error{
119  table_name +
120  " is not a foreign table. Refreshes are applicable to only foreign tables."};
121  }
122 
123  auto foreign_table = dynamic_cast<const ForeignTable*>(td);
124  CHECK(foreign_table);
125  refresh_foreign_table_unlocked(catalog, *foreign_table, evict_cached_entries);
126 }
bool contains(const T &container, const U &element)
Definition: misc.h:195
bool is_leaf_node()
Definition: distributed.cpp:29
#define CHECK(condition)
Definition: Logger.h:222
static const std::array< std::string, 4 > kAggregatorOnlySystemTables
Definition: Catalog.h:120
void refresh_foreign_table_unlocked(Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries)
static constexpr char const * FOREIGN_TABLE

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table_unlocked ( Catalog_Namespace::Catalog catalog,
const ForeignTable &  td,
const bool  evict_cached_entries 
)

Definition at line 35 of file ForeignTableRefresh.cpp.

References CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, foreign_storage::anonymous_namespace{ForeignTableRefresh.cpp}::clear_cpu_and_gpu_cache(), Catalog_Namespace::DBMetadata::dbId, Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), PostEvictionRefreshException::getOriginalException(), logger::INFO, CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable(), foreign_storage::ForeignTable::isAppendMode(), LOG, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, TableDescriptor::tableName, and Catalog_Namespace::Catalog::updateForeignTableRefreshTimes().

Referenced by refresh_foreign_table().

37  {
38  LOG(INFO) << "Starting refresh for table: " << td.tableName;
39  auto& data_mgr = catalog.getDataMgr();
40  ChunkKey table_key{catalog.getCurrentDB().dbId, td.tableId};
41  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
42  catalog.removeFragmenterForTable(td.tableId);
43 
44  auto fsm = data_mgr.getPersistentStorageMgr()->getForeignStorageMgr();
45  CHECK(fsm);
46  if (auto cfm = dynamic_cast<CachingForeignStorageMgr*>(fsm)) {
47  if (!cfm->hasStoredDataWrapper(table_key[CHUNK_KEY_DB_IDX],
48  table_key[CHUNK_KEY_TABLE_IDX])) {
49  // If there is no wrapper stored on disk, then we have not populated the metadata
50  // for this table and we are free to skip the refresh.
51  catalog.updateForeignTableRefreshTimes(td.tableId);
52  return;
53  }
54  }
55 
56  std::map<ChunkKey, std::shared_ptr<ChunkMetadata>> old_chunk_metadata_by_chunk_key;
57  if (td.isAppendMode() && !evict_cached_entries) {
58  ChunkMetadataVector metadata_vec;
59  data_mgr.getChunkMetadataVecForKeyPrefix(metadata_vec, table_key);
60  int last_fragment_id = 0;
61  for (const auto& [key, metadata] : metadata_vec) {
62  if (key[CHUNK_KEY_FRAGMENT_IDX] > last_fragment_id) {
63  last_fragment_id = key[CHUNK_KEY_FRAGMENT_IDX];
64  }
65  old_chunk_metadata_by_chunk_key[key] = metadata;
66  }
67  for (const auto& [key, metadata] : metadata_vec) {
68  if (key[CHUNK_KEY_FRAGMENT_IDX] == last_fragment_id) {
69  clear_cpu_and_gpu_cache(data_mgr, key);
70  }
71  }
72  } else {
73  clear_cpu_and_gpu_cache(data_mgr, table_key);
74  }
75 
76  try {
77  fsm->refreshTable(table_key, evict_cached_entries);
78  catalog.updateForeignTableRefreshTimes(td.tableId);
79  } catch (PostEvictionRefreshException& e) {
80  catalog.updateForeignTableRefreshTimes(td.tableId);
81  clear_cpu_and_gpu_cache(data_mgr, table_key);
82  throw e.getOriginalException();
83  } catch (...) {
84  clear_cpu_and_gpu_cache(data_mgr, table_key);
85  throw;
86  }
87 
88  // Delete cached rolled off/updated chunks.
89  if (!old_chunk_metadata_by_chunk_key.empty()) {
90  ChunkMetadataVector new_metadata_vec;
91  data_mgr.getChunkMetadataVecForKeyPrefix(new_metadata_vec, table_key);
92  for (const auto& [key, metadata] : new_metadata_vec) {
93  auto it = old_chunk_metadata_by_chunk_key.find(key);
94  if (it != old_chunk_metadata_by_chunk_key.end() &&
95  it->second->numElements != metadata->numElements) {
96  clear_cpu_and_gpu_cache(data_mgr, key);
97  }
98  }
99  }
100  LOG(INFO) << "Completed refresh for table: " << td.tableName;
101 }
static void invalidateCachesByTable(size_t table_key)
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:243
#define LOG(tag)
Definition: Logger.h:216
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::runtime_error getOriginalException()
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:242
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:3994
void clear_cpu_and_gpu_cache(Data_Namespace::DataMgr &data_mgr, const ChunkKey &key_prefix)
#define CHECK(condition)
Definition: Logger.h:222
void updateForeignTableRefreshTimes(const int32_t table_id)
Definition: Catalog.cpp:5398

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::reset_multithreading_params ( foreign_storage::MetadataScanMultiThreadingParams multi_threading_params)

Definition at line 997 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, and foreign_storage::MetadataScanMultiThreadingParams::request_pool.

Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().

998  {
999  multi_threading_params.request_pool = {};
1000  multi_threading_params.cached_chunks = {};
1001  multi_threading_params.pending_requests = {};
1002  multi_threading_params.deferred_requests = {};
1003  multi_threading_params.chunk_encoder_buffers.clear();
1004 }
std::map< ChunkKey, std::unique_ptr< ForeignStorageBuffer > > chunk_encoder_buffers

+ Here is the caller graph for this function:

void foreign_storage::resize_buffer_if_needed ( std::unique_ptr< char[]> &  buffer,
size_t &  buffer_size,
const size_t  alloc_size 
)

Resizes the given buffer to the requested allocation size if the current buffer size is smaller. The buffer is reallocated, so its previous contents are not preserved.

Definition at line 987 of file AbstractTextFileDataWrapper.cpp.

References CHECK_LE.

Referenced by dispatch_scan_requests().

989  {
990  CHECK_LE(buffer_size, alloc_size);
991  if (buffer_size < alloc_size) {
992  buffer = std::make_unique<char[]>(alloc_size);
993  buffer_size = alloc_size;
994  }
995 }
#define CHECK_LE(x, y)
Definition: Logger.h:233

+ Here is the caller graph for this function:

std::vector< Aws::S3::Model::Object > foreign_storage::s3_objects_filter_sort_files ( const std::vector< Aws::S3::Model::Object > &  file_paths,
const shared::FilePathOptions options 
)

Definition at line 22 of file S3FilePathUtil.cpp.

References shared::FilePathOptions::filter_regex, shared::PATHNAME_ORDER_TYPE, foreign_storage::anonymous_namespace{S3FilePathUtil.cpp}::s3_objects_regex_file_filter(), and shared::FilePathOptions::sort_by.

24  {
25  auto result_files =
26  options.filter_regex.has_value()
27  ? s3_objects_regex_file_filter(options.filter_regex.value(), file_paths)
28  : file_paths;
29  // initial lexicographical order ensures a deterministic ordering for files not matching
30  // sort_regex
31  shared::FilePathOptions temp_options;
32  temp_options.sort_by = shared::PATHNAME_ORDER_TYPE;
33  auto initial_file_order = FileOrderS3(temp_options);
34  auto lexi_comp = initial_file_order.getFileComparator();
35  std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
36 
37  auto file_order = FileOrderS3(options);
38  auto comp = file_order.getFileComparator();
39  std::stable_sort(result_files.begin(), result_files.end(), comp);
40  return result_files;
41 }
std::optional< std::string > filter_regex
std::vector< Aws::S3::Model::Object > s3_objects_regex_file_filter(const std::string &pattern, const std::vector< Aws::S3::Model::Object > &objects_list)
const std::string PATHNAME_ORDER_TYPE
std::optional< std::string > sort_by

+ Here is the call graph for this function:

void foreign_storage::scan_metadata ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser 
)

Consumes and processes metadata scan requests from a pending requests queue and updates existing metadata objects based on newly scanned metadata.

Definition at line 799 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::MetadataScanMultiThreadingParams::continue_processing, get_next_scan_request(), foreign_storage::TextFileBufferParser::parseBuffer(), partition_by_fragment(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, process_data_blocks(), and run_benchmark_import::result.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

801  {
802  std::map<int, const ColumnDescriptor*> column_by_id{};
803  while (true) {
804  auto request_opt = get_next_scan_request(multi_threading_params);
805  if (!request_opt.has_value()) {
806  break;
807  }
808  auto& request = request_opt.value();
809  try {
810  if (column_by_id.empty()) {
811  for (const auto column : request.getColumns()) {
812  column_by_id[column->columnId] = column;
813  }
814  }
815  auto partitions = partition_by_fragment(
816  request.first_row_index, request.getMaxFragRows(), request.buffer_row_count);
817  request.begin_pos = 0;
818  size_t row_index = request.first_row_index;
819  for (const auto partition : partitions) {
820  request.process_row_count = partition;
821  for (const auto& import_buffer : request.import_buffers) {
822  if (import_buffer != nullptr) {
823  import_buffer->clear();
824  }
825  }
826  auto result = parser.parseBuffer(request, true);
827  int fragment_id = row_index / request.getMaxFragRows();
828  process_data_blocks(multi_threading_params,
829  fragment_id,
830  request,
831  result,
832  column_by_id,
833  fragment_id_to_file_regions_map);
834  row_index += result.row_count;
835  request.begin_pos = result.row_offsets.back() - request.file_offset;
836  }
837  } catch (...) {
838  // Re-add request to pool so we don't block any other threads
839  {
840  std::lock_guard<std::mutex> pending_requests_lock(
841  multi_threading_params.pending_requests_mutex);
842  multi_threading_params.continue_processing = false;
843  }
844  add_request_to_pool(multi_threading_params, request);
845  throw;
846  }
847  add_request_to_pool(multi_threading_params, request);
848  }
849 }
std::vector< size_t > partition_by_fragment(const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
void process_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
std::optional< ParseBufferRequest > get_next_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::set_comp ( const ChunkKey left,
const ChunkKey right 
)
void foreign_storage::set_node_name ( std::map< std::string, import_export::TypedImportBuffer * > &  import_buffers)

Definition at line 55 of file InternalSystemDataWrapper.cpp.

References g_distributed_leaf_idx, dist::is_leaf_node(), and to_string().

Referenced by foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_summary(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().

56  {
57  if (import_buffers.find("node") != import_buffers.end()) {
58  if (dist::is_leaf_node()) {
59  std::string leaf_string{"Leaf " + to_string(g_distributed_leaf_idx)};
60  import_buffers["node"]->addString(leaf_string);
61  } else {
62  import_buffers["node"]->addString("Server");
63  }
64  }
65 }
std::string to_string(char const *&&v)
bool is_leaf_node()
Definition: distributed.cpp:29
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const FileRegion &  file_region,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 26 of file CsvShared.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

28  {
29  json_val.SetObject();
30  json_utils::add_value_to_object(
31  json_val, file_region.first_row_file_offset, "first_row_file_offset", allocator);
32  json_utils::add_value_to_object(
33  json_val, file_region.first_row_index, "first_row_index", allocator);
34  json_utils::add_value_to_object(
35  json_val, file_region.region_size, "region_size", allocator);
36  json_utils::add_value_to_object(
37  json_val, file_region.row_count, "row_count", allocator);
38  if (file_region.filename.size()) {
39  json_utils::add_value_to_object(
40  json_val, file_region.filename, "filename", allocator);
41  }
42 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:157

+ Here is the call graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const RowGroupInterval &  value,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 662 of file ParquetDataWrapper.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, and foreign_storage::RowGroupInterval::start_index.

664  {
665  json_val.SetObject();
666  json_utils::add_value_to_object(json_val, value.file_path, "file_path", allocator);
667  json_utils::add_value_to_object(json_val, value.start_index, "start_index", allocator);
668  json_utils::add_value_to_object(json_val, value.end_index, "end_index", allocator);
669 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:157

+ Here is the call graph for this function:

void foreign_storage::throw_file_access_error ( const std::string &  file_path,
const std::string &  message 
)
inline

Definition at line 89 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

90  {
91  std::string error_message{"Unable to access file \"" + file_path + "\". " + message};
92  throw ForeignStorageException{error_message};
93 }

+ Here is the caller graph for this function:

void foreign_storage::throw_file_not_found_error ( const std::string &  file_path)
inline

Definition at line 95 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

95  {
96  throw ForeignStorageException{"File or directory \"" + file_path +
97  "\" does not exist."};
98 }

+ Here is the caller graph for this function:

void foreign_storage::throw_number_of_columns_mismatch_error ( size_t  num_table_cols,
size_t  num_file_cols,
const std::string &  file_path 
)
inline

Definition at line 80 of file ForeignStorageException.h.

References to_string().

Referenced by foreign_storage::RegexFileBufferParser::regexMatchColumns(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_expected_column_count(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().

82  {
83  throw ForeignStorageException{"Mismatched number of logical columns: (expected " +
84  std::to_string(num_table_cols) + " columns, has " +
85  std::to_string(num_file_cols) + "): in file '" +
86  file_path + "'"};
87 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::throw_parquet_metadata_out_of_bounds_error ( const std::string &  min_value,
const std::string &  max_value,
const std::string &  encountered_value 
)
inline

Definition at line 46 of file ParquetMetadataValidator.h.

Referenced by foreign_storage::TimestampBoundsValidator< T >::validateValue(), foreign_storage::IntegralFixedLengthBoundsValidator< T >::validateValue(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue(), and foreign_storage::FloatPointValidator< T >::validateValue().

49  {
50  std::stringstream error_message;
51  error_message << "Parquet column contains values that are outside the range of the "
52  "HeavyDB column "
53  "type. Consider using a wider column type. Min allowed value: "
54  << min_value << ". Max allowed value: " << max_value
55  << ". Encountered value: " << encountered_value << ".";
56  throw std::runtime_error(error_message.str());
57 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_file_error ( const std::string &  file_path)
inline

Definition at line 73 of file ForeignStorageException.h.

Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

73  {
74  throw ForeignStorageException{
75  "Refresh of foreign table created with \"APPEND\" update type failed as "
76  "file \"" +
77  file_path + "\" was removed."};
78 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_file_error ( const std::string &  file_path)
inline

Definition at line 66 of file ForeignStorageException.h.

Referenced by foreign_storage::SingleTextFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

66  {
67  throw ForeignStorageException{
68  "Refresh of foreign table created with \"APPEND\" update type failed as file "
69  "reduced in size: \"" +
70  file_path + "\""};
71 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_result_set_error ( const std::string &  select_statement)
inline

Definition at line 58 of file ForeignStorageException.h.

58  {
59  throw ForeignStorageException{
60  "Refresh of foreign table created with \"APPEND\" update type failed as result set "
61  "of select statement "
62  "reduced in size: \"" +
63  select_statement + "\""};
64 }
void foreign_storage::throw_s3_compressed_extension ( const std::string &  file_path,
const std::string &  ext_type 
)
inline

Definition at line 107 of file ForeignStorageException.h.

108  {
109  throw ForeignStorageException{
110  "File \"" + file_path + "\" has extension type \"" + ext_type +
111  "\", compressed file formats are not supported by S3 Foreign Tables."};
112 }
void foreign_storage::throw_s3_compressed_mime_type ( const std::string &  file_path,
const std::string &  mime_type 
)
inline

Definition at line 100 of file ForeignStorageException.h.

101  {
102  throw ForeignStorageException{
103  "File \"" + file_path + "\" has mime type \"" + mime_type +
104  "\", compressed file formats are not supported by S3 Foreign Tables."};
105 }
void foreign_storage::throw_unexpected_number_of_items ( const size_t &  num_expected,
const size_t &  num_loaded,
const std::string &  item_type 
)
inline

Definition at line 40 of file ForeignStorageException.h.

References to_string().

Referenced by parse_file_regions(), and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_unexpected_number_of_items().

42  {
43  throw ForeignStorageException(
44  "Unexpected number of " + item_type +
45  " while loading from foreign data source: expected " +
46  std::to_string(num_expected) + " , obtained " + std::to_string(num_loaded) + " " +
47  item_type +
48  ". Please use the \"REFRESH FOREIGN TABLES\" command on the foreign table "
49  "if data source has been updated.");
50 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::update_stats ( Encoder encoder,
const SQLTypeInfo column_type,
DataBlockPtr  data_block,
const size_t  row_count 
)

Updates the statistics metadata encapsulated in the encoder given new data in a data block.

Definition at line 567 of file AbstractTextFileDataWrapper.cpp.

References DataBlockPtr::arraysPtr, SQLTypeInfo::is_array(), SQLTypeInfo::is_varlen(), DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and Encoder::updateStats().

Referenced by process_data_blocks(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata(), and StorageIOFacility::yieldUpdateCallback().

570  {
571  if (column_type.is_array()) {
572  encoder->updateStats(data_block.arraysPtr, 0, row_count);
573  } else if (!column_type.is_varlen()) {
574  encoder->updateStats(data_block.numbersPtr, row_count);
575  } else {
576  encoder->updateStats(data_block.stringsPtr, 0, row_count);
577  }
578 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:222
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:223
bool is_varlen() const
Definition: sqltypes.h:615
int8_t * numbersPtr
Definition: sqltypes.h:221
virtual void updateStats(const int64_t val, const bool is_null)=0
bool is_array() const
Definition: sqltypes.h:583

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< parquet::Statistics > foreign_storage::validate_and_get_column_metadata_statistics ( const parquet::ColumnChunkMetaData *  column_metadata)

Definition at line 86 of file ParquetShared.cpp.

References CHECK.

Referenced by foreign_storage::ParquetEncoder::getRowGroupMetadata(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_definition_levels().

87  {
88  CHECK(column_metadata->is_stats_set());
89  std::shared_ptr<parquet::Statistics> stats = column_metadata->statistics();
90  return stats;
91 }
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

void foreign_storage::validate_equal_column_descriptor ( const parquet::ColumnDescriptor *  reference_descriptor,
const parquet::ColumnDescriptor *  new_descriptor,
const std::string &  reference_file_path,
const std::string &  new_file_path 
)

Definition at line 60 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

64  {
65  if (!reference_descriptor->Equals(*new_descriptor)) {
66  throw std::runtime_error{"Parquet file \"" + new_file_path +
67  "\" has a different schema. Please ensure that all Parquet "
68  "files use the same schema. Reference Parquet file: " +
69  reference_file_path +
70  ", column name: " + reference_descriptor->name() +
71  ". New Parquet file: " + new_file_path +
72  ", column name: " + new_descriptor->name() + "."};
73  }
74 }

+ Here is the caller graph for this function:

void foreign_storage::validate_non_foreign_table_write ( const TableDescriptor table_descriptor)
inline

Definition at line 22 of file FsiUtils.h.

References StorageType::FOREIGN_TABLE, and TableDescriptor::storageType.

Referenced by Parser::InsertStmt::analyze(), Parser::TruncateTableStmt::execute(), Parser::InsertValuesStmt::execute(), Parser::InsertIntoTableAsSelectStmt::populateData(), and RelModify::RelModify().

22  {
23  if (table_descriptor && table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
24  throw std::runtime_error{
25  "DELETE, INSERT, TRUNCATE, OR UPDATE commands are not supported for foreign "
26  "tables."};
27  }
28 }
std::string storageType
static constexpr char const * FOREIGN_TABLE

+ Here is the caller graph for this function:

void foreign_storage::validate_regex_parser_options ( const import_export::CopyParams copy_params)

Definition at line 118 of file ForeignDataWrapperFactory.cpp.

References import_export::CopyParams::line_regex.

Referenced by anonymous_namespace{ForeignDataImporter.cpp}::validate_copy_params().

118  {
119  if (copy_params.line_regex.empty()) {
120  throw std::runtime_error{"Regex parser options must contain a line regex."};
121  }
122 }

+ Here is the caller graph for this function:

Variable Documentation