OmniSciDB
085a039ca4
|
#include <AbstractTextFileDataWrapper.h>
Public Member Functions | |
AbstractTextFileDataWrapper () | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table) | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping, const bool disable_cache) | |
void | populateChunkMetadata (ChunkMetadataVector &chunk_metadata_vector) override |
void | populateChunkBuffers (const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers, AbstractBuffer *delete_buffer) override |
std::string | getSerializedDataWrapper () const override |
void | restoreDataWrapperInternals (const std::string &file_path, const ChunkMetadataVector &chunk_metadata) override |
bool | isRestored () const override |
ParallelismLevel | getCachedParallelismLevel () const override |
ParallelismLevel | getNonCachedParallelismLevel () const override |
void | createRenderGroupAnalyzers () override |
Create RenderGroupAnalyzers for poly columns. More... | |
Inherited from foreign_storage::AbstractFileStorageDataWrapper | |
AbstractFileStorageDataWrapper () | |
void | validateServerOptions (const ForeignServer *foreign_server) const override |
void | validateTableOptions (const ForeignTable *foreign_table) const override |
const std::set < std::string_view > & | getSupportedTableOptions () const override |
void | validateUserMappingOptions (const UserMapping *user_mapping, const ForeignServer *foreign_server) const override |
const std::set < std::string_view > & | getSupportedUserMappingOptions () const override |
Inherited from foreign_storage::ForeignDataWrapper | |
ForeignDataWrapper ()=default | |
virtual | ~ForeignDataWrapper ()=default |
virtual const std::set < std::string > | getAlterableTableOptions () const |
virtual void | validateSchema (const std::list< ColumnDescriptor > &columns) const |
Protected Member Functions | |
virtual const TextFileBufferParser & | getFileBufferParser () const =0 |
Private Member Functions | |
AbstractTextFileDataWrapper (const ForeignTable *foreign_table) | |
void | populateChunks (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id, AbstractBuffer *delete_buffer) |
void | populateChunkMapForColumns (const std::set< const ColumnDescriptor * > &columns, const int fragment_id, const ChunkToBufferMap &buffers, std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map) |
void | updateMetadata (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id) |
Private Attributes | |
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > | chunk_metadata_map_ |
std::map< int, FileRegions > | fragment_id_to_file_regions_map_ |
std::unique_ptr< FileReader > | file_reader_ |
const int | db_id_ |
const ForeignTable * | foreign_table_ |
std::map< ChunkKey, std::unique_ptr < ForeignStorageBuffer > > | chunk_encoder_buffers_ |
size_t | num_rows_ |
size_t | append_start_offset_ |
bool | is_restored_ |
const UserMapping * | user_mapping_ |
const bool | disable_cache_ |
RenderGroupAnalyzerMap | render_group_analyzer_map_ |
Additional Inherited Members | |
Inherited from foreign_storage::ForeignDataWrapper | |
enum | ParallelismLevel { NONE, INTRA_FRAGMENT, INTER_FRAGMENT } |
Inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static const std::string | STORAGE_TYPE_KEY = "STORAGE_TYPE" |
static const std::string | BASE_PATH_KEY = "BASE_PATH" |
static const std::string | FILE_PATH_KEY = "FILE_PATH" |
static const std::string | REGEX_PATH_FILTER_KEY = "REGEX_PATH_FILTER" |
static const std::string | LOCAL_FILE_STORAGE_TYPE = "LOCAL_FILE" |
static const std::string | S3_STORAGE_TYPE = "AWS_S3" |
static const std::string | FILE_SORT_ORDER_BY_KEY = shared::FILE_SORT_ORDER_BY_KEY |
static const std::string | FILE_SORT_REGEX_KEY = shared::FILE_SORT_REGEX_KEY |
static const std::array < std::string, 1 > | supported_storage_types |
Inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static std::string | getFullFilePath (const ForeignTable *foreign_table) |
Returns the path to the source file/dir of the table. Depending on options this may result from a concatenation of server and table path options. More... | |
Definition at line 32 of file AbstractTextFileDataWrapper.h.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | ) |
Definition at line 36 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table | ||
) |
Definition at line 42 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table, | ||
const UserMapping * | user_mapping, | ||
const bool | disable_cache | ||
) |
Definition at line 51 of file AbstractTextFileDataWrapper.cpp.
|
private |
|
overridevirtual |
Create RenderGroupAnalyzers for poly columns.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 1071 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHECK_GE, db_id_, foreign_table_, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), IS_GEO_POLY, render_group_analyzer_map_, and TableDescriptor::tableId.
|
inlineoverridevirtual |
Gets the desired level of parallelism for the data wrapper when a cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 55 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
protectedpure virtual |
Implemented in foreign_storage::CsvDataWrapper, and foreign_storage::RegexParserDataWrapper.
Referenced by populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
inlineoverridevirtual |
Gets the desired level of parallelism for the data wrapper when no cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 57 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
overridevirtual |
Serializes the internal state of the wrapper and returns it as a string, if implemented.
Implements foreign_storage::ForeignDataWrapper.
Definition at line 995 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::json_utils::add_value_to_object(), append_start_offset_, file_reader_, fragment_id_to_file_regions_map_, num_rows_, and foreign_storage::json_utils::write_to_string().
|
overridevirtual |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1064 of file AbstractTextFileDataWrapper.cpp.
References is_restored_.
|
overridevirtual |
Populates given chunk buffers identified by chunk keys. All provided chunk buffers are expected to be for the same fragment.
required_buffers | - chunk buffers that must always be populated |
optional_buffers | - chunk buffers that can be optionally populated, if the data wrapper has to scan through the chunk data anyway (typically for row-wise data formats) |
delete_buffer | - chunk buffer for the fragment's delete column; if non-null, the data wrapper is expected to mark deleted rows in the buffer and continue processing |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 98 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_FRAGMENT_IDX, db_id_, DEBUG_TIMER, foreign_table_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_columns(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), populateChunkMapForColumns(), populateChunks(), TableDescriptor::tableId, and updateMetadata().
|
private |
Definition at line 83 of file AbstractTextFileDataWrapper.cpp.
References chunk_metadata_map_, db_id_, foreign_table_, foreign_storage::init_chunk_for_column(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
overridevirtual |
Populates provided chunk metadata vector with metadata for table specified in given chunk key. Metadata scan for text file(s) configured for foreign table occurs in parallel whenever appropriate. Parallel processing involves the main thread creating ParseBufferRequest objects, which contain buffers with text content read from file, and adding these request objects to a queue that is consumed by a fixed number of threads. After request processing, request objects are put back into a pool for reuse by subsequent requests, in order to avoid unnecessary allocation of new buffers.
chunk_metadata_vector | - vector to be populated with chunk metadata |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 840 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), append_start_offset_, threading_serial::async(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, CHECK, CHECK_EQ, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, chunk_encoder_buffers_, CHUNK_KEY_COLUMN_IDX, chunk_metadata_map_, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, db_id_, DEBUG_TIMER, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, disable_cache_, foreign_storage::dispatch_metadata_scan_requests(), file_reader_, foreign_storage::AbstractFileStorageDataWrapper::FILE_SORT_ORDER_BY_KEY, foreign_storage::AbstractFileStorageDataWrapper::FILE_SORT_REGEX_KEY, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::get_buffer_size(), foreign_storage::get_thread_count(), Catalog_Namespace::SysCatalog::getCatalog(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), foreign_storage::OptionsContainer::getOption(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, run_benchmark_import::parser, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::AbstractFileStorageDataWrapper::REGEX_PATH_FILTER_KEY, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::scan_metadata(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, TableDescriptor::tableId, UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Populates provided chunks with appropriate data by parsing all file regions containing chunk data.
column_id_to_chunk_map | - map of column id to chunks to be populated |
fragment_id | - fragment id of given chunks |
delete_buffer | - optional buffer to store deleted row indices |
Definition at line 270 of file AbstractTextFileDataWrapper.cpp.
References Data_Namespace::AbstractBuffer::append(), threading_serial::async(), CHECK, db_id_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::get_buffer_size(), foreign_storage::get_thread_count(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, foreign_storage::OptionsContainer::options, foreign_storage::parse_file_regions(), run_benchmark_import::parser, render_group_analyzer_map_, Data_Namespace::AbstractBuffer::reserve(), run_benchmark_import::result, foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
Referenced by populateChunkBuffers().
|
overridevirtual |
Restores the internal state of the data wrapper.
file_path | - location of file created by serializeMetadata |
chunk_metadata | - vector of chunk metadata recovered from disk |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1017 of file AbstractTextFileDataWrapper.cpp.
References append_start_offset_, CHECK, chunk_encoder_buffers_, chunk_metadata_map_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::json_utils::get_value_from_object(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), is_restored_, foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, foreign_storage::json_utils::read_from_file(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Definition at line 125 of file AbstractTextFileDataWrapper.cpp.
References CHECK, chunk_metadata_map_, db_id_, foreign_table_, shared::get_from_map(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
private |
Definition at line 102 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 98 of file AbstractTextFileDataWrapper.h.
Referenced by populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 89 of file AbstractTextFileDataWrapper.h.
Referenced by populateChunkMapForColumns(), populateChunkMetadata(), restoreDataWrapperInternals(), and updateMetadata().
|
private |
Definition at line 94 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), and updateMetadata().
|
private |
Definition at line 109 of file AbstractTextFileDataWrapper.h.
Referenced by populateChunkMetadata().
|
private |
Definition at line 92 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
private |
Definition at line 95 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), restoreDataWrapperInternals(), and updateMetadata().
|
private |
Definition at line 90 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
private |
Definition at line 104 of file AbstractTextFileDataWrapper.h.
Referenced by isRestored(), and restoreDataWrapperInternals().
|
private |
Definition at line 100 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 114 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), and populateChunks().
|
private |
Definition at line 106 of file AbstractTextFileDataWrapper.h.