OmniSciDB
c1a53651b2
|
#include <AbstractTextFileDataWrapper.h>
Classes | |
struct | ResidualBuffer |
Public Member Functions | |
AbstractTextFileDataWrapper () | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table) | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping, const bool disable_cache) | |
void | populateChunkMetadata (ChunkMetadataVector &chunk_metadata_vector) override |
void | populateChunkBuffers (const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers, AbstractBuffer *delete_buffer) override |
std::string | getSerializedDataWrapper () const override |
void | restoreDataWrapperInternals (const std::string &file_path, const ChunkMetadataVector &chunk_metadata) override |
bool | isRestored () const override |
ParallelismLevel | getCachedParallelismLevel () const override |
ParallelismLevel | getNonCachedParallelismLevel () const override |
void | createRenderGroupAnalyzers () override |
Create RenderGroupAnalyzers for poly columns. More... | |
bool | isLazyFragmentFetchingEnabled () const override |
Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
AbstractFileStorageDataWrapper () | |
void | validateServerOptions (const ForeignServer *foreign_server) const override |
void | validateTableOptions (const ForeignTable *foreign_table) const override |
const std::set < std::string_view > & | getSupportedTableOptions () const override |
void | validateUserMappingOptions (const UserMapping *user_mapping, const ForeignServer *foreign_server) const override |
const std::set < std::string_view > & | getSupportedUserMappingOptions () const override |
const std::set< std::string > | getAlterableTableOptions () const override |
Member Functions inherited from foreign_storage::ForeignDataWrapper | |
ForeignDataWrapper ()=default | |
virtual | ~ForeignDataWrapper ()=default |
virtual void | validateSchema (const std::list< ColumnDescriptor > &columns) const |
Protected Member Functions | |
virtual const TextFileBufferParser & | getFileBufferParser () const =0 |
virtual std::optional< size_t > | getMaxFileCount () const |
Private Member Functions | |
AbstractTextFileDataWrapper (const ForeignTable *foreign_table) | |
void | iterativeFileScan (ChunkMetadataVector &chunk_metadata_vector, IterativeFileScanParameters &file_scan_param) |
void | populateChunks (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id, AbstractBuffer *delete_buffer) |
void | populateChunkMapForColumns (const std::set< const ColumnDescriptor * > &columns, const int fragment_id, const ChunkToBufferMap &buffers, std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map) |
void | updateMetadata (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id) |
void | updateRolledOffChunks (const std::set< std::string > &rolled_off_files, const std::map< int32_t, const ColumnDescriptor * > &column_by_id) |
Private Attributes | |
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > | chunk_metadata_map_ |
std::map< int, FileRegions > | fragment_id_to_file_regions_map_ |
std::unique_ptr< FileReader > | file_reader_ |
const int | db_id_ |
const ForeignTable * | foreign_table_ |
std::map< ChunkKey, std::unique_ptr < ForeignStorageBuffer > > | chunk_encoder_buffers_ |
size_t | num_rows_ |
size_t | append_start_offset_ |
bool | is_restored_ |
const UserMapping * | user_mapping_ |
const bool | disable_cache_ |
bool | is_first_file_scan_call_ |
bool | is_file_scan_in_progress_ |
int | iterative_scan_last_fragment_id_ |
RenderGroupAnalyzerMap | render_group_analyzer_map_ |
MetadataScanMultiThreadingParams | multi_threading_params_ |
size_t | buffer_size_ |
size_t | thread_count_ |
ResidualBuffer | residual_buffer_ |
Additional Inherited Members | |
Types inherited from foreign_storage::ForeignDataWrapper | |
enum | ParallelismLevel { NONE, INTRA_FRAGMENT, INTER_FRAGMENT } |
Static Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static shared::FilePathOptions | getFilePathOptions (const ForeignTable *foreign_table) |
Static Attributes inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static const std::string | STORAGE_TYPE_KEY = "STORAGE_TYPE" |
static const std::string | BASE_PATH_KEY = "BASE_PATH" |
static const std::string | FILE_PATH_KEY = "FILE_PATH" |
static const std::string | REGEX_PATH_FILTER_KEY = "REGEX_PATH_FILTER" |
static const std::string | LOCAL_FILE_STORAGE_TYPE = "LOCAL_FILE" |
static const std::string | S3_STORAGE_TYPE = "AWS_S3" |
static const std::string | FILE_SORT_ORDER_BY_KEY = shared::FILE_SORT_ORDER_BY_KEY |
static const std::string | FILE_SORT_REGEX_KEY = shared::FILE_SORT_REGEX_KEY |
static const std::string | ALLOW_FILE_ROLL_OFF_KEY = "ALLOW_FILE_ROLL_OFF" |
static const std::string | THREADS_KEY = "THREADS" |
static const std::array < std::string, 1 > | supported_storage_types |
Static Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static std::string | getFullFilePath (const ForeignTable *foreign_table) |
Returns the path to the source file/dir of the table. Depending on options this may result from a concatenation of server and table path options. More... | |
static bool | allowFileRollOff (const ForeignTable *foreign_table) |
Definition at line 92 of file AbstractTextFileDataWrapper.h.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | ) |
Definition at line 37 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table | ||
) |
Definition at line 46 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table, | ||
const UserMapping * | user_mapping, | ||
const bool | disable_cache | ||
) |
Definition at line 58 of file AbstractTextFileDataWrapper.cpp.
|
private |
|
override virtual |
Create RenderGroupAnalyzers for poly columns.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 1756 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHECK_GE, db_id_, foreign_table_, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), IS_GEO_POLY, render_group_analyzer_map_, and TableDescriptor::tableId.
|
inline override virtual |
Gets the desired level of parallelism for the data wrapper when a cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 115 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
protected pure virtual |
Implemented in foreign_storage::CsvDataWrapper, foreign_storage::RegexParserDataWrapper, and foreign_storage::InternalLogsDataWrapper.
Referenced by iterativeFileScan(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
protected virtual |
Reimplemented in foreign_storage::InternalLogsDataWrapper.
Definition at line 1776 of file AbstractTextFileDataWrapper.cpp.
Referenced by iterativeFileScan(), and populateChunkMetadata().
|
inline override virtual |
Gets the desired level of parallelism for the data wrapper when no cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 117 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
override virtual |
Serialize the internal state of the wrapper and return it as a string.
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1680 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::json_utils::add_value_to_object(), append_start_offset_, file_reader_, fragment_id_to_file_regions_map_, num_rows_, and foreign_storage::json_utils::write_to_string().
|
inline override virtual |
If true, the data wrapper implements a lazy fragment fetching mode. This mode allows requests for fragments to be issued to populateChunks without the prerequisite that populateChunkMetadata has successfully finished execution. This is an optimization that has some specific use-cases and is not required.
NOTE: this mode is not guaranteed to work as expected when combined with certain types of refresh modes such as append. This is subject to change in the future, but has no impact on the intended use-cases of this mode.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 123 of file AbstractTextFileDataWrapper.h.
|
override virtual |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1749 of file AbstractTextFileDataWrapper.cpp.
References is_restored_.
|
private |
Implements an iterative file scan that enables populating chunks fragment by fragment.
Definition at line 1516 of file AbstractTextFileDataWrapper.cpp.
References append_start_offset_, threading_serial::async(), buffer_size_, CHECK, chunk_metadata_map_, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, db_id_, DEBUG_TIMER, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, disable_cache_, foreign_storage::dispatch_scan_requests_with_exception_handling(), file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), Catalog_Namespace::SysCatalog::getCatalog(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFilePathOptions(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), getMaxFileCount(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::initialize_non_append_mode_scan(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::is_file_scan_finished(), is_file_scan_in_progress_, is_first_file_scan_call_, foreign_storage::ForeignTable::isAppendMode(), iterative_scan_last_fragment_id_, multi_threading_params_, num_rows_, foreign_storage::OptionsContainer::options, run_benchmark_import::parser, foreign_storage::populate_chunks(), render_group_analyzer_map_, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::reset_multithreading_params(), residual_buffer_, TableDescriptor::tableId, thread_count_, user_mapping_, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
Referenced by populateChunks().
|
override virtual |
Populates given chunk buffers identified by chunk keys. All provided chunk buffers are expected to be for the same fragment.
required_buffers | - chunk buffers that must always be populated |
optional_buffers | - chunk buffers that can be optionally populated, if the data wrapper has to scan through chunk data anyway (typically for row-wise data formats) |
delete_buffer | - chunk buffer for the fragment's delete column; if non-null, the data wrapper is expected to mark deleted rows in the buffer and continue processing |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 119 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_FRAGMENT_IDX, db_id_, DEBUG_TIMER, foreign_table_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_columns(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), is_file_scan_in_progress_, populateChunkMapForColumns(), populateChunks(), TableDescriptor::tableId, and updateMetadata().
|
private |
Definition at line 104 of file AbstractTextFileDataWrapper.cpp.
References chunk_metadata_map_, db_id_, foreign_table_, foreign_storage::init_chunk_for_column(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
override virtual |
Populates provided chunk metadata vector with metadata for table specified in given chunk key. Metadata scan for text file(s) configured for foreign table occurs in parallel whenever appropriate. Parallel processing involves the main thread creating ParseBufferRequest objects, which contain buffers with text content read from file and adding these request objects to a queue that is consumed by a fixed number of threads. After request processing, request objects are put back into a pool for reuse for subsequent requests in order to avoid unnecessary allocation of new buffers.
chunk_metadata_vector | - vector to be populated with chunk metadata |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1363 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::AbstractFileStorageDataWrapper::allowFileRollOff(), append_start_offset_, threading_serial::async(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, CHECK, CHECK_EQ, foreign_storage::MultiFileReader::checkForRolledOffFiles(), foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, chunk_encoder_buffers_, CHUNK_KEY_COLUMN_IDX, chunk_metadata_map_, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, db_id_, DEBUG_TIMER, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, disable_cache_, foreign_storage::dispatch_scan_requests_with_exception_handling(), file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), Catalog_Namespace::SysCatalog::getCatalog(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFilePathOptions(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), getMaxFileCount(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::initialize_non_append_mode_scan(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, run_benchmark_import::parser, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::scan_metadata(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, TableDescriptor::tableId, UNREACHABLE, updateRolledOffChunks(), user_mapping_, and 
foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Populates provided chunks with appropriate data by parsing all file regions containing chunk data.
column_id_to_chunk_map | - map of column id to chunks to be populated |
fragment_id | - fragment id of given chunks |
delete_buffer | - optional buffer to store deleted row indices |
Definition at line 339 of file AbstractTextFileDataWrapper.cpp.
References threading_serial::async(), CHECK, db_id_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), Data_Namespace::AbstractBuffer::getMemoryPtr(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::is_file_scan_finished(), is_file_scan_in_progress_, is_first_file_scan_call_, iterativeFileScan(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, multi_threading_params_, foreign_storage::OptionsContainer::options, foreign_storage::parse_file_regions(), run_benchmark_import::parser, render_group_analyzer_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer(), run_benchmark_import::result, foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_fragment_id_out_of_bounds_error(), UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
Referenced by populateChunkBuffers().
|
override virtual |
Restore the internal state of the data wrapper.
file_path | - location of file created by serializeMetadata |
chunk_metadata | - vector of chunk metadata recovered from disk |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1702 of file AbstractTextFileDataWrapper.cpp.
References append_start_offset_, CHECK, chunk_encoder_buffers_, chunk_metadata_map_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::json_utils::get_value_from_object(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), is_restored_, foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, foreign_storage::json_utils::read_from_file(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Definition at line 148 of file AbstractTextFileDataWrapper.cpp.
References CHECK, chunk_metadata_map_, db_id_, foreign_table_, shared::get_from_map(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
private |
Definition at line 1635 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_FRAGMENT_IDX, chunk_metadata_map_, shared::contains(), fragment_id_to_file_regions_map_, shared::get_from_map(), and foreign_storage::get_placeholder_metadata().
Referenced by populateChunkMetadata().
|
private |
Definition at line 183 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 205 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 179 of file AbstractTextFileDataWrapper.h.
Referenced by populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 170 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkMapForColumns(), populateChunkMetadata(), restoreDataWrapperInternals(), updateMetadata(), and updateRolledOffChunks().
|
private |
Definition at line 175 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), iterativeFileScan(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), and updateMetadata().
|
private |
Definition at line 190 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunkMetadata().
|
private |
Definition at line 173 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
private |
Definition at line 176 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), iterativeFileScan(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), restoreDataWrapperInternals(), and updateMetadata().
|
private |
Definition at line 171 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), populateChunks(), restoreDataWrapperInternals(), and updateRolledOffChunks().
|
private |
Definition at line 193 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkBuffers(), and populateChunks().
|
private |
Definition at line 192 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunks().
|
private |
Definition at line 185 of file AbstractTextFileDataWrapper.h.
Referenced by isRestored(), and restoreDataWrapperInternals().
|
private |
Definition at line 196 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 204 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunks().
|
private |
Definition at line 181 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 201 of file AbstractTextFileDataWrapper.h.
Referenced by createRenderGroupAnalyzers(), iterativeFileScan(), and populateChunks().
|
private |
Definition at line 208 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 206 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 187 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunkMetadata().