OmniSciDB
085a039ca4
|
#include <RegexFileBufferParser.h>
Public Member Functions | |
RegexFileBufferParser (const ForeignTable *foreign_table) | |
ParseBufferResult | parseBuffer (ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false) const override |
import_export::CopyParams | validateAndGetCopyParams (const ForeignTable *foreign_table) const override |
size_t | findRowEndPosition (size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const override |
void | validateFiles (const FileReader *file_reader, const ForeignTable *foreign_table) const override |
Static Public Member Functions | |
static void | setMaxBufferResize (size_t max_buffer_resize) |
Static Public Member Functions inherited from foreign_storage::TextFileBufferParser | |
static std::map< int, DataBlockPtr > | convertImportBuffersToDataBlocks (const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers) |
static bool | isCoordinateScalar (const std::string_view datum) |
static void | processGeoColumn (std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, std::list< const ColumnDescriptor * >::iterator &cd_it, std::vector< std::string_view > &row, size_t &import_idx, bool is_null, size_t first_row_index, size_t row_index_plus_one, std::shared_ptr< Catalog_Namespace::Catalog > catalog, const RenderGroupAnalyzerMap *render_group_analyzer_map) |
static void | fillRejectedRowWithInvalidData (const std::list< const ColumnDescriptor * > &columns, std::list< const ColumnDescriptor * >::iterator &cd_it, const size_t col_idx, ParseBufferRequest &request) |
static bool | isNullDatum (const std::string_view datum, const ColumnDescriptor *column, const std::string &null_indicator) |
Static Public Attributes | |
static const std::string | LINE_REGEX_KEY = "LINE_REGEX" |
static const std::string | LINE_START_REGEX_KEY = "LINE_START_REGEX" |
static const std::string | HEADER_KEY = "HEADER" |
Static Public Attributes inherited from foreign_storage::TextFileBufferParser | |
static const std::string | THREADS_KEY = "THREADS" |
static const std::string | BUFFER_SIZE_KEY = "BUFFER_SIZE" |
Static Private Member Functions | |
static size_t | getMaxBufferResize () |
Private Attributes | |
boost::regex | line_regex_ |
std::optional< boost::regex > | line_start_regex_ |
Static Private Attributes | |
static size_t | max_buffer_resize_ |
static bool | skip_first_line_ {false} |
Definition at line 23 of file RegexFileBufferParser.h.
foreign_storage::RegexFileBufferParser::RegexFileBufferParser | ( | const ForeignTable * | foreign_table | ) |
Definition at line 172 of file RegexFileBufferParser.cpp.
|
override virtual |
Finds and returns the offset of the end of the last row in the given buffer. If the buffer does not contain at least one row, the buffer is extended with more content from the file until a row is read. An exception is thrown if the buffer is extended to a maximum threshold and at least one row has still not been read.
Implements foreign_storage::TextFileBufferParser.
Definition at line 362 of file RegexFileBufferParser.cpp.
References CHECK, CHECK_EQ, CHECK_GT, import_export::delimited_parser::extend_buffer(), foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::find_last_end_of_line(), foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_row_count(), getMaxBufferResize(), foreign_storage::FileReader::isEndOfLastFile(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, line_start_regex_, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex(), and import_export::delimited_parser::max_buffer_resize.
|
static private |
Definition at line 447 of file RegexFileBufferParser.cpp.
References max_buffer_resize_.
Referenced by findRowEndPosition().
|
override virtual |
Parses a given file buffer and returns data blocks for each column in the file along with metadata related to rows and row offsets within the buffer.
Implements foreign_storage::TextFileBufferParser.
Definition at line 180 of file RegexFileBufferParser.cpp.
References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, CHECK, foreign_storage::TextFileBufferParser::convertImportBuffersToDataBlocks(), foreign_storage::ParseBufferRequest::copy_params, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::TextFileBufferParser::fillRejectedRowWithInvalidData(), foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::foreign_table_schema, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_next_row(), foreign_storage::ParseBufferRequest::getCatalog(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::import_buffers, is_null(), foreign_storage::TextFileBufferParser::isNullDatum(), import_export::CopyParams::line_delim, line_regex_, line_start_regex_, import_export::CopyParams::null_str, foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::TextFileBufferParser::processGeoColumn(), foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::regex_match_columns(), foreign_storage::ParseBufferRequest::render_group_analyzer_map, run_benchmark_import::result, and foreign_storage::ParseBufferRequest::track_rejected_rows.
|
static |
Definition at line 443 of file RegexFileBufferParser.cpp.
References import_export::delimited_parser::max_buffer_resize, and max_buffer_resize_.
|
override virtual |
Validates foreign table parse options and returns a CopyParams object upon successful validation. An exception is thrown if validation fails.
Implements foreign_storage::TextFileBufferParser.
Definition at line 339 of file RegexFileBufferParser.cpp.
References foreign_storage::TextFileBufferParser::BUFFER_SIZE_KEY, HEADER_KEY, import_export::kHasHeader, import_export::kNoHeader, foreign_storage::OptionsContainer::options, import_export::CopyParams::plain_text, foreign_storage::TextFileBufferParser::THREADS_KEY, and foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_and_get_bool_value().
|
override virtual |
Performs basic validation of files to be parsed.
Implements foreign_storage::TextFileBufferParser.
Definition at line 424 of file RegexFileBufferParser.cpp.
References CHECK, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_start_regex(), foreign_storage::FileReader::getFirstLineForEachFile(), parse_ast::line, line_start_regex_, and foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex().
|
inline static |
Definition at line 50 of file RegexFileBufferParser.h.
Referenced by validateAndGetCopyParams().
|
private |
Definition at line 61 of file RegexFileBufferParser.h.
Referenced by parseBuffer().
|
inline static |
|
private |
Definition at line 62 of file RegexFileBufferParser.h.
Referenced by findRowEndPosition(), parseBuffer(), and validateFiles().
|
inline static |
|
inline static private |
Definition at line 55 of file RegexFileBufferParser.h.
Referenced by getMaxBufferResize(), and setMaxBufferResize().
|
inline static private |
Definition at line 59 of file RegexFileBufferParser.h.