19 #include <arrow/filesystem/filesystem.h>
20 #include <parquet/schema.h>
32 namespace foreign_storage {
47 const std::string& foreign_table_name);
72 std::list<std::unique_ptr<ChunkMetadata>>
loadChunk(
73 const std::vector<RowGroupInterval>& row_group_intervals,
74 const int parquet_column_index,
75 std::list<Chunk_NS::Chunk>& chunks,
90 const std::vector<std::string>& file_paths,
92 const bool do_metadata_stats_validation =
true);
104 const parquet::ColumnDescriptor* parquet_column);
126 const std::map<int, Chunk_NS::Chunk>& chunks,
128 const std::map<int, StringDictionary*>& column_dictionaries,
129 const int num_threads = 1);
142 const size_t max_num_rows,
159 const parquet::ColumnDescriptor* parquet_column);
162 const std::vector<RowGroupInterval>& row_group_intervals,
163 const int parquet_column_index,
165 std::list<Chunk_NS::Chunk>& chunks,
168 const bool is_for_detect =
false,
169 const std::optional<int64_t> max_levels_read = std::nullopt);
static bool isColumnMappingSupported(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
const RenderGroupAnalyzerMap * render_group_analyzer_map_
std::string foreign_table_name_
std::list< std::unique_ptr< ChunkMetadata > > loadChunk(const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary=nullptr, RejectedRowIndices *rejected_row_indices=nullptr)
LazyParquetChunkLoader(std::shared_ptr< arrow::fs::FileSystem > file_system, FileReaderMap *file_reader_cache, const RenderGroupAnalyzerMap *render_group_analyzer_map, const std::string &foreign_table_name)
specifies the content in-memory of a row in the column metadata table
std::pair< size_t, size_t > loadRowGroups(const RowGroupInterval &row_group_interval, const std::map< int, Chunk_NS::Chunk > &chunks, const ForeignTableSchema &schema, const std::map< int, StringDictionary * > &column_dictionaries, const int num_threads=1)
Load row groups of data into given chunks.
std::list< RowGroupMetadata > metadataScan(const std::vector< std::string > &file_paths, const ForeignTableSchema &schema, const bool do_metadata_stats_validation=true)
Perform a metadata scan for the paths specified.
static const int batch_reader_num_elements
std::set< int64_t > RejectedRowIndices
DataPreview previewFiles(const std::vector< std::string > &files, const size_t max_num_rows, const ForeignTable &table)
Preview rows of data and column types in a set of files.
FileReaderMap * file_reader_cache_
std::list< std::unique_ptr< ChunkMetadata > > appendRowGroups(const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, const ColumnDescriptor *column_descriptor, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, RejectedRowIndices *rejected_row_indices, const bool is_for_detect=false, const std::optional< int64_t > max_levels_read=std::nullopt)
std::shared_ptr< arrow::fs::FileSystem > file_system_
std::map< int, std::unique_ptr< import_export::RenderGroupAnalyzer >> RenderGroupAnalyzerMap
size_t g_max_import_threads
static SQLTypeInfo suggestColumnMapping(const parquet::ColumnDescriptor *parquet_column)