OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::LazyParquetChunkLoader Class Reference

#include <LazyParquetChunkLoader.h>

+ Collaboration diagram for foreign_storage::LazyParquetChunkLoader:

Public Member Functions

 LazyParquetChunkLoader (std::shared_ptr< arrow::fs::FileSystem > file_system, FileReaderMap *file_reader_cache, const ForeignTable *foreign_table)
 
std::list< std::unique_ptr
< ChunkMetadata > > 
loadChunk (const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary=nullptr, RejectedRowIndices *rejected_row_indices=nullptr)
 
std::list< RowGroupMetadata > metadataScan (const std::vector< std::string > &file_paths, const ForeignTableSchema &schema, const bool do_metadata_stats_validation=true)
 Perform a metadata scan for the paths specified. More...
 
std::pair< size_t, size_t > loadRowGroups (const RowGroupInterval &row_group_interval, const std::map< int, Chunk_NS::Chunk > &chunks, const ForeignTableSchema &schema, const std::map< int, StringDictionary * > &column_dictionaries, const int num_threads=1)
 Load row groups of data into given chunks. More...
 
DataPreview previewFiles (const std::vector< std::string > &files, const size_t max_num_rows, const ForeignTable &table)
 Preview rows of data and column types in a set of files. More...
 

Static Public Member Functions

static bool isColumnMappingSupported (const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
 

Static Public Attributes

static const int batch_reader_num_elements = 4096
 

Private Member Functions

std::list< std::unique_ptr
< ChunkMetadata > > 
appendRowGroups (const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, const ColumnDescriptor *column_descriptor, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, RejectedRowIndices *rejected_row_indices, const bool is_for_detect=false, const std::optional< int64_t > max_levels_read=std::nullopt)
 

Static Private Member Functions

static SQLTypeInfo suggestColumnMapping (const parquet::ColumnDescriptor *parquet_column)
 

Private Attributes

std::shared_ptr
< arrow::fs::FileSystem > 
file_system_
 
FileReaderMap * file_reader_cache_
 
const ForeignTable * foreign_table_
 

Detailed Description

A lazy parquet to chunk loader

Definition at line 37 of file LazyParquetChunkLoader.h.

Constructor & Destructor Documentation

foreign_storage::LazyParquetChunkLoader::LazyParquetChunkLoader ( std::shared_ptr< arrow::fs::FileSystem >  file_system,
FileReaderMap *  file_reader_cache,
const ForeignTable *  foreign_table 
)

Definition at line 2024 of file LazyParquetChunkLoader.cpp.

References CHECK, and foreign_table_.

2028  : file_system_(file_system)
2029  , file_reader_cache_(file_map)
2030  , foreign_table_(foreign_table) {
2031  CHECK(foreign_table_) << "LazyParquetChunkLoader: null Foreign Table ptr";
2032 }
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< arrow::fs::FileSystem > file_system_

Member Function Documentation

std::list< std::unique_ptr< ChunkMetadata > > foreign_storage::LazyParquetChunkLoader::appendRowGroups ( const std::vector< RowGroupInterval > &  row_group_intervals,
const int  parquet_column_index,
const ColumnDescriptor *  column_descriptor,
std::list< Chunk_NS::Chunk > &  chunks,
StringDictionary *  string_dictionary,
RejectedRowIndices *  rejected_row_indices,
const bool  is_for_detect = false,
const std::optional< int64_t >  max_levels_read = std::nullopt 
)
private

Definition at line 1824 of file LazyParquetChunkLoader.cpp.

References batch_reader_num_elements, CHECK, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_encoder(), DEBUG_TIMER, file_reader_cache_, file_system_, foreign_table_, foreign_storage::ForeignTable::GEO_VALIDATE_GEOMETRY_KEY, foreign_storage::get_column_descriptor(), foreign_storage::get_parquet_table_size(), foreign_storage::OptionsContainer::getOptionAsBool(), foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::resize_values_buffer(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::set_definition_levels_for_zero_max_definition_level_case(), TableDescriptor::tableName, to_string(), foreign_storage::validate_equal_column_descriptor(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_list_column_metadata_statistics(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_max_repetition_and_definition_level().

Referenced by loadChunk(), and previewFiles().

1832  {
1833  auto timer = DEBUG_TIMER(__func__);
1834  std::list<std::unique_ptr<ChunkMetadata>> chunk_metadata;
1835  // `def_levels` and `rep_levels` below are used to store the read definition
1836  // and repetition levels of the Dremel encoding implemented by the Parquet
1837  // format
1838  std::vector<int16_t> def_levels(LazyParquetChunkLoader::batch_reader_num_elements);
1839  std::vector<int16_t> rep_levels(LazyParquetChunkLoader::batch_reader_num_elements);
1840  std::vector<int8_t> values;
1841 
1842  CHECK(!row_group_intervals.empty());
1843  const auto& first_file_path = row_group_intervals.front().file_path;
1844 
1845  auto first_file_reader = file_reader_cache_->getOrInsert(first_file_path, file_system_);
1846  auto first_parquet_column_descriptor =
1847  get_column_descriptor(first_file_reader, parquet_column_index);
1848  resize_values_buffer(column_descriptor, first_parquet_column_descriptor, values);
1849 
1850  const bool geo_validate_geometry =
1852  auto encoder = create_parquet_encoder(column_descriptor,
1853  first_parquet_column_descriptor,
1854  chunks,
1855  string_dictionary,
1856  chunk_metadata,
1857  false,
1858  false,
1859  is_for_detect,
1860  geo_validate_geometry);
1861  CHECK(encoder.get());
1862 
1863  if (rejected_row_indices) { // error tracking is enabled
1864  encoder->initializeErrorTracking();
1865  }
1866  encoder->initializeColumnType(column_descriptor->columnType);
1867 
1868  bool early_exit = false;
1869  int64_t total_levels_read = 0;
1870  for (const auto& row_group_interval : row_group_intervals) {
1871  const auto& file_path = row_group_interval.file_path;
1872  auto file_reader = file_reader_cache_->getOrInsert(file_path, file_system_);
1873 
1874  auto [num_row_groups, num_columns] = get_parquet_table_size(file_reader);
1875  CHECK(row_group_interval.start_index >= 0 &&
1876  row_group_interval.end_index < num_row_groups);
1877  CHECK(parquet_column_index >= 0 && parquet_column_index < num_columns);
1878 
1879  parquet::ParquetFileReader* parquet_reader = file_reader->parquet_reader();
1880  auto parquet_column_descriptor =
1881  get_column_descriptor(file_reader, parquet_column_index);
1882  validate_equal_column_descriptor(first_parquet_column_descriptor,
1883  parquet_column_descriptor,
1884  first_file_path,
1885  file_path);
1886 
1888  parquet_column_descriptor);
1890  def_levels);
1891 
1892  int64_t values_read = 0;
1893  for (int row_group_index = row_group_interval.start_index;
1894  row_group_index <= row_group_interval.end_index;
1895  ++row_group_index) {
1896  auto group_reader = parquet_reader->RowGroup(row_group_index);
1897  std::shared_ptr<parquet::ColumnReader> col_reader =
1898  group_reader->Column(parquet_column_index);
1899 
1900  try {
1901  while (col_reader->HasNext()) {
1902  int64_t levels_read =
1904  def_levels.data(),
1905  rep_levels.data(),
1906  reinterpret_cast<uint8_t*>(values.data()),
1907  &values_read,
1908  col_reader.get());
1909 
1910  if (rejected_row_indices) { // error tracking is enabled
1911  encoder->appendDataTrackErrors(def_levels.data(),
1912  rep_levels.data(),
1913  values_read,
1914  levels_read,
1915  values.data());
1916  } else { // no error tracking enabled
1918  parquet_reader, // this validation only in effect for foreign tables
1919  row_group_index,
1920  parquet_column_index,
1921  def_levels.data(),
1922  levels_read,
1923  parquet_column_descriptor);
1924 
1925  encoder->appendData(def_levels.data(),
1926  rep_levels.data(),
1927  values_read,
1928  levels_read,
1929  values.data());
1930  }
1931 
1932  if (max_levels_read.has_value()) {
1933  total_levels_read += levels_read;
1934  if (total_levels_read >= max_levels_read.value()) {
1935  early_exit = true;
1936  break;
1937  }
1938  }
1939  }
1940  if (auto array_encoder = dynamic_cast<ParquetArrayEncoder*>(encoder.get())) {
1941  array_encoder->finalizeRowGroup();
1942  }
1943  } catch (const std::exception& error) {
1944  // check for a specific error to detect a possible unexpected switch of data
1945  // source in order to respond with informative error message
1946  if (boost::regex_search(error.what(),
1947  boost::regex{"Deserializing page header failed."})) {
1948  throw ForeignStorageException(
1949  "Unable to read from foreign data source, possible cause is an unexpected "
1950  "change of source. Please use the \"REFRESH FOREIGN TABLES\" command on "
1951  "the "
1952  "foreign table "
1953  "if data source has been updated. Foreign table: " +
1955  }
1956 
1957  throw ForeignStorageException(
1958  std::string(error.what()) + " Row group: " + std::to_string(row_group_index) +
1959  ", Parquet column: '" + col_reader->descr()->path()->ToDotString() +
1960  "', Parquet file: '" + file_path + "'");
1961  }
1962  if (max_levels_read.has_value() && early_exit) {
1963  break;
1964  }
1965  }
1966  if (max_levels_read.has_value() && early_exit) {
1967  break;
1968  }
1969  }
1970 
1971  if (rejected_row_indices) { // error tracking is enabled
1972  *rejected_row_indices = encoder->getRejectedRowIndices();
1973  }
1974  return chunk_metadata;
1975 }
std::string tableName
std::pair< int, int > get_parquet_table_size(const ReaderPtr &reader)
void validate_equal_column_descriptor(const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
std::shared_ptr< ParquetEncoder > create_parquet_encoder(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool is_metadata_scan, const bool is_for_import, const bool is_for_detect, const bool geo_validate_geometry)
Create a Parquet specific encoder for a Parquet to OmniSci mapping.
void validate_list_column_metadata_statistics(const parquet::ParquetFileReader *reader, const int row_group_index, const int column_index, const int16_t *def_levels, const int64_t num_levels, const parquet::ColumnDescriptor *parquet_column_descriptor)
std::string to_string(char const *&&v)
void set_definition_levels_for_zero_max_definition_level_case(const parquet::ColumnDescriptor *parquet_column_descriptor, std::vector< int16_t > &def_levels)
const parquet::ColumnDescriptor * get_column_descriptor(const parquet::arrow::FileReader *reader, const int logical_column_index)
const ReaderPtr getOrInsert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:70
static constexpr const char * GEO_VALIDATE_GEOMETRY_KEY
Definition: ForeignTable.h:49
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
void validate_max_repetition_and_definition_level(const ColumnDescriptor *omnisci_column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
std::shared_ptr< arrow::fs::FileSystem > file_system_
void resize_values_buffer(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column, std::vector< int8_t > &values)
SQLTypeInfo columnType
bool getOptionAsBool(const std::string_view &key) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::LazyParquetChunkLoader::isColumnMappingSupported ( const ColumnDescriptor *  omnisci_column,
const parquet::ColumnDescriptor *  parquet_column 
)
static

Determine if a Parquet to OmniSci column mapping is supported.

Parameters
omnisci_column- the column descriptor of the OmniSci column
parquet_column- the column descriptor of the Parquet column
Returns
true if the column mapping is supported by LazyParquetChunkLoader, false otherwise

Definition at line 1989 of file LazyParquetChunkLoader.cpp.

References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::is_array(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_date_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_decimal_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_floating_point_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_geospatial_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_integral_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_none_type_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_string_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_time_mapping(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_timestamp_mapping().

Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping().

1991  {
1992  CHECK(!omnisci_column->columnType.is_array())
1993  << "isColumnMappingSupported should not be called on arrays";
1994  if (validate_geospatial_mapping(omnisci_column, parquet_column)) {
1995  return true;
1996  }
1997  if (validate_decimal_mapping(omnisci_column, parquet_column)) {
1998  return true;
1999  }
2000  if (validate_floating_point_mapping(omnisci_column, parquet_column)) {
2001  return true;
2002  }
2003  if (validate_integral_mapping(omnisci_column, parquet_column)) {
2004  return true;
2005  }
2006  if (validate_none_type_mapping(omnisci_column, parquet_column)) {
2007  return true;
2008  }
2009  if (validate_timestamp_mapping(omnisci_column, parquet_column)) {
2010  return true;
2011  }
2012  if (validate_time_mapping(omnisci_column, parquet_column)) {
2013  return true;
2014  }
2015  if (validate_date_mapping(omnisci_column, parquet_column)) {
2016  return true;
2017  }
2018  if (validate_string_mapping(omnisci_column, parquet_column)) {
2019  return true;
2020  }
2021  return false;
2022 }
bool validate_time_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_integral_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_date_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_timestamp_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_geospatial_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_decimal_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool validate_none_type_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
#define CHECK(condition)
Definition: Logger.h:291
bool validate_floating_point_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
SQLTypeInfo columnType
bool validate_string_mapping(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
bool is_array() const
Definition: sqltypes.h:583

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::list< std::unique_ptr< ChunkMetadata > > foreign_storage::LazyParquetChunkLoader::loadChunk ( const std::vector< RowGroupInterval > &  row_group_intervals,
const int  parquet_column_index,
std::list< Chunk_NS::Chunk > &  chunks,
StringDictionary *  string_dictionary = nullptr,
RejectedRowIndices *  rejected_row_indices = nullptr 
)

Load a number of row groups of a column in a parquet file into a chunk

Parameters
row_group_interval- an inclusive interval [start,end] that specifies row groups to load
parquet_column_index- the logical column index in the parquet file (and omnisci db) of column to load
chunks- a list containing the chunks to load
string_dictionary- a string dictionary for the column being loaded, if applicable
rejected_row_indices- optional, if specified errors will be tracked in this data structure while loading
Returns
An empty list when no metadata update is applicable, otherwise a list of ChunkMetadata shared pointers with which to update the corresponding column chunk metadata.

NOTE: if more than one chunk is supplied, the first chunk is required to be the chunk corresponding to the logical column, while the remaining chunks correspond to physical columns (in ascending order of column id.) Similarly, if a metadata update is expected, the list of ChunkMetadata shared pointers returned will correspond directly to the list chunks.

Definition at line 2034 of file LazyParquetChunkLoader.cpp.

References appendRowGroups(), and CHECK.

Referenced by foreign_storage::ParquetDataWrapper::loadBuffersUsingLazyParquetChunkLoader().

2039  {
2040  CHECK(!chunks.empty());
2041  auto const& chunk = *chunks.begin();
2042  auto column_descriptor = chunk.getColumnDesc();
2043  auto buffer = chunk.getBuffer();
2044  CHECK(buffer);
2045 
2046  try {
2047  auto metadata = appendRowGroups(row_group_intervals,
2048  parquet_column_index,
2049  column_descriptor,
2050  chunks,
2051  string_dictionary,
2052  rejected_row_indices);
2053  return metadata;
2054  } catch (const std::exception& error) {
2055  throw ForeignStorageException(error.what());
2056  }
2057 
2058  return {};
2059 }
#define CHECK(condition)
Definition: Logger.h:291
std::list< std::unique_ptr< ChunkMetadata > > appendRowGroups(const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, const ColumnDescriptor *column_descriptor, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, RejectedRowIndices *rejected_row_indices, const bool is_for_detect=false, const std::optional< int64_t > max_levels_read=std::nullopt)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< size_t, size_t > foreign_storage::LazyParquetChunkLoader::loadRowGroups ( const RowGroupInterval &  row_group_interval,
const std::map< int, Chunk_NS::Chunk > &  chunks,
const ForeignTableSchema &  schema,
const std::map< int, StringDictionary * > &  column_dictionaries,
const int  num_threads = 1 
)

Load row groups of data into given chunks.

Parameters
row_group_interval- specifies which row groups to load
chunks- map of column index to chunk which data will be loaded into
schema- schema of the foreign table to perform metadata scan for
column_dictionaries- a map of string dictionaries for columns that require it
num_threads- number of threads to utilize while reading (if applicable)
Returns
[num_rows_completed,num_rows_rejected] - returns number of rows loaded and rejected while loading

Note that only logical chunks are expected because the data is read into an intermediate form into the underlying buffers. This member is intended to be used for import.

NOTE: Currently, loading one row group at a time is required.

Definition at line 2144 of file LazyParquetChunkLoader.cpp.

References threading_serial::async(), CHECK, DEBUG_TIMER, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, file_system_, foreign_table_, foreign_storage::ForeignTable::GEO_VALIDATE_GEOMETRY_KEY, shared::get_from_map(), foreign_storage::get_parquet_table_size(), foreign_storage::ForeignTableSchema::getColumnDescriptor(), foreign_storage::ForeignTableSchema::getLogicalColumns(), foreign_storage::OptionsContainer::getOptionAsBool(), foreign_storage::ForeignTableSchema::getParquetColumnIndex(), foreign_storage::open_parquet_table(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::populate_encoder_map_for_import(), foreign_storage::RowGroupInterval::start_index, logger::thread_id(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_max_repetition_and_definition_level(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().

2149  {
2150  auto timer = DEBUG_TIMER(__func__);
2151 
2152  const auto& file_path = row_group_interval.file_path;
2153 
2154  // do not use caching with file-readers, open a new one for every request
2155  auto file_reader_owner = open_parquet_table(file_path, file_system_);
2156  auto file_reader = file_reader_owner.get();
2157  auto file_metadata = file_reader->parquet_reader()->metadata();
2158 
2159  validate_number_of_columns(file_metadata, file_path, schema);
2160 
2161  // check for fixed length encoded columns and indicate to the user
2162  // they should not be used
2163  for (const auto column_descriptor : schema.getLogicalColumns()) {
2164  auto parquet_column_index = schema.getParquetColumnIndex(column_descriptor->columnId);
2165  auto parquet_column = file_metadata->schema()->Column(parquet_column_index);
2166  try {
2167  validate_allowed_mapping(parquet_column, column_descriptor);
2168  } catch (std::runtime_error& e) {
2169  std::stringstream error_message;
2170  error_message << e.what()
2171  << " Parquet column: " << parquet_column->path()->ToDotString()
2172  << ", HeavyDB column: " << column_descriptor->columnName
2173  << ", Parquet file: " << file_path << ".";
2174  throw std::runtime_error(error_message.str());
2175  }
2176  }
2177 
2178  CHECK(row_group_interval.start_index == row_group_interval.end_index);
2179  auto row_group_index = row_group_interval.start_index;
2180  std::map<int, ParquetRowGroupReader> row_group_reader_map;
2181 
2182  parquet::ParquetFileReader* parquet_reader = file_reader->parquet_reader();
2183  auto group_reader = parquet_reader->RowGroup(row_group_index);
2184 
2185  std::vector<InvalidRowGroupIndices> invalid_indices_per_thread(num_threads);
2186 
2187  const bool geo_validate_geometry =
2189  auto encoder_map = populate_encoder_map_for_import(chunks,
2190  schema,
2191  file_reader,
2192  column_dictionaries,
2193  group_reader->metadata()->num_rows(),
2194  geo_validate_geometry);
2195 
2196  std::vector<std::set<int>> partitions(num_threads);
2197  std::map<int, int> column_id_to_thread;
2198  for (auto& [column_id, encoder] : encoder_map) {
2199  auto thread_id = column_id % num_threads;
2200  column_id_to_thread[column_id] = thread_id;
2201  partitions[thread_id].insert(column_id);
2202  }
2203 
2204  for (auto& [column_id, encoder] : encoder_map) {
2205  const auto& column_descriptor = schema.getColumnDescriptor(column_id);
2206  const auto parquet_column_index = schema.getParquetColumnIndex(column_id);
2207  auto parquet_column_descriptor =
2208  file_metadata->schema()->Column(parquet_column_index);
2209 
2210  // validate
2211  auto [num_row_groups, num_columns] = get_parquet_table_size(file_reader);
2212  CHECK(row_group_interval.start_index >= 0 &&
2213  row_group_interval.end_index < num_row_groups);
2214  CHECK(parquet_column_index >= 0 && parquet_column_index < num_columns);
2216  parquet_column_descriptor);
2217 
2218  std::shared_ptr<parquet::ColumnReader> col_reader =
2219  group_reader->Column(parquet_column_index);
2220 
2221  row_group_reader_map.insert(
2222  {column_id,
2223  ParquetRowGroupReader(col_reader,
2224  column_descriptor,
2225  parquet_column_descriptor,
2226  shared::get_from_map(encoder_map, column_id).get(),
2227  invalid_indices_per_thread[shared::get_from_map(
2228  column_id_to_thread, column_id)],
2229  row_group_index,
2230  parquet_column_index,
2231  parquet_reader)});
2232  }
2233 
2234  std::vector<std::future<void>> futures;
2235  for (int ithread = 0; ithread < num_threads; ++ithread) {
2236  auto column_ids_for_thread = partitions[ithread];
2237  futures.emplace_back(
2238  std::async(std::launch::async, [&row_group_reader_map, column_ids_for_thread] {
2239  for (const auto column_id : column_ids_for_thread) {
2240  shared::get_from_map(row_group_reader_map, column_id)
2241  .readAndValidateRowGroup(); // reads and validate entire row group per
2242  // column
2243  }
2244  }));
2245  }
2246 
2247  for (auto& future : futures) {
2248  future.wait();
2249  }
2250 
2251  for (auto& future : futures) {
2252  future.get();
2253  }
2254 
2255  // merge/reduce invalid indices
2256  InvalidRowGroupIndices invalid_indices;
2257  for (auto& thread_invalid_indices : invalid_indices_per_thread) {
2258  invalid_indices.merge(thread_invalid_indices);
2259  }
2260 
2261  for (auto& [_, reader] : row_group_reader_map) {
2262  reader.eraseInvalidRowGroupData(
2263  invalid_indices); // removes invalid encoded data in buffers
2264  }
2265 
2266  // update the element count for each encoder
2267  for (const auto column_descriptor : schema.getLogicalColumns()) {
2268  auto column_id = column_descriptor->columnId;
2269  auto db_encoder = shared::get_from_map(chunks, column_id).getBuffer()->getEncoder();
2270  CHECK(static_cast<size_t>(group_reader->metadata()->num_rows()) >=
2271  invalid_indices.size());
2272  size_t updated_num_elems = db_encoder->getNumElems() +
2273  group_reader->metadata()->num_rows() -
2274  invalid_indices.size();
2275  db_encoder->setNumElems(updated_num_elems);
2276  if (column_descriptor->columnType.is_geometry()) {
2277  for (int i = 0; i < column_descriptor->columnType.get_physical_cols(); ++i) {
2278  auto db_encoder =
2279  shared::get_from_map(chunks, column_id + i + 1).getBuffer()->getEncoder();
2280  db_encoder->setNumElems(updated_num_elems);
2281  }
2282  }
2283  }
2284 
2285  return {group_reader->metadata()->num_rows() - invalid_indices.size(),
2286  invalid_indices.size()};
2287 }
std::pair< int, int > get_parquet_table_size(const ReaderPtr &reader)
UniqueReaderPtr open_parquet_table(const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
future< Result > async(Fn &&fn, Args &&...args)
std::set< int64_t > InvalidRowGroupIndices
void validate_allowed_mapping(const parquet::ColumnDescriptor *parquet_column, const ColumnDescriptor *omnisci_column)
void validate_number_of_columns(const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema)
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61
static constexpr const char * GEO_VALIDATE_GEOMETRY_KEY
Definition: ForeignTable.h:49
std::map< int, std::shared_ptr< ParquetEncoder > > populate_encoder_map_for_import(const std::map< int, Chunk_NS::Chunk > chunks, const ForeignTableSchema &schema, const ReaderPtr &reader, const std::map< int, StringDictionary * > column_dictionaries, const int64_t num_rows, const bool geo_validate_geometry)
ThreadId thread_id()
Definition: Logger.cpp:877
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
void validate_max_repetition_and_definition_level(const ColumnDescriptor *omnisci_column_descriptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
std::shared_ptr< arrow::fs::FileSystem > file_system_
bool getOptionAsBool(const std::string_view &key) const

+ Here is the call graph for this function:

std::list< RowGroupMetadata > foreign_storage::LazyParquetChunkLoader::metadataScan ( const std::vector< std::string > &  file_paths,
const ForeignTableSchema &  schema,
const bool  do_metadata_stats_validation = true 
)

Perform a metadata scan for the paths specified.

Parameters
file_paths- (ordered) files of the metadata scan
schema- schema of the foreign table to perform metadata scan for
do_metadata_stats_validation- validate stats in metadata of parquet files if true
Returns
a list of the row group metadata extracted from file_paths

Definition at line 2455 of file LazyParquetChunkLoader.cpp.

References threading_serial::async(), CHECK, DEBUG_TIMER, file_reader_cache_, file_system_, foreign_table_, foreign_storage::ForeignTable::GEO_VALIDATE_GEOMETRY_KEY, foreign_storage::get_num_threads(), foreign_storage::get_parquet_table_size(), foreign_storage::ForeignTableSchema::getLogicalAndPhysicalColumns(), foreign_storage::OptionsContainer::getOptionAsBool(), foreign_storage::FileReaderMap::initializeIfEmpty(), foreign_storage::FileReaderMap::insert(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::metadata_scan_rowgroup_interval(), foreign_storage::partition_for_threads(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::populate_encoder_map_for_metadata_scan(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::throw_row_group_larger_than_fragment_size_error(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_parquet_metadata().

Referenced by foreign_storage::ParquetDataWrapper::getRowGroupMetadataForFilePaths().

2458  {
2459  auto timer = DEBUG_TIMER(__func__);
2460  auto column_interval =
2461  Interval<ColumnType>{schema.getLogicalAndPhysicalColumns().front()->columnId,
2462  schema.getLogicalAndPhysicalColumns().back()->columnId};
2463  CHECK(!file_paths.empty());
2464 
2465  // The encoder map needs to be populated before we can start scanning rowgroups, so we
2466  // peel the first file_path out of the async loop below to perform population.
2467  const auto& first_path = *file_paths.begin();
2468  auto first_reader = file_reader_cache_->insert(first_path, file_system_);
2469  auto max_row_group_stats =
2470  validate_parquet_metadata(first_reader->parquet_reader()->metadata(),
2471  first_path,
2472  schema,
2473  do_metadata_stats_validation);
2474 
2475  const bool geo_validate_geometry =
2477  auto encoder_map = populate_encoder_map_for_metadata_scan(column_interval,
2478  schema,
2479  first_reader,
2480  do_metadata_stats_validation,
2481  geo_validate_geometry);
2482  const auto num_row_groups = get_parquet_table_size(first_reader).first;
2483  auto row_group_metadata = metadata_scan_rowgroup_interval(
2484  encoder_map, {first_path, 0, num_row_groups - 1}, first_reader, schema);
2485 
2486  // We want each (filepath->FileReader) pair in the cache to be initialized before we
2487  // multithread so that we are not adding keys in a concurrent environment, so we add
2488  // cache entries for each path and initialize to an empty unique_ptr if the file has not
2489  // yet been opened.
2490  // Since we have already performed the first iteration, we skip it in the thread groups
2491  // so as not to process it twice.
2492  std::vector<std::string> cache_subset;
2493  for (auto path_it = ++(file_paths.begin()); path_it != file_paths.end(); ++path_it) {
2495  cache_subset.emplace_back(*path_it);
2496  }
2497 
 // Iterate asynchronously over any paths beyond the first.
2499  auto table_ptr = schema.getForeignTable();
2500  CHECK(table_ptr);
2501  auto num_threads = foreign_storage::get_num_threads(*table_ptr);
2502  auto paths_per_thread = partition_for_threads(cache_subset, num_threads);
2503  std::vector<std::future<std::pair<std::list<RowGroupMetadata>, MaxRowGroupSizeStats>>>
2504  futures;
2505  for (const auto& path_group : paths_per_thread) {
2506  futures.emplace_back(std::async(
2508  [&](const auto& paths, const auto& file_reader_cache)
2509  -> std::pair<std::list<RowGroupMetadata>, MaxRowGroupSizeStats> {
2510  std::list<RowGroupMetadata> reduced_metadata;
2511  MaxRowGroupSizeStats max_row_group_stats{0, 0};
2512  for (const auto& path : paths.get()) {
2513  auto reader = file_reader_cache.get().getOrInsert(path, file_system_);
2514  validate_equal_schema(first_reader, reader, first_path, path);
2515  auto local_max_row_group_stats =
2516  validate_parquet_metadata(reader->parquet_reader()->metadata(),
2517  path,
2518  schema,
2519  do_metadata_stats_validation);
2520  if (local_max_row_group_stats.max_row_group_size >
2521  max_row_group_stats.max_row_group_size) {
2522  max_row_group_stats = local_max_row_group_stats;
2523  }
2524  const auto num_row_groups = get_parquet_table_size(reader).first;
2525  const auto interval = RowGroupInterval{path, 0, num_row_groups - 1};
2526  reduced_metadata.splice(
2527  reduced_metadata.end(),
2528  metadata_scan_rowgroup_interval(encoder_map, interval, reader, schema));
2529  }
2530  return {reduced_metadata, max_row_group_stats};
2531  },
2532  std::ref(path_group),
2533  std::ref(*file_reader_cache_)));
2534  }
2535 
2536  // Reduce all the row_group results.
2537  for (auto& future : futures) {
2538  auto [metadata, local_max_row_group_stats] = future.get();
2539  row_group_metadata.splice(row_group_metadata.end(), metadata);
2540  if (local_max_row_group_stats.max_row_group_size >
2541  max_row_group_stats.max_row_group_size) {
2542  max_row_group_stats = local_max_row_group_stats;
2543  }
2544  }
2545 
2546  if (max_row_group_stats.max_row_group_size > schema.getForeignTable()->maxFragRows) {
2548  max_row_group_stats, schema.getForeignTable()->maxFragRows);
2549  }
2550 
2551  return row_group_metadata;
2552 }
auto partition_for_threads(const std::set< T > &items, size_t max_threads)
Definition: FsiChunkUtils.h:41
std::pair< int, int > get_parquet_table_size(const ReaderPtr &reader)
size_t get_num_threads(const ForeignTable &table)
void validate_equal_schema(const parquet::arrow::FileReader *reference_file_reader, const parquet::arrow::FileReader *new_file_reader, const std::string &reference_file_path, const std::string &new_file_path)
future< Result > async(Fn &&fn, Args &&...args)
static constexpr const char * GEO_VALIDATE_GEOMETRY_KEY
Definition: ForeignTable.h:49
std::list< RowGroupMetadata > metadata_scan_rowgroup_interval(const std::map< int, std::shared_ptr< ParquetEncoder >> &encoder_map, const RowGroupInterval &row_group_interval, const ReaderPtr &reader, const ForeignTableSchema &schema)
const ReaderPtr insert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:79
void initializeIfEmpty(const std::string &path)
Definition: ParquetShared.h:86
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
std::map< int, std::shared_ptr< ParquetEncoder > > populate_encoder_map_for_metadata_scan(const Interval< ColumnType > &column_interval, const ForeignTableSchema &schema, const ReaderPtr &reader, const bool do_metadata_stats_validation, const bool geo_validate_geometry)
std::shared_ptr< arrow::fs::FileSystem > file_system_
MaxRowGroupSizeStats validate_parquet_metadata(const std::shared_ptr< parquet::FileMetaData > &file_metadata, const std::string &file_path, const ForeignTableSchema &schema, const bool do_metadata_stats_validation)
void throw_row_group_larger_than_fragment_size_error(const MaxRowGroupSizeStats max_row_group_stats, const int fragment_size)
bool getOptionAsBool(const std::string_view &key) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

DataPreview foreign_storage::LazyParquetChunkLoader::previewFiles ( const std::vector< std::string > &  files,
const size_t  max_num_rows,
const ForeignTable &  table 
)

Preview rows of data and column types in a set of files.

Parameters
files - files to preview
max_num_rows - maximum number of rows to preview
table - foreign table for preview
Returns
a DataPreview instance that contains relevant preview information

Definition at line 2296 of file LazyParquetChunkLoader.cpp.

References appendRowGroups(), CHECK, CHECK_EQ, CHECK_GE, foreign_storage::PreviewContext::column_chunks, foreign_storage::PreviewContext::column_descriptors, foreign_storage::DataPreview::column_names, foreign_storage::DataPreview::column_types, ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, foreign_storage::create_futures_for_workers(), foreign_storage::PreviewContext::detect_buffers, foreign_storage::detect_geo_type(), file_reader_cache_, file_system_, foreign_storage::get_num_threads(), foreign_storage::FileReaderMap::getOrInsert(), gpu_enabled::iota(), ColumnDescriptor::isSystemCol, ColumnDescriptor::isVirtualCol, kENCODING_NONE, foreign_storage::DataPreview::num_rejected_rows, foreign_storage::PreviewContext::rejected_row_indices_per_column, foreign_storage::DataPreview::sample_rows, suggestColumnMapping(), ColumnDescriptor::tableId, and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

2298  {  // previewFiles body: sample up to max_num_rows rows from the given Parquet files to infer column types
2299  CHECK(!files.empty());  // preview requires at least one input file
2300 
2301  auto first_file = *files.begin();
2302  auto first_file_reader = file_reader_cache_->getOrInsert(*files.begin(), file_system_);
2303 
2304  for (auto current_file_it = ++files.begin(); current_file_it != files.end();
2305  ++current_file_it) {
2306  auto file_reader = file_reader_cache_->getOrInsert(*current_file_it, file_system_);
2307  validate_equal_schema(first_file_reader, file_reader, first_file, *current_file_it);  // every file must match the first file's schema
2308  }
2309 
2310  auto first_file_metadata = first_file_reader->parquet_reader()->metadata();
2311  auto num_columns = first_file_metadata->num_columns();
2312 
2313  DataPreview data_preview;
2314  data_preview.num_rejected_rows = 0;
2315 
2316  auto current_file_it = files.begin();
2317  while (data_preview.sample_rows.size() < max_num_rows &&  // keep consuming files until enough sample rows are gathered
2318  current_file_it != files.end()) {
2319  size_t total_num_rows = data_preview.sample_rows.size();
2320  size_t max_num_rows_to_append = max_num_rows - data_preview.sample_rows.size();
2321 
2322  // gather enough rows in row groups to produce required samples
2323  std::vector<RowGroupInterval> row_group_intervals;
2324  for (; current_file_it != files.end(); ++current_file_it) {
2325  const auto& file_path = *current_file_it;
2326  auto file_reader = file_reader_cache_->getOrInsert(file_path, file_system_);
2327  auto file_metadata = file_reader->parquet_reader()->metadata();
2328  auto num_row_groups = file_metadata->num_row_groups();
2329  int end_row_group = 0;
2330  for (int i = 0; i < num_row_groups && total_num_rows < max_num_rows; ++i) {
2331  const size_t next_num_rows = file_metadata->RowGroup(i)->num_rows();
2332  total_num_rows += next_num_rows;
2333  end_row_group = i;
2334  }
2335  row_group_intervals.push_back(RowGroupInterval{file_path, 0, end_row_group});  // intervals always start at row group 0 of each file
2336  }
2337 
2338  PreviewContext preview_context;
2339  for (int i = 0; i < num_columns; ++i) {  // build a detection ColumnDescriptor, buffer, and chunk per Parquet column
2340  auto col = first_file_metadata->schema()->Column(i);
2341  ColumnDescriptor& cd = preview_context.column_descriptors.emplace_back();
2342  auto sql_type = LazyParquetChunkLoader::suggestColumnMapping(col);
2343  cd.columnType = sql_type;
2344  cd.columnName =
2345  sql_type.is_array() ? col->path()->ToDotVector()[0] + "_array" : col->name();
2346  cd.isSystemCol = false;
2347  cd.isVirtualCol = false;
2348  cd.tableId = -1;  // placeholder: this descriptor is not backed by a real table
2349  cd.columnId = i + 1;  // column ids are 1-based
2350  data_preview.column_names.emplace_back(cd.columnName);
2351  data_preview.column_types.emplace_back(sql_type);
2352  preview_context.detect_buffers.push_back(
2353  std::make_unique<TypedParquetDetectBuffer>());
2354  preview_context.rejected_row_indices_per_column.push_back(
2355  std::make_unique<RejectedRowIndices>());
2356  auto& detect_buffer = preview_context.detect_buffers.back();
2357  auto& chunk = preview_context.column_chunks.emplace_back(&cd);
2358  chunk.setPinnable(false);
2359  chunk.setBuffer(detect_buffer.get());
2360  }
2361 
2362  std::function<void(const std::vector<int>&)> append_row_groups_for_column =
2363  [&](const std::vector<int>& column_indices) {
2364  for (const auto& column_index : column_indices) {
2365  auto& chunk = preview_context.column_chunks[column_index];
2366  auto chunk_list = std::list<Chunk_NS::Chunk>{chunk};
2367  auto& rejected_row_indices =
2368  preview_context.rejected_row_indices_per_column[column_index];
2369  appendRowGroups(row_group_intervals,
2370  column_index,
2371  chunk.getColumnDesc(),
2372  chunk_list,
2373  nullptr,  // string_dictionary: not needed for detection
2374  rejected_row_indices.get(),
2375  true,  // is_for_detect
2376  max_num_rows_to_append);  // max_levels_read
2377  }
2378  };
2379 
2380  auto num_threads = foreign_storage::get_num_threads(foreign_table);
2381 
2382  std::vector<int> columns(num_columns);
2383  std::iota(columns.begin(), columns.end(), 0);
2384  auto futures =
2385  create_futures_for_workers(columns, num_threads, append_row_groups_for_column);
2386  for (auto& future : futures) {  // wait for all workers first; get() below then surfaces any exception
2387  future.wait();
2388  }
2389  for (auto& future : futures) {
2390  future.get();
2391  }
2392 
2393  // merge all `rejected_row_indices_per_column`
2394  auto rejected_row_indices = std::make_unique<RejectedRowIndices>();
2395  for (int i = 0; i < num_columns; ++i) {
2396  rejected_row_indices->insert(
2397  preview_context.rejected_row_indices_per_column[i]->begin(),
2398  preview_context.rejected_row_indices_per_column[i]->end());
2399  }
2400 
2401  size_t num_rows = 0;
2402  auto buffers_it = preview_context.detect_buffers.begin();
2403  for (int i = 0; i < num_columns; ++i, ++buffers_it) {  // every column's detect buffer must hold the same row count
2404  CHECK(buffers_it != preview_context.detect_buffers.end());
2405  auto& strings = buffers_it->get()->getStrings();
2406  if (i == 0) {
2407  num_rows = strings.size();
2408  } else {
2409  CHECK_EQ(num_rows, strings.size());
2410  }
2411  }
2412 
2413  size_t num_rejected_rows = rejected_row_indices->size();
2414  data_preview.num_rejected_rows += num_rejected_rows;
2415  CHECK_GE(num_rows, num_rejected_rows);
2416  auto row_count = num_rows - num_rejected_rows;
2417 
2418  auto offset_row = data_preview.sample_rows.size();
2419  data_preview.sample_rows.resize(std::min(offset_row + row_count, max_num_rows));
2420 
2421  for (size_t irow = 0, rows_appended = 0;  // copy accepted rows into the sample, skipping rejected row indices
2422  irow < num_rows && offset_row + rows_appended < max_num_rows;
2423  ++irow) {
2424  if (rejected_row_indices->find(irow) != rejected_row_indices->end()) {
2425  continue;
2426  }
2427  auto& row_data = data_preview.sample_rows[offset_row + rows_appended];
2428  row_data.resize(num_columns);
2429  auto buffers_it = preview_context.detect_buffers.begin();
2430  for (int i = 0; i < num_columns; ++i, ++buffers_it) {
2431  CHECK(buffers_it != preview_context.detect_buffers.end());
2432  auto& strings = buffers_it->get()->getStrings();
2433  row_data[i] = strings[irow];
2434  }
2435  ++rows_appended;
2436  }
2437  }
2438 
2439  // attempt to detect geo columns: string columns whose sampled values look like geometry
2440  for (int i = 0; i < num_columns; ++i) {
2441  auto type_info = data_preview.column_types[i];
2442  if (type_info.is_string()) {
2443  auto tentative_geo_type =
2444  foreign_storage::detect_geo_type(data_preview.sample_rows, i);
2445  if (tentative_geo_type.has_value()) {
2446  data_preview.column_types[i].set_type(tentative_geo_type.value());
2447  data_preview.column_types[i].set_compression(kENCODING_NONE);  // detected geo columns get no compression encoding
2448  }
2449  }
2450  }
2451 
2452  return data_preview;
2453 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::optional< SQLTypes > detect_geo_type(const SampleRows &sample_rows, size_t column_index)
Definition: DataPreview.cpp:22
size_t get_num_threads(const ForeignTable &table)
#define CHECK_GE(x, y)
Definition: Logger.h:306
void validate_equal_schema(const parquet::arrow::FileReader *reference_file_reader, const parquet::arrow::FileReader *new_file_reader, const std::string &reference_file_path, const std::string &new_file_path)
std::vector< std::future< void > > create_futures_for_workers(const Container &items, size_t max_threads, std::function< void(const Container &)> lambda)
Definition: FsiChunkUtils.h:74
const ReaderPtr getOrInsert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:70
specifies the content in-memory of a row in the column metadata table
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
#define CHECK(condition)
Definition: Logger.h:291
std::list< std::unique_ptr< ChunkMetadata > > appendRowGroups(const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, const ColumnDescriptor *column_descriptor, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary, RejectedRowIndices *rejected_row_indices, const bool is_for_detect=false, const std::optional< int64_t > max_levels_read=std::nullopt)
std::shared_ptr< arrow::fs::FileSystem > file_system_
SQLTypeInfo columnType
std::string columnName
static SQLTypeInfo suggestColumnMapping(const parquet::ColumnDescriptor *parquet_column)

+ Here is the call graph for this function:

SQLTypeInfo foreign_storage::LazyParquetChunkLoader::suggestColumnMapping ( const parquet::ColumnDescriptor *  parquet_column)
staticprivate

Suggest a possible Parquet to OmniSci column mapping based on heuristics.

Parameters
parquet_column - the column descriptor of the Parquet column
Returns
a supported OmniSci SQLTypeInfo given the Parquet column type

NOTE: the suggested type may be entirely inappropriate for a specific use case; however, it is guaranteed to be an allowed mapping. For example, geo-types are never detected; strings are always suggested in their place.

Definition at line 1977 of file LazyParquetChunkLoader.cpp.

References foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::is_valid_parquet_list_column(), foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::suggest_column_scalar_type(), and run_benchmark_import::type.

Referenced by previewFiles().

1978  {  // suggestColumnMapping body: heuristic Parquet -> SQL type suggestion, guaranteed to be an allowed mapping
1979  auto type = suggest_column_scalar_type(parquet_column);  // suggested scalar type for this Parquet column
1980 
1981  // array case: a valid Parquet LIST column maps to the array form of the suggested scalar type
1982  if (is_valid_parquet_list_column(parquet_column)) {
1983  return type.get_array_type();
1984  }
1985 
1986  return type;  // scalar (non-list) column: return the suggested type as-is
1987  }
bool is_valid_parquet_list_column(const parquet::ColumnDescriptor *parquet_column)
Detect a valid list parquet column.
SQLTypeInfo suggest_column_scalar_type(const parquet::ColumnDescriptor *parquet_column)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

const int foreign_storage::LazyParquetChunkLoader::batch_reader_num_elements = 4096
static
FileReaderMap* foreign_storage::LazyParquetChunkLoader::file_reader_cache_
private

Definition at line 171 of file LazyParquetChunkLoader.h.

Referenced by appendRowGroups(), metadataScan(), and previewFiles().

std::shared_ptr<arrow::fs::FileSystem> foreign_storage::LazyParquetChunkLoader::file_system_
private
const ForeignTable* foreign_storage::LazyParquetChunkLoader::foreign_table_
private

The documentation for this class was generated from the following files: