OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::RowGroupIntervalTracker Class Reference
+ Inheritance diagram for foreign_storage::RowGroupIntervalTracker:
+ Collaboration diagram for foreign_storage::RowGroupIntervalTracker:

Public Member Functions

 RowGroupIntervalTracker (const std::set< std::string > &file_paths, FileReaderMap *file_reader_cache, std::shared_ptr< arrow::fs::FileSystem > file_system)
 
std::optional< RowGroupInterval > getNextRowGroupInterval () override
 
- Public Member Functions inherited from foreign_storage::AbstractRowGroupIntervalTracker
virtual ~AbstractRowGroupIntervalTracker ()=default
 

Private Member Functions

bool filesAreExhausted ()
 
void advanceToNextRowGroup ()
 

Private Attributes

std::set< std::string > file_paths_
 
FileReaderMap * file_reader_cache_
 
std::shared_ptr
< arrow::fs::FileSystem > 
file_system_
 
bool is_initialized_
 
int num_row_groups_
 
int current_row_group_index_
 
std::set< std::string >
::const_iterator 
current_file_iter_
 

Detailed Description

Definition at line 37 of file ParquetImporter.cpp.

Constructor & Destructor Documentation

foreign_storage::RowGroupIntervalTracker::RowGroupIntervalTracker ( const std::set< std::string > &  file_paths,
FileReaderMap *  file_reader_cache,
std::shared_ptr< arrow::fs::FileSystem >  file_system 
)
inline

Definition at line 39 of file ParquetImporter.cpp.

42  : file_paths_(file_paths)
43  , file_reader_cache_(file_reader_cache)
44  , file_system_(file_system)
45  , is_initialized_(false)
46  , num_row_groups_(0)
48  , current_file_iter_(file_paths_.begin()) {}
std::set< std::string >::const_iterator current_file_iter_
std::shared_ptr< arrow::fs::FileSystem > file_system_

Member Function Documentation

void foreign_storage::RowGroupIntervalTracker::advanceToNextRowGroup ( )
inline private

Definition at line 62 of file ParquetImporter.cpp.

References current_file_iter_, current_row_group_index_, file_paths_, file_reader_cache_, file_system_, filesAreExhausted(), foreign_storage::FileReaderMap::getOrInsert(), is_initialized_, and num_row_groups_.

Referenced by getNextRowGroupInterval().

62  {
65  return;
66  }
67  if (!is_initialized_) {
69  is_initialized_ = true;
70  } else {
71  if (filesAreExhausted()) { // can be possible if many concurrent requests
72  return;
73  }
74  current_file_iter_++; // advance iterator
75  }
77  if (filesAreExhausted()) {
78  num_row_groups_ = 0;
79  } else {
80  auto file_reader =
82  num_row_groups_ = file_reader->parquet_reader()->metadata()->num_row_groups();
83  }
84  }
std::set< std::string >::const_iterator current_file_iter_
const ReaderPtr getOrInsert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:70
std::shared_ptr< arrow::fs::FileSystem > file_system_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::RowGroupIntervalTracker::filesAreExhausted ( )
inline private

Definition at line 60 of file ParquetImporter.cpp.

References current_file_iter_, and file_paths_.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

60 { return current_file_iter_ == file_paths_.end(); }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the caller graph for this function:

std::optional<RowGroupInterval> foreign_storage::RowGroupIntervalTracker::getNextRowGroupInterval ( )
inline override virtual

Implements foreign_storage::AbstractRowGroupIntervalTracker.

Definition at line 50 of file ParquetImporter.cpp.

References advanceToNextRowGroup(), current_file_iter_, current_row_group_index_, and filesAreExhausted().

50  {
52  if (filesAreExhausted()) {
53  return {};
54  }
55  return RowGroupInterval{
56  *current_file_iter_, current_row_group_index_, current_row_group_index_};
57  }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the call graph for this function:

Member Data Documentation

std::set<std::string>::const_iterator foreign_storage::RowGroupIntervalTracker::current_file_iter_
private
int foreign_storage::RowGroupIntervalTracker::current_row_group_index_
private

Definition at line 92 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

std::set<std::string> foreign_storage::RowGroupIntervalTracker::file_paths_
private

Definition at line 86 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and filesAreExhausted().

FileReaderMap* foreign_storage::RowGroupIntervalTracker::file_reader_cache_
private

Definition at line 87 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

std::shared_ptr<arrow::fs::FileSystem> foreign_storage::RowGroupIntervalTracker::file_system_
private

Definition at line 88 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

bool foreign_storage::RowGroupIntervalTracker::is_initialized_
private

Definition at line 90 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

int foreign_storage::RowGroupIntervalTracker::num_row_groups_
private

Definition at line 91 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().


The documentation for this class was generated from the following file: