OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::RowGroupIntervalTracker Class Reference
+ Inheritance diagram for foreign_storage::RowGroupIntervalTracker:
+ Collaboration diagram for foreign_storage::RowGroupIntervalTracker:

Public Member Functions

 RowGroupIntervalTracker (const std::set< std::string > &file_paths, FileReaderMap *file_reader_cache, std::shared_ptr< arrow::fs::FileSystem > file_system)
 
std::optional< RowGroupInterval > getNextRowGroupInterval () override
 
- Public Member Functions inherited from foreign_storage::AbstractRowGroupIntervalTracker
virtual ~AbstractRowGroupIntervalTracker ()=default
 

Private Member Functions

bool filesAreExhausted ()
 
void advanceToNextRowGroup ()
 

Private Attributes

std::set< std::string > file_paths_
 
FileReaderMap * file_reader_cache_
 
std::shared_ptr< arrow::fs::FileSystem > file_system_
 
bool is_initialized_
 
int num_row_groups_
 
int current_row_group_index_
 
std::set< std::string >::const_iterator current_file_iter_
 

Detailed Description

Definition at line 33 of file ParquetImporter.cpp.

Constructor & Destructor Documentation

foreign_storage::RowGroupIntervalTracker::RowGroupIntervalTracker ( const std::set< std::string > &  file_paths,
FileReaderMap *  file_reader_cache,
std::shared_ptr< arrow::fs::FileSystem >  file_system 
)
inline

Definition at line 35 of file ParquetImporter.cpp.

38  : file_paths_(file_paths)
39  , file_reader_cache_(file_reader_cache)
40  , file_system_(file_system)
41  , is_initialized_(false)
42  , num_row_groups_(0)
44  , current_file_iter_(file_paths_.begin()) {}
std::set< std::string >::const_iterator current_file_iter_
std::shared_ptr< arrow::fs::FileSystem > file_system_

Member Function Documentation

void foreign_storage::RowGroupIntervalTracker::advanceToNextRowGroup ( )
inline private

Definition at line 58 of file ParquetImporter.cpp.

References CHECK, current_file_iter_, current_row_group_index_, file_paths_, file_reader_cache_, file_system_, filesAreExhausted(), foreign_storage::FileReaderMap::getOrInsert(), is_initialized_, and num_row_groups_.

Referenced by getNextRowGroupInterval().

58  {
61  return;
62  }
63  if (!is_initialized_) {
65  is_initialized_ = true;
66  } else {
68  current_file_iter_++; // advance iterator
69  }
71  if (filesAreExhausted()) {
72  num_row_groups_ = 0;
73  } else {
74  auto file_reader =
76  num_row_groups_ = file_reader->parquet_reader()->metadata()->num_row_groups();
77  }
78  }
std::set< std::string >::const_iterator current_file_iter_
const ReaderPtr getOrInsert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:70
#define CHECK(condition)
Definition: Logger.h:209
std::shared_ptr< arrow::fs::FileSystem > file_system_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::RowGroupIntervalTracker::filesAreExhausted ( )
inline private

Definition at line 56 of file ParquetImporter.cpp.

References current_file_iter_, and file_paths_.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

56 { return current_file_iter_ == file_paths_.end(); }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the caller graph for this function:

std::optional<RowGroupInterval> foreign_storage::RowGroupIntervalTracker::getNextRowGroupInterval ( )
inline override virtual

Implements foreign_storage::AbstractRowGroupIntervalTracker.

Definition at line 46 of file ParquetImporter.cpp.

References advanceToNextRowGroup(), current_file_iter_, current_row_group_index_, and filesAreExhausted().

46  {
47  advanceToNextRowGroup();
48  if (filesAreExhausted()) {
49  return {};
50  }
51  return RowGroupInterval{
52  *current_file_iter_, current_row_group_index_, current_row_group_index_};
53  }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the call graph for this function:

Member Data Documentation

std::set<std::string>::const_iterator foreign_storage::RowGroupIntervalTracker::current_file_iter_
private
int foreign_storage::RowGroupIntervalTracker::current_row_group_index_
private

Definition at line 86 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

std::set<std::string> foreign_storage::RowGroupIntervalTracker::file_paths_
private

Definition at line 80 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and filesAreExhausted().

FileReaderMap* foreign_storage::RowGroupIntervalTracker::file_reader_cache_
private

Definition at line 81 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

std::shared_ptr<arrow::fs::FileSystem> foreign_storage::RowGroupIntervalTracker::file_system_
private

Definition at line 82 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

bool foreign_storage::RowGroupIntervalTracker::is_initialized_
private

Definition at line 84 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

int foreign_storage::RowGroupIntervalTracker::num_row_groups_
private

Definition at line 85 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().


The documentation for this class was generated from the following file: