OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage Namespace Reference

Namespaces

 anonymous_namespace{AbstractFileStorageDataWrapper.cpp}
 
 anonymous_namespace{AbstractTextFileDataWrapper.cpp}
 
 anonymous_namespace{CachingForeignStorageMgr.cpp}
 
 anonymous_namespace{CsvFileBufferParser.cpp}
 
 anonymous_namespace{FileReader.cpp}
 
 anonymous_namespace{ForeignStorageCache.cpp}
 
 anonymous_namespace{InternalCatalogDataWrapper.cpp}
 
 anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalStorageStatsDataWrapper.cpp}
 
 anonymous_namespace{InternalSystemDataWrapper.cpp}
 
 anonymous_namespace{LazyParquetChunkLoader.cpp}
 
 anonymous_namespace{ParquetDataWrapper.cpp}
 
 anonymous_namespace{RefreshTimeCalculator.cpp}
 
 anonymous_namespace{RegexFileBufferParser.cpp}
 
 anonymous_namespace{RegexParserDataWrapper.cpp}
 
 anonymous_namespace{S3FilePathUtil.cpp}
 
 anonymous_namespace{TextFileBufferParser.cpp}
 
 Csv
 
 json_utils
 

Classes

struct  ForeignServer
 
struct  ForeignTable
 
struct  OptionsContainer
 
struct  UserMappingType
 
struct  UserMapping
 
class  RefreshTimeCalculator
 
class  AbstractFileStorageDataWrapper
 
struct  ParseFileRegionResult
 
struct  MetadataScanMultiThreadingParams
 
class  AbstractTextFileDataWrapper
 
class  CachingForeignStorageMgr
 
class  CsvDataWrapper
 
class  CsvFileBufferParser
 
class  FileReader
 
class  SingleFileReader
 
class  SingleTextFileReader
 
class  ArchiveWrapper
 
class  CompressedFileReader
 
class  MultiFileReader
 
class  LocalMultiFileReader
 
struct  FileRegion
 
class  ForeignDataWrapper
 
struct  DataWrapperType
 Encapsulates an enumeration of foreign data wrapper type strings. More...
 
class  ForeignDataWrapperFactory
 
class  ForeignStorageBuffer
 
class  ForeignStorageCache
 
class  ForeignStorageException
 
class  MetadataScanInfeasibleFragmentSizeException
 
class  ChunkSizeValidator
 
class  MockForeignDataWrapper
 
class  ForeignStorageMgr
 
class  ForeignTableSchema
 
class  GeospatialEncoder
 
class  InternalCatalogDataWrapper
 
class  InternalMemoryStatsDataWrapper
 
struct  StorageDetails
 
class  InternalStorageStatsDataWrapper
 
class  InternalSystemDataWrapper
 
struct  ColumnType
 
struct  FragmentType
 
struct  Interval
 
struct  ParquetBatchData
 
class  ParquetRowGroupReader
 
class  LazyParquetChunkLoader
 
class  OdbcGeospatialEncoder
 
class  ParquetArrayEncoder
 
class  ParquetArrayImportEncoder
 
class  ParquetDataWrapper
 
class  ParquetDateFromTimestampEncoder
 
class  ParquetDateInSecondsEncoder
 
class  ParquetDecimalEncoder
 
class  ParquetEncoder
 
class  ParquetImportEncoder
 
class  ParquetScalarEncoder
 
class  ParquetFixedLengthArrayEncoder
 
class  ParquetFixedLengthEncoder
 
class  ParquetUnsignedFixedLengthEncoder
 
class  ParquetGeospatialEncoder
 
class  ParquetGeospatialImportEncoder
 
class  RowGroupIntervalTracker
 
class  ParquetImportBatchResult
 
class  AbstractRowGroupIntervalTracker
 
class  ParquetImporter
 
class  ParquetInPlaceEncoder
 
class  TypedParquetInPlaceEncoder
 
class  ParquetMetadataValidator
 
class  TimestampBoundsValidator
 
class  IntegralFixedLengthBoundsValidator
 
class  DateInSecondsBoundsValidator
 
class  FloatPointValidator
 
struct  RowGroupInterval
 
struct  RowGroupMetadata
 
class  FileReaderMap
 
class  ParquetStringEncoder
 
class  ParquetStringImportEncoder
 
class  ParquetStringNoneEncoder
 
class  ParquetTimeEncoder
 
class  ParquetTimestampEncoder
 
class  ParquetVariableLengthArrayEncoder
 
class  RegexFileBufferParser
 
class  RegexParserDataWrapper
 
class  FileOrderS3
 
struct  ParseBufferRequest
 
struct  ParseBufferResult
 
class  TextFileBufferParser
 
class  TypedParquetStorageBuffer
 
class  ForeignTableRefreshScheduler
 

Typedefs

using OptionsMap = std::map< std::string, std::string, std::less<>>
 
using FirstLineByFilePath = std::map< std::string, std::string >
 
using FileRegions = std::vector< FileRegion >
 
using ChunkToBufferMap = std::map< ChunkKey, AbstractBuffer * >
 
using read_lock = mapd_shared_lock< mapd_shared_mutex >
 
using write_lock = mapd_unique_lock< mapd_shared_mutex >
 
using InvalidRowGroupIndices = std::set< int64_t >
 
using UniqueReaderPtr = std::unique_ptr< parquet::arrow::FileReader >
 
using ReaderPtr = parquet::arrow::FileReader *
 
using S3ObjectComparator = std::function< bool(const Aws::S3::Model::Object &, const Aws::S3::Model::Object &)>
 

Functions

ParseFileRegionResult parse_file_regions (const FileRegions &file_regions, const size_t start_index, const size_t end_index, FileReader &file_reader, ParseBufferRequest &parse_file_request, const std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, const TextFileBufferParser &parser)
 
size_t get_buffer_size (const import_export::CopyParams &copy_params, const bool size_known, const size_t file_size)
 
size_t get_buffer_size (const FileRegions &file_regions)
 
size_t get_thread_count (const import_export::CopyParams &copy_params, const bool size_known, const size_t file_size, const size_t buffer_size)
 
size_t get_thread_count (const import_export::CopyParams &copy_params, const FileRegions &file_regions)
 
std::vector< size_t > partition_by_fragment (const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
 
std::optional< ParseBufferRequest > get_next_metadata_scan_request (MetadataScanMultiThreadingParams &multi_threading_params)
 
void add_file_region (std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)
 
void update_stats (Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
 
void cache_blocks (std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool is_last_block, bool disable_cache)
 
void process_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)
 
void add_request_to_pool (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void scan_metadata (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser)
 
ParseBufferRequest get_request_from_pool (MetadataScanMultiThreadingParams &multi_threading_params)
 
void dispatch_metadata_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
 
void resize_buffer_if_needed (std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
 
void dispatch_metadata_scan_requests (const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser)
 
void set_value (rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, FileRegion &file_region)
 
void throw_removed_row_in_result_set_error (const std::string &select_statement)
 
void throw_removed_row_in_file_error (const std::string &file_path)
 
void throw_removed_file_error (const std::string &file_path)
 
void throw_number_of_columns_mismatch_error (size_t num_table_cols, size_t num_file_cols, const std::string &file_path)
 
void throw_file_access_error (const std::string &file_path, const std::string &message)
 
void throw_file_not_found_error (const std::string &file_path)
 
void throw_s3_compressed_mime_type (const std::string &file_path, const std::string &mime_type)
 
void throw_s3_compressed_extension (const std::string &file_path, const std::string &ext_type)
 
bool set_comp (const ChunkKey &left, const ChunkKey &right)
 
std::vector< ChunkKey > get_column_key_vec (const ChunkKey &destination_chunk_key)
 
std::set< ChunkKey > get_column_key_set (const ChunkKey &destination_chunk_key)
 
size_t get_max_chunk_size (const ChunkKey &key)
 
bool contains_fragment_key (const std::set< ChunkKey > &key_set, const ChunkKey &target_key)
 
void refresh_foreign_table (Catalog_Namespace::Catalog &catalog, const std::string &table_name, const bool evict_cached_entries)
 
void init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const ColumnDescriptor *column, size_t num_elements)
 
template<typename T >
auto partition_for_threads (const std::set< T > &items, size_t max_threads)
 
template<typename T >
auto partition_for_threads (const std::vector< T > &items, size_t max_threads)
 
template<typename T >
ArrayDatum encode_as_array_datum (const std::vector< T > &data)
 
bool is_metadata_placeholder (const ChunkMetadata &metadata)
 
void set_value (rapidjson::Value &json_val, const RowGroupInterval &value, rapidjson::Document::AllocatorType &allocator)
 
void get_value (const rapidjson::Value &json_val, RowGroupInterval &value)
 
template<typename D , typename T >
bool check_bounds (const T &value)
 
template<typename D >
std::string datetime_to_string (const D &timestamp, const SQLTypeInfo &column_type)
 
void throw_parquet_metadata_out_of_bounds_error (const std::string &min_value, const std::string &max_value, const std::string &encountered_value)
 
UniqueReaderPtr open_parquet_table (const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
 
std::pair< int, int > get_parquet_table_size (const ReaderPtr &reader)
 
const parquet::ColumnDescriptor * get_column_descriptor (const parquet::arrow::FileReader *reader, const int logical_column_index)
 
parquet::Type::type get_physical_type (ReaderPtr &reader, const int logical_column_index)
 
void validate_equal_column_descriptor (const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path)
 
std::unique_ptr< ColumnDescriptor > get_sub_type_column_descriptor (const ColumnDescriptor *column)
 
std::shared_ptr
< parquet::Statistics > 
validate_and_get_column_metadata_statistics (const parquet::ColumnChunkMetaData *column_metadata)
 
std::vector
< Aws::S3::Model::Object > 
s3_objects_filter_sort_files (const std::vector< Aws::S3::Model::Object > &file_paths, const std::optional< std::string > &filter_regex, const std::optional< std::string > &sort_by, const std::optional< std::string > &sort_regex)
 
template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V get_null_value ()
 
template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > get_min_max_bounds ()
 
void validate_non_foreign_table_write (const TableDescriptor *table_descriptor)
 

Typedef Documentation

using foreign_storage::ChunkToBufferMap = typedef std::map<ChunkKey, AbstractBuffer*>

Definition at line 28 of file ForeignDataWrapper.h.

using foreign_storage::FileRegions = typedef std::vector<FileRegion>

Definition at line 67 of file FileRegions.h.

using foreign_storage::FirstLineByFilePath = typedef std::map<std::string, std::string>

Definition at line 33 of file FileReader.h.

using foreign_storage::InvalidRowGroupIndices = typedef std::set<int64_t>

Definition at line 89 of file ParquetEncoder.h.

using foreign_storage::OptionsMap = typedef std::map<std::string, std::string, std::less<>>

Definition at line 48 of file OptionsContainer.h.

using foreign_storage::ReaderPtr = typedef parquet::arrow::FileReader*

Definition at line 33 of file ParquetShared.h.

using foreign_storage::S3ObjectComparator = typedef std::function<bool(const Aws::S3::Model::Object&, const Aws::S3::Model::Object&)>

Definition at line 19 of file S3FilePathUtil.h.

using foreign_storage::UniqueReaderPtr = typedef std::unique_ptr<parquet::arrow::FileReader>

Definition at line 32 of file ParquetShared.h.

Function Documentation

void foreign_storage::add_file_region ( std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
int  fragment_id,
size_t  first_row_index,
const ParseBufferResult &  result,
const std::string &  file_path 
)

Creates a new file region based on metadata from parsed file buffers and adds the new region to the fragment id to file regions map.

Definition at line 416 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferResult::row_count, and foreign_storage::ParseBufferResult::row_offsets.

Referenced by process_data_blocks().

420  {
421  fragment_id_to_file_regions_map[fragment_id].emplace_back(
422  // file naming is handled by FileReader
423  FileRegion(result.row_offsets.front(),
424  first_row_index,
425  result.row_count,
426  result.row_offsets.back() - result.row_offsets.front()));
427 }

+ Here is the caller graph for this function:

void foreign_storage::add_request_to_pool ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Adds the request object for a processed request back to the request pool for reuse in subsequent requests.

Definition at line 554 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by scan_metadata().

555  {
556  std::unique_lock<std::mutex> completed_requests_queue_lock(
557  multi_threading_params.request_pool_mutex);
558  multi_threading_params.request_pool.emplace(std::move(request));
559  completed_requests_queue_lock.unlock();
560  multi_threading_params.request_pool_condition.notify_all();
561 }

+ Here is the caller graph for this function:

void foreign_storage::cache_blocks ( std::map< ChunkKey, Chunk_NS::Chunk > &  cached_chunks,
DataBlockPtr  data_block,
size_t  row_count,
ChunkKey &  chunk_key,
const ColumnDescriptor *  column,
bool  is_first_block,
bool  is_last_block,
bool  disable_cache 
)

Definition at line 461 of file AbstractTextFileDataWrapper.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_cache_if_enabled(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and SQLTypeInfo::is_varlen_indeed().

Referenced by process_data_blocks().

468  {
469  auto catalog =
470  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
471  CHECK(catalog);
472  auto cache = get_cache_if_enabled(catalog, disable_cache);
473  if (cache) {
474  ChunkKey index_key = {chunk_key[CHUNK_KEY_DB_IDX],
475  chunk_key[CHUNK_KEY_TABLE_IDX],
476  chunk_key[CHUNK_KEY_COLUMN_IDX],
477  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
478  2};
479  // Create actual data chunks to prepopulate cache
480  if (cached_chunks.find(chunk_key) == cached_chunks.end()) {
481  cached_chunks[chunk_key] = Chunk_NS::Chunk{column, false};
482  cached_chunks[chunk_key].setBuffer(
483  cache->getChunkBufferForPrecaching(chunk_key, is_first_block));
484  if (column->columnType.is_varlen_indeed()) {
485  cached_chunks[chunk_key].setIndexBuffer(
486  cache->getChunkBufferForPrecaching(index_key, is_first_block));
487  }
488  if (is_first_block) {
489  cached_chunks[chunk_key].initEncoder();
490  }
491  }
492  cached_chunks[chunk_key].appendData(data_block, row_count, 0);
493  }
494 }
std::vector< int > ChunkKey
Definition: types.h:37
foreign_storage::ForeignStorageCache * get_cache_if_enabled(std::shared_ptr< Catalog_Namespace::Catalog > &catalog, const bool disable_cache)
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
static SysCatalog & instance()
Definition: SysCatalog.h:312
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:211
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:551
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename D , typename T >
bool foreign_storage::check_bounds ( const T &  value)
inline

Definition at line 32 of file ParquetMetadataValidator.h.

32  {
33  auto [min_value, max_value] = get_min_max_bounds<D>();
34  return value >= min_value && value <= max_value;
35 }
bool foreign_storage::contains_fragment_key ( const std::set< ChunkKey > &  key_set,
const ChunkKey target_key 
)
template<typename D >
std::string foreign_storage::datetime_to_string ( const D &  timestamp,
const SQLTypeInfo &  column_type 
)
inline

Definition at line 38 of file ParquetMetadataValidator.h.

References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfo::is_date(), and SQLTypeInfo::is_timestamp().

Referenced by foreign_storage::TimestampBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::DateInSecondsBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::TimestampBoundsValidator< T >::validateValue(), and foreign_storage::DateInSecondsBoundsValidator< T >::validateValue().

39  {
40  CHECK(column_type.is_timestamp() || column_type.is_date());
41  Datum d;
42  d.bigintval = timestamp;
43  return DatumToString(d, column_type);
44 }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:392
bool is_timestamp() const
Definition: sqltypes.h:891
int64_t bigintval
Definition: sqltypes.h:215
#define CHECK(condition)
Definition: Logger.h:211
bool is_date() const
Definition: sqltypes.h:879

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::dispatch_metadata_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params,
ParseBufferRequest &  request 
)

Dispatches a new metadata scan request by adding the request to the pending requests queue to be consumed by a worker thread.

Definition at line 640 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by dispatch_metadata_scan_requests().

642  {
643  {
644  std::unique_lock<std::mutex> pending_requests_lock(
645  multi_threading_params.pending_requests_mutex);
646  multi_threading_params.pending_requests.emplace(std::move(request));
647  }
648  multi_threading_params.pending_requests_condition.notify_all();
649 }

+ Here is the caller graph for this function:

void foreign_storage::dispatch_metadata_scan_requests ( const size_t &  buffer_size,
const std::string &  file_path,
FileReader &  file_reader,
const import_export::CopyParams &  copy_params,
MetadataScanMultiThreadingParams &  multi_threading_params,
size_t &  first_row_index_in_buffer,
size_t &  current_file_offset,
const TextFileBufferParser &  parser 
)

Reads from a text file iteratively and dispatches metadata scan requests that are processed by worker threads.

Definition at line 669 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_metadata_scan_request(), foreign_storage::TextFileBufferParser::findRowEndPosition(), get_request_from_pool(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::FileReader::read(), and resize_buffer_if_needed().

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

677  {
678  auto alloc_size = buffer_size;
679  auto residual_buffer = std::make_unique<char[]>(alloc_size);
680  size_t residual_buffer_size = 0;
681  size_t residual_buffer_alloc_size = alloc_size;
682 
683  while (!file_reader.isScanFinished()) {
684  {
685  std::lock_guard<std::mutex> pending_requests_lock(
686  multi_threading_params.pending_requests_mutex);
687  if (!multi_threading_params.continue_processing) {
688  break;
689  }
690  }
691  auto request = get_request_from_pool(multi_threading_params);
692  resize_buffer_if_needed(request.buffer, request.buffer_alloc_size, alloc_size);
693 
694  if (residual_buffer_size > 0) {
695  memcpy(request.buffer.get(), residual_buffer.get(), residual_buffer_size);
696  }
697  size_t size = residual_buffer_size;
698  size += file_reader.read(request.buffer.get() + residual_buffer_size,
699  alloc_size - residual_buffer_size);
700 
701  if (size == 0) {
702  // In some cases at the end of a file we will read 0 bytes even when
703  // file_reader.isScanFinished() is false
704  continue;
705  } else if (size == 1 && request.buffer[0] == copy_params.line_delim) {
706  // In some cases files with newlines at the end will be encoded with a second
707  // newline that can end up being the only thing in the buffer
708  current_file_offset++;
709  continue;
710  }
711  unsigned int num_rows_in_buffer = 0;
712  request.end_pos = parser.findRowEndPosition(alloc_size,
713  request.buffer,
714  size,
715  copy_params,
716  first_row_index_in_buffer,
717  num_rows_in_buffer,
718  &file_reader);
719  request.buffer_size = size;
720  request.buffer_alloc_size = alloc_size;
721  request.first_row_index = first_row_index_in_buffer;
722  request.file_offset = current_file_offset;
723  request.buffer_row_count = num_rows_in_buffer;
724 
725  residual_buffer_size = size - request.end_pos;
726  if (residual_buffer_size > 0) {
727  resize_buffer_if_needed(residual_buffer, residual_buffer_alloc_size, alloc_size);
728  memcpy(residual_buffer.get(),
729  request.buffer.get() + request.end_pos,
730  residual_buffer_size);
731  }
732 
733  current_file_offset += request.end_pos;
734  first_row_index_in_buffer += num_rows_in_buffer;
735 
736  dispatch_metadata_scan_request(multi_threading_params, request);
737  }
738 
739  std::unique_lock<std::mutex> pending_requests_queue_lock(
740  multi_threading_params.pending_requests_mutex);
741  multi_threading_params.pending_requests_condition.wait(
742  pending_requests_queue_lock, [&multi_threading_params] {
743  return multi_threading_params.pending_requests.empty() ||
744  (multi_threading_params.continue_processing == false);
745  });
746  multi_threading_params.continue_processing = false;
747  pending_requests_queue_lock.unlock();
748  multi_threading_params.pending_requests_condition.notify_all();
749 }
ParseBufferRequest get_request_from_pool(MetadataScanMultiThreadingParams &multi_threading_params)
void resize_buffer_if_needed(std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size)
void dispatch_metadata_scan_request(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
ArrayDatum foreign_storage::encode_as_array_datum ( const std::vector< T > &  data)
inline

Definition at line 32 of file GeospatialEncoder.h.

References omnisci.dtypes::T.

Referenced by foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

32  {
33  const size_t num_bytes = data.size() * sizeof(T);
34  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes], std::default_delete<int8_t[]>());
35  memcpy(buffer.get(), data.data(), num_bytes);
36  return ArrayDatum(num_bytes, buffer, false);
37 }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208

+ Here is the caller graph for this function:

size_t foreign_storage::get_buffer_size ( const import_export::CopyParams &  copy_params,
const bool  size_known,
const size_t  file_size 
)

Gets the appropriate buffer size to be used when processing file(s).

Definition at line 211 of file AbstractTextFileDataWrapper.cpp.

References import_export::CopyParams::buffer_size.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::populateChunks().

213  {
214  size_t buffer_size = copy_params.buffer_size;
215  if (size_known && file_size < buffer_size) {
216  buffer_size = file_size + 1; // +1 for end of line character, if missing
217  }
218  return buffer_size;
219 }
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the caller graph for this function:

size_t foreign_storage::get_buffer_size ( const FileRegions &  file_regions)

Definition at line 221 of file AbstractTextFileDataWrapper.cpp.

References CHECK.

221  {
222  size_t buffer_size = 0;
223  for (const auto& file_region : file_regions) {
224  buffer_size = std::max(buffer_size, file_region.region_size);
225  }
226  CHECK(buffer_size);
227  return buffer_size;
228 }
#define CHECK(condition)
Definition: Logger.h:211
const parquet::ColumnDescriptor * foreign_storage::get_column_descriptor ( const parquet::arrow::FileReader *  reader,
const int  logical_column_index 
)

Definition at line 46 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

48  {
49  return reader->parquet_reader()->metadata()->schema()->Column(logical_column_index);
50 }

+ Here is the caller graph for this function:

std::set<ChunkKey> foreign_storage::get_column_key_set ( const ChunkKey &  destination_chunk_key)

Referenced by foreign_storage::CachingForeignStorageMgr::fetchBuffer(), foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit(), and foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize().

+ Here is the caller graph for this function:

std::vector<ChunkKey> foreign_storage::get_column_key_vec ( const ChunkKey &  destination_chunk_key)
size_t foreign_storage::get_max_chunk_size ( const ChunkKey &  key)

Referenced by foreign_storage::CachingForeignStorageMgr::getBufferSize().

+ Here is the caller graph for this function:

template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0>
std::pair< D, D > foreign_storage::get_min_max_bounds ( )
inline

Definition at line 31 of file SharedMetadataValidator.h.

31  {
32  static_assert(std::is_signed<D>::value,
33  "'get_min_max_bounds' is only valid for signed types");
34  return {get_null_value<D>() + 1, std::numeric_limits<D>::max()};
35 }
std::optional<ParseBufferRequest> foreign_storage::get_next_metadata_scan_request ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets the next metadata scan request object from the pending requests queue. A null optional is returned if there are no further requests to be processed.

Definition at line 393 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.

Referenced by scan_metadata().

394  {
395  std::unique_lock<std::mutex> pending_requests_lock(
396  multi_threading_params.pending_requests_mutex);
397  multi_threading_params.pending_requests_condition.wait(
398  pending_requests_lock, [&multi_threading_params] {
399  return !multi_threading_params.pending_requests.empty() ||
400  !multi_threading_params.continue_processing;
401  });
402  if (multi_threading_params.pending_requests.empty()) {
403  return {};
404  }
405  auto request = std::move(multi_threading_params.pending_requests.front());
406  multi_threading_params.pending_requests.pop();
407  pending_requests_lock.unlock();
408  multi_threading_params.pending_requests_condition.notify_all();
409  return std::move(request);
410 }

+ Here is the caller graph for this function:

template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0>
V foreign_storage::get_null_value ( )
inline

Definition at line 21 of file SharedMetadataValidator.h.

21  {
22  return inline_int_null_value<V>();
23 }
std::pair< int, int > foreign_storage::get_parquet_table_size ( const ReaderPtr &  reader)

Definition at line 39 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), foreign_storage::LazyParquetChunkLoader::loadRowGroups(), and foreign_storage::LazyParquetChunkLoader::metadataScan().

39  {
40  auto file_metadata = reader->parquet_reader()->metadata();
41  const auto num_row_groups = file_metadata->num_row_groups();
42  const auto num_columns = file_metadata->num_columns();
43  return std::make_pair(num_row_groups, num_columns);
44 }

+ Here is the caller graph for this function:

parquet::Type::type foreign_storage::get_physical_type ( ReaderPtr &  reader,
const int  logical_column_index 
)

Definition at line 52 of file ParquetShared.cpp.

Referenced by anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().

52  {
53  return reader->parquet_reader()
54  ->metadata()
55  ->schema()
56  ->Column(logical_column_index)
57  ->physical_type();
58 }

+ Here is the caller graph for this function:

std::shared_ptr< ChunkMetadata > foreign_storage::get_placeholder_metadata ( const ColumnDescriptor *  column,
size_t  num_elements 
)

Definition at line 76 of file FsiChunkUtils.cpp.

References ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata().

77  {
78  ForeignStorageBuffer empty_buffer;
79  // Use default encoder metadata as in parquet wrapper
80  empty_buffer.initEncoder(column->columnType);
81  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
82  chunk_metadata->numElements = num_elements;
83 
84  if (!column->columnType.is_varlen_indeed()) {
85  chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
86  }
87  // min/max not set by default for arrays, so get from elem type encoder
88  if (column->columnType.is_array()) {
89  ForeignStorageBuffer scalar_buffer;
90  scalar_buffer.initEncoder(column->columnType.get_elem_type());
91  auto scalar_metadata =
92  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
93  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
94  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
95  }
96  chunk_metadata->chunkStats.has_nulls = true;
97  return chunk_metadata;
98 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:551
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
bool is_array() const
Definition: sqltypes.h:527

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ParseBufferRequest foreign_storage::get_request_from_pool ( MetadataScanMultiThreadingParams &  multi_threading_params)

Gets a request from the metadata scan request pool.

Definition at line 622 of file AbstractTextFileDataWrapper.cpp.

References CHECK, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.

Referenced by dispatch_metadata_scan_requests().

// Takes the front request out of multi_threading_params.request_pool, waiting on
// request_pool_condition (under request_pool_mutex) until the pool is non-empty.
623  {
624  std::unique_lock<std::mutex> request_pool_lock(
625  multi_threading_params.request_pool_mutex);
626  multi_threading_params.request_pool_condition.wait(
627  request_pool_lock,
628  [&multi_threading_params] { return !multi_threading_params.request_pool.empty(); });
// The request is moved out of the queue before the queue entry is popped.
629  auto request = std::move(multi_threading_params.request_pool.front());
630  multi_threading_params.request_pool.pop();
// The lock is released explicitly before the buffer check below.
631  request_pool_lock.unlock();
632  CHECK(request.buffer);
633  return request;
634 }
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

std::unique_ptr< ColumnDescriptor > foreign_storage::get_sub_type_column_descriptor ( const ColumnDescriptor column)

Definition at line 76 of file ParquetShared.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::set_size(), and ColumnDescriptor::tableId.

Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_array_mapping().

// Creates a new ColumnDescriptor that carries the table id, column id and column name of
// `column` but uses the element type of `column`'s type.
77  {
78  auto column_type = column->columnType.get_elem_type();
// A dict-encoded string element type whose size is still -1 is given a size of 4.
79  if (column_type.get_size() == -1 && column_type.is_dict_encoded_string()) {
80  column_type.set_size(4); // override default size of -1
81  }
82  return std::make_unique<ColumnDescriptor>(
83  column->tableId, column->columnId, column->columnName, column_type);
84 }
void set_size(int s)
Definition: sqltypes.h:437
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t foreign_storage::get_thread_count ( const import_export::CopyParams copy_params,
const bool  size_known,
const size_t  file_size,
const size_t  buffer_size 
)

Gets the appropriate number of threads to be used for concurrent processing within the data wrapper.

Definition at line 234 of file AbstractTextFileDataWrapper.cpp.

References CHECK_GT, and import_export::CopyParams::threads.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::populateChunks().

// Picks the thread count: copy_params.threads when non-zero, otherwise
// std::thread::hardware_concurrency(). When the file size is known the count is capped
// at the number of buffers needed to cover the file. The result must be > 0 (CHECK_GT).
237  {
238  size_t thread_count = copy_params.threads;
239  if (thread_count == 0) {
// NOTE(review): hardware_concurrency() is permitted to return 0; in that case the
// CHECK_GT below fails. Confirm whether a fallback of 1 is wanted.
240  thread_count = std::thread::hardware_concurrency();
241  }
242  if (size_known) {
// Ceiling division of file_size by buffer_size.
// NOTE(review): buffer_size == 0 divides by zero, and file_size == 0 yields zero buffers
// and therefore a failing CHECK_GT - presumably callers exclude both; confirm.
243  size_t num_buffers_in_file = (file_size + buffer_size - 1) / buffer_size;
244  if (num_buffers_in_file < thread_count) {
245  thread_count = num_buffers_in_file;
246  }
247  }
248  CHECK_GT(thread_count, static_cast<size_t>(0));
249  return thread_count;
250 }
#define CHECK_GT(x, y)
Definition: Logger.h:223
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the caller graph for this function:

size_t foreign_storage::get_thread_count ( const import_export::CopyParams copy_params,
const FileRegions &  file_regions 
)

Definition at line 252 of file AbstractTextFileDataWrapper.cpp.

References CHECK_GT, and import_export::CopyParams::threads.

// Picks the thread count for processing a set of file regions: copy_params.threads when
// non-zero, otherwise the smaller of hardware_concurrency() and the number of file
// regions. An explicitly requested thread count is not capped by the region count here.
253  {
254  size_t thread_count = copy_params.threads;
255  if (thread_count == 0) {
// NOTE(review): this is 0 when file_regions is empty or hardware_concurrency() returns
// 0, which makes the CHECK_GT below fail - confirm callers never hit either case.
256  thread_count =
257  std::min<size_t>(std::thread::hardware_concurrency(), file_regions.size());
258  }
259  CHECK_GT(thread_count, static_cast<size_t>(0));
260  return thread_count;
261 }
#define CHECK_GT(x, y)
Definition: Logger.h:223
void foreign_storage::get_value ( const rapidjson::Value &  json_val,
FileRegion &  file_region 
)

Definition at line 44 of file CsvShared.cpp.

References CHECK, foreign_storage::FileRegion::filename, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::json_utils::get_value_from_object(), foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

// Populates a FileRegion from a JSON object; json_val must be an object (CHECK).
// NOTE(review): listing lines 46 and 48 are absent from this extract; presumably they
// open the json_utils::get_value_from_object( calls whose arguments appear on lines 47
// and 49 - verify against CsvShared.cpp.
44  {
45  CHECK(json_val.IsObject());
47  json_val, file_region.first_row_file_offset, "first_row_file_offset");
49  json_val, file_region.first_row_index, "first_row_index");
50  json_utils::get_value_from_object(json_val, file_region.region_size, "region_size");
51  json_utils::get_value_from_object(json_val, file_region.row_count, "row_count");
// "filename" is optional: it is only read when the member exists.
52  if (json_val.HasMember("filename")) {
53  json_utils::get_value_from_object(json_val, file_region.filename, "filename");
54  }
55 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:164
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

void foreign_storage::get_value ( const rapidjson::Value &  json_val,
RowGroupInterval &  value 
)

Definition at line 526 of file ParquetDataWrapper.cpp.

References CHECK, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, foreign_storage::json_utils::get_value_from_object(), and foreign_storage::RowGroupInterval::start_index.

// Populates a RowGroupInterval from a JSON object by reading its "file_path",
// "start_index" and "end_index" members; json_val must be an object (CHECK).
526  {
527  CHECK(json_val.IsObject());
528  json_utils::get_value_from_object(json_val, value.file_path, "file_path");
529  json_utils::get_value_from_object(json_val, value.start_index, "start_index");
530  json_utils::get_value_from_object(json_val, value.end_index, "end_index");
531 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:164
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

void foreign_storage::init_chunk_for_column ( const ChunkKey chunk_key,
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers,
Chunk_NS::Chunk chunk 
)

Definition at line 22 of file FsiChunkUtils.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Chunk_NS::Chunk::setPinnable(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMapForColumns().

// Wires `chunk` up to the buffers registered for `chunk_key`: looks up the column,
// locates its data buffer (and index buffer for variable-length types), reserves space
// based on the chunk metadata, then sets the buffers on the chunk and initialises its
// encoder.
// NOTE(review): listing line 28 (the initialiser of `catalog`) is absent from this
// extract; the References list suggests it goes through SysCatalog::instance() and
// getCatalog() - verify against FsiChunkUtils.cpp.
26  {
27  auto catalog =
29  CHECK(catalog);
30 
31  ChunkKey data_chunk_key = chunk_key;
32  AbstractBuffer* data_buffer = nullptr;
33  AbstractBuffer* index_buffer = nullptr;
34  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
35  chunk_key[CHUNK_KEY_COLUMN_IDX]);
36 
37  if (column->columnType.is_varlen_indeed()) {
// Variable-length columns use two buffers: the data key is chunk_key with 1 appended,
// the index key is chunk_key with 2 appended.
38  data_chunk_key.push_back(1);
39  ChunkKey index_chunk_key = chunk_key;
40  index_chunk_key.push_back(2);
41 
42  CHECK(buffers.find(data_chunk_key) != buffers.end());
43  CHECK(buffers.find(index_chunk_key) != buffers.end());
44 
45  data_buffer = buffers.find(data_chunk_key)->second;
46  index_buffer = buffers.find(index_chunk_key)->second;
// Both buffers are required to be empty at this point.
47  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
48  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
49 
// Offset width depends on the type family; any other variable-length type is treated as
// unreachable.
50  size_t index_offset_size{0};
51  if (column->columnType.is_string() || column->columnType.is_geometry()) {
52  index_offset_size = sizeof(StringOffsetT);
53  } else if (column->columnType.is_array()) {
54  index_offset_size = sizeof(ArrayOffsetT);
55  } else {
56  UNREACHABLE();
57  }
58  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
// The index buffer is reserved for numElements + 1 offsets.
59  index_buffer->reserve(index_offset_size *
60  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
61  } else {
// data_chunk_key already holds chunk_key here (initialised on listing line 31).
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68 
// index_buffer stays nullptr for columns that are not variable length.
69  chunk.setPinnable(false);
70  chunk.setColumnDesc(column);
71  chunk.setBuffer(data_buffer);
72  chunk.setIndexBuffer(index_buffer);
73  chunk.initEncoder();
74 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:37
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:151
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define UNREACHABLE()
Definition: Logger.h:255
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:149
int32_t StringOffsetT
Definition: sqltypes.h:1090
static SysCatalog & instance()
Definition: SysCatalog.h:312
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1091
void initEncoder()
Definition: Chunk.cpp:282
#define CHECK(condition)
Definition: Logger.h:211
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::is_metadata_placeholder ( const ChunkMetadata metadata)
inline

Check if ChunkMetadata corresponds to a chunk for which metadata must be populated.

Definition at line 27 of file MetadataPlaceholder.h.

References ChunkMetadata::chunkStats, Datum::intval, SQLTypeInfo::is_dict_encoded_type(), ChunkStats::max, ChunkStats::min, and ChunkMetadata::sqlType.

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::prepare_string_dictionaries().

// Returns true when the chunk stats have min.intval greater than max.intval and the
// metadata's SQL type is a dict-encoded type.
27  {
28  return metadata.chunkStats.min.intval > metadata.chunkStats.max.intval &&
29  metadata.sqlType.is_dict_encoded_type(); // Only supported type for now
30 }
int32_t intval
Definition: sqltypes.h:214
ChunkStats chunkStats
Definition: ChunkMetadata.h:35
bool is_dict_encoded_type() const
Definition: sqltypes.h:565
SQLTypeInfo sqlType
Definition: ChunkMetadata.h:32

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

UniqueReaderPtr foreign_storage::open_parquet_table ( const std::string &  file_path,
std::shared_ptr< arrow::fs::FileSystem > &  file_system 
)

Definition at line 26 of file ParquetShared.cpp.

Referenced by foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::FileReaderMap::insert(), and foreign_storage::LazyParquetChunkLoader::loadRowGroups().

// Opens `file_path` through `file_system` and returns a reader created by OpenFile.
// Throws std::runtime_error, including the file system type name and the status message,
// when the input file cannot be opened.
27  {
28  UniqueReaderPtr reader;
29  auto file_result = file_system->OpenInputFile(file_path);
30  if (!file_result.ok()) {
31  throw std::runtime_error{"Unable to access " + file_system->type_name() + " file: " +
32  file_path + ". " + file_result.status().message()};
33  }
// The result was checked with ok() above before ValueOrDie() is called.
34  auto infile = file_result.ValueOrDie();
35  PARQUET_THROW_NOT_OK(OpenFile(infile, arrow::default_memory_pool(), &reader));
36  return reader;
37 }
std::unique_ptr< parquet::arrow::FileReader > UniqueReaderPtr
Definition: ParquetShared.h:32

+ Here is the caller graph for this function:

ParseFileRegionResult foreign_storage::parse_file_regions ( const FileRegions &  file_regions,
const size_t  start_index,
const size_t  end_index,
FileReader &  file_reader,
ParseBufferRequest &  parse_file_request,
const std::map< int, Chunk_NS::Chunk > &  column_id_to_chunk_map,
const TextFileBufferParser &  parser 
)

Parses a set of file regions given a handle to the file and range of indexes for the file regions to be parsed.

Definition at line 173 of file AbstractTextFileDataWrapper.cpp.

References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, foreign_storage::ParseBufferRequest::buffer_size, CHECK, CHECK_EQ, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, DEBUG_TIMER, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseFileRegionResult::file_offset, foreign_storage::ParseBufferRequest::first_row_index, i, foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::FileReader::readRegion(), run_benchmark_import::result, and foreign_storage::ParseBufferResult::row_count.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunks().

// Reads and parses file regions start_index..end_index (both inclusive), reusing the
// buffer owned by parse_file_request for every region. The returned row_count is the
// sum over all regions and file_offset is that of the first region.
180  {
181  auto timer = DEBUG_TIMER(__func__);
182  ParseFileRegionResult load_file_region_result{};
// file_regions[start_index] is read before the loop, so start_index must be valid.
183  load_file_region_result.file_offset = file_regions[start_index].first_row_file_offset;
184  load_file_region_result.row_count = 0;
185 
186  ParseBufferResult result;
187  for (size_t i = start_index; i <= end_index; i++) {
// Each region has to fit in the request buffer and must be read in full.
188  CHECK(file_regions[i].region_size <= parse_file_request.buffer_size);
189  auto read_size = file_reader.readRegion(parse_file_request.buffer.get(),
190  file_regions[i].first_row_file_offset,
191  file_regions[i].region_size);
192  CHECK_EQ(file_regions[i].region_size, read_size);
193  parse_file_request.begin_pos = 0;
194  parse_file_request.end_pos = file_regions[i].region_size;
195  parse_file_request.first_row_index = file_regions[i].first_row_index;
196  parse_file_request.file_offset = file_regions[i].first_row_file_offset;
197  parse_file_request.process_row_count = file_regions[i].row_count;
198 
// `result` is overwritten on every iteration; the second argument is true only for the
// last region.
199  result = parser.parseBuffer(parse_file_request, i == end_index);
200  CHECK_EQ(file_regions[i].row_count, result.row_count);
201  load_file_region_result.row_count += result.row_count;
202  }
// The data block map is taken from the result of the final parseBuffer call.
203  load_file_region_result.column_id_to_data_blocks_map =
204  result.column_id_to_data_blocks_map;
205  return load_file_region_result;
206 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
#define CHECK(condition)
Definition: Logger.h:211
#define DEBUG_TIMER(name)
Definition: Logger.h:358

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<size_t> foreign_storage::partition_by_fragment ( const size_t  start_row_index,
const size_t  max_fragment_size,
const size_t  buffer_row_count 
)

Given a start row index, maximum fragment size, and number of rows in a buffer, this function returns a vector indicating how the rows in the buffer should be partitioned in order to fill up available fragment slots while staying within the capacity of fragments.

Definition at line 341 of file AbstractTextFileDataWrapper.cpp.

References CHECK.

Referenced by scan_metadata().

343  {
344  CHECK(buffer_row_count > 0);
345  std::vector<size_t> partitions{};
346  size_t remaining_rows_in_last_fragment;
347  if (start_row_index % max_fragment_size == 0) {
348  remaining_rows_in_last_fragment = 0;
349  } else {
350  remaining_rows_in_last_fragment =
351  max_fragment_size - (start_row_index % max_fragment_size);
352  }
353  if (buffer_row_count <= remaining_rows_in_last_fragment) {
354  partitions.emplace_back(buffer_row_count);
355  } else {
356  if (remaining_rows_in_last_fragment > 0) {
357  partitions.emplace_back(remaining_rows_in_last_fragment);
358  }
359  size_t remaining_buffer_row_count =
360  buffer_row_count - remaining_rows_in_last_fragment;
361  while (remaining_buffer_row_count > 0) {
362  partitions.emplace_back(
363  std::min<size_t>(remaining_buffer_row_count, max_fragment_size));
364  remaining_buffer_row_count -= partitions.back();
365  }
366  }
367  return partitions;
368 }
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::set< T > &  items,
size_t  max_threads 
)

Definition at line 40 of file FsiChunkUtils.h.

References i.

Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan(), and foreign_storage::ParquetDataWrapper::populateChunkBuffers().

/**
 * Splits a set of items into consecutive groups, one group per thread.
 *
 * Items are visited in the set's iteration order and placed into groups of
 * ceil(items.size() / max_threads) elements; the last group may be smaller.
 *
 * @param items - items to distribute
 * @param max_threads - upper bound on the number of groups; 0 is treated as 1
 * @return list of groups; empty when items is empty
 */
template <typename T>
auto partition_for_threads(const std::set<T>& items, size_t max_threads) {
  // Guard against division by zero when no thread count was supplied.
  const size_t thread_count = max_threads == 0 ? 1 : max_threads;
  // Ceiling division written so that the intermediate sum cannot overflow.
  const size_t items_per_thread =
      items.size() / thread_count + (items.size() % thread_count != 0 ? 1 : 0);

  std::list<std::set<T>> items_by_thread;
  size_t item_index = 0;
  for (const auto& item : items) {
    if (item_index++ % items_per_thread == 0) {
      items_by_thread.emplace_back();
    }
    items_by_thread.back().emplace(item);
  }
  return items_by_thread;
}

+ Here is the caller graph for this function:

template<typename T >
auto foreign_storage::partition_for_threads ( const std::vector< T > &  items,
size_t  max_threads 
)

Definition at line 59 of file FsiChunkUtils.h.

References i.

/**
 * Splits a vector of items into consecutive groups, one group per thread.
 *
 * Items keep their original order and are placed into groups of
 * ceil(items.size() / max_threads) elements; the last group may be smaller.
 *
 * @param items - items to distribute
 * @param max_threads - upper bound on the number of groups; 0 is treated as 1
 * @return list of groups; empty when items is empty
 */
template <typename T>
auto partition_for_threads(const std::vector<T>& items, size_t max_threads) {
  // Guard against division by zero when no thread count was supplied.
  const size_t thread_count = max_threads == 0 ? 1 : max_threads;
  // Ceiling division written so that the intermediate sum cannot overflow.
  const size_t items_per_thread =
      items.size() / thread_count + (items.size() % thread_count != 0 ? 1 : 0);

  std::list<std::vector<T>> items_by_thread;
  size_t item_index = 0;
  for (const auto& item : items) {
    if (item_index++ % items_per_thread == 0) {
      items_by_thread.emplace_back();
    }
    items_by_thread.back().emplace_back(item);
  }
  return items_by_thread;
}
void foreign_storage::process_data_blocks ( MetadataScanMultiThreadingParams &  multi_threading_params,
int  fragment_id,
const ParseBufferRequest &  request,
ParseBufferResult &  result,
std::map< int, const ColumnDescriptor * > &  column_by_id,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map 
)

Updates metadata encapsulated in encoders for all table columns given new data blocks gotten from parsing a new set of rows in a file buffer. If cache is available, also append the data_blocks to chunks in the cache.

Definition at line 501 of file AbstractTextFileDataWrapper.cpp.

References add_file_region(), cache_blocks(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getMaxFragRows(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::row_count, and update_stats().

Referenced by scan_metadata().

// Folds one parse result into the per-chunk encoders. chunk_encoder_buffers_mutex is
// held for the whole call. The file region is recorded first; then, for every column's
// data block, the chunk's encoder buffer is created on first use, its stats are
// updated, its element count is increased by result.row_count, and the block is passed
// to cache_blocks.
506  {
507  std::lock_guard<std::mutex> lock(multi_threading_params.chunk_encoder_buffers_mutex);
508  // File regions should be added in same order as appendData
509  add_file_region(fragment_id_to_file_regions_map,
510  fragment_id,
511  request.first_row_index,
512  result,
513  request.getFilePath());
514 
515  for (auto& [column_id, data_block] : result.column_id_to_data_blocks_map) {
516  ChunkKey chunk_key{request.db_id, request.getTableId(), column_id, fragment_id};
// NOTE(review): operator[] inserts a null entry for an unknown column id and `column`
// is dereferenced right after - presumably every id is present in column_by_id; confirm.
517  const auto column = column_by_id[column_id];
// Variable-length columns are keyed with an extra trailing 1.
518  if (column->columnType.is_varlen_indeed()) {
519  chunk_key.emplace_back(1);
520  }
// Create and initialise the encoder buffer the first time this chunk key is seen.
521  if (multi_threading_params.chunk_encoder_buffers.find(chunk_key) ==
522  multi_threading_params.chunk_encoder_buffers.end()) {
523  multi_threading_params.chunk_encoder_buffers[chunk_key] =
524  std::make_unique<ForeignStorageBuffer>();
525  multi_threading_params.chunk_encoder_buffers[chunk_key]->initEncoder(
526  column->columnType);
527  }
528  update_stats(multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder(),
529  column->columnType,
530  data_block,
531  result.row_count);
// New element count = count already held by the encoder + rows in this result.
532  size_t num_elements = multi_threading_params.chunk_encoder_buffers[chunk_key]
533  ->getEncoder()
534  ->getNumElems() +
535  result.row_count;
536  multi_threading_params.chunk_encoder_buffers[chunk_key]->getEncoder()->setNumElems(
537  num_elements);
538  cache_blocks(
539  multi_threading_params.cached_chunks,
540  data_block,
541  result.row_count,
542  chunk_key,
543  column,
544  (num_elements - result.row_count) == 0, // Is the first block added to this chunk
545  num_elements == request.getMaxFragRows(), // Is the last block for this chunk
546  multi_threading_params.disable_cache);
547  }
548 }
void cache_blocks(std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool is_last_block, bool disable_cache)
std::vector< int > ChunkKey
Definition: types.h:37
void update_stats(Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count)
void add_file_region(std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::refresh_foreign_table ( Catalog_Namespace::Catalog catalog,
const std::string &  table_name,
const bool  evict_cached_entries 
)

Definition at line 22 of file ForeignTableRefresh.cpp.

References CHUNK_KEY_FRAGMENT_IDX, Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, StorageType::FOREIGN_TABLE, Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getForeignTable(), PostEvictionRefreshException::getOriginalException(), Data_Namespace::GPU_LEVEL, foreign_storage::ForeignTable::isAppendMode(), Catalog_Namespace::Catalog::removeFragmenterForTable(), and Catalog_Namespace::Catalog::updateForeignTableRefreshTimes().

Referenced by RefreshForeignTablesCommand::execute(), and foreign_storage::ForeignTableRefreshScheduler::start().

// Refreshes the foreign table `table_name`: takes a schema write lock container for the
// table, rejects tables that are not foreign tables, removes the table's fragmenter,
// deletes its chunks from CPU and GPU memory levels (all of them, or only those of the
// highest fragment id in append mode without eviction), then asks the foreign storage
// manager to refresh the table and updates the table's refresh times.
// NOTE(review): listing line 28 is absent from this extract; it presumably names the
// call that produces the lock container from (catalog, table_name, false) - verify
// against ForeignTableRefresh.cpp.
24  {
25  auto& data_mgr = catalog.getDataMgr();
26  auto table_lock =
27  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
29  catalog, table_name, false));
30 
31  const TableDescriptor* td = (*table_lock)();
32  if (td->storageType != StorageType::FOREIGN_TABLE) {
33  throw std::runtime_error{
34  table_name +
35  " is not a foreign table. Refreshes are applicable to only foreign tables."};
36  }
37 
38  catalog.removeFragmenterForTable(td->tableId);
39  ChunkKey table_key{catalog.getCurrentDB().dbId, td->tableId};
40 
41  if (catalog.getForeignTable(td->tableId)->isAppendMode() && !evict_cached_entries) {
// Append mode without eviction: find the highest fragment id (starting from 0) and
// delete only the chunks that belong to it.
42  ChunkMetadataVector metadata_vec;
43  data_mgr.getChunkMetadataVecForKeyPrefix(metadata_vec, table_key);
44  int last_fragment_id = 0;
45  for (const auto& [key, metadata] : metadata_vec) {
46  if (key[CHUNK_KEY_FRAGMENT_IDX] > last_fragment_id) {
47  last_fragment_id = key[CHUNK_KEY_FRAGMENT_IDX];
48  }
49  }
50  for (const auto& [key, metadata] : metadata_vec) {
51  if (key[CHUNK_KEY_FRAGMENT_IDX] == last_fragment_id) {
52  data_mgr.deleteChunksWithPrefix(key, MemoryLevel::CPU_LEVEL);
53  data_mgr.deleteChunksWithPrefix(key, MemoryLevel::GPU_LEVEL);
54  }
55  }
56  } else {
// Otherwise every chunk under the table key is deleted.
57  data_mgr.deleteChunksWithPrefix(table_key, MemoryLevel::CPU_LEVEL);
58  data_mgr.deleteChunksWithPrefix(table_key, MemoryLevel::GPU_LEVEL);
59  }
60 
61  try {
62  data_mgr.getPersistentStorageMgr()->getForeignStorageMgr()->refreshTable(
63  table_key, evict_cached_entries);
64  catalog.updateForeignTableRefreshTimes(td->tableId);
65  } catch (PostEvictionRefreshException& e) {
// Refresh times are still updated for a PostEvictionRefreshException; the exception
// returned by getOriginalException() is then thrown to the caller.
66  catalog.updateForeignTableRefreshTimes(td->tableId);
67  throw e.getOriginalException();
68  }
69 }
const foreign_storage::ForeignTable * getForeignTable(const std::string &tableName) const
Definition: Catalog.cpp:1502
std::vector< int > ChunkKey
Definition: types.h:37
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:229
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
std::runtime_error getOriginalException()
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:228
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
bool isAppendMode() const
Checks if the table is in append mode.
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:3518
static constexpr char const * FOREIGN_TABLE
void updateForeignTableRefreshTimes(const int32_t table_id)
Definition: Catalog.cpp:4863

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::resize_buffer_if_needed ( std::unique_ptr< char[]> &  buffer,
size_t &  buffer_size,
const size_t  alloc_size 
)

Optionally resizes the given buffer if the buffer size is less than the current buffer allocation size.

Definition at line 655 of file AbstractTextFileDataWrapper.cpp.

References CHECK_LE.

Referenced by dispatch_metadata_scan_requests().

// Replaces `buffer` with a new allocation of alloc_size bytes when the current
// buffer_size is smaller, and updates buffer_size to match. buffer_size must not exceed
// alloc_size (CHECK_LE). The previous buffer contents are not copied over.
657  {
658  CHECK_LE(buffer_size, alloc_size);
659  if (buffer_size < alloc_size) {
660  buffer = std::make_unique<char[]>(alloc_size);
661  buffer_size = alloc_size;
662  }
663 }
#define CHECK_LE(x, y)
Definition: Logger.h:222

+ Here is the caller graph for this function:

std::vector< Aws::S3::Model::Object > foreign_storage::s3_objects_filter_sort_files ( const std::vector< Aws::S3::Model::Object > &  file_paths,
const std::optional< std::string > &  filter_regex,
const std::optional< std::string > &  sort_by,
const std::optional< std::string > &  sort_regex 
)

Definition at line 22 of file S3FilePathUtil.cpp.

References shared::PATHNAME_ORDER_TYPE, and foreign_storage::anonymous_namespace{S3FilePathUtil.cpp}::s3_objects_regex_file_filter().

// Filters the given S3 objects with filter_regex (when provided) and returns them sorted:
// first by the PATHNAME_ORDER_TYPE comparator, then stably by the comparator built from
// sort_regex and sort_by.
26  {
27  auto result_files = filter_regex.has_value()
28  ? s3_objects_regex_file_filter(filter_regex.value(), file_paths)
29  : file_paths;
30  // initial lexicographical order ensures a deterministic ordering for files not matching
31  // sort_regex
32  auto initial_file_order = FileOrderS3(std::nullopt, shared::PATHNAME_ORDER_TYPE);
33  auto lexi_comp = initial_file_order.getFileComparator();
34  std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
35 
// Second pass is also a stable sort, so the order from the first pass is kept among
// elements the second comparator treats as equivalent.
36  auto file_order = FileOrderS3(sort_regex, sort_by);
37  auto comp = file_order.getFileComparator();
38  std::stable_sort(result_files.begin(), result_files.end(), comp);
39  return result_files;
40 }
std::vector< Aws::S3::Model::Object > s3_objects_regex_file_filter(const std::string &pattern, const std::vector< Aws::S3::Model::Object > &objects_list)
const std::string PATHNAME_ORDER_TYPE

+ Here is the call graph for this function:

void foreign_storage::scan_metadata ( MetadataScanMultiThreadingParams &  multi_threading_params,
std::map< int, FileRegions > &  fragment_id_to_file_regions_map,
const TextFileBufferParser &  parser 
)

Consumes and processes metadata scan requests from a pending requests queue and updates existing metadata objects based on newly scanned metadata.

Definition at line 567 of file AbstractTextFileDataWrapper.cpp.

References add_request_to_pool(), foreign_storage::MetadataScanMultiThreadingParams::continue_processing, get_next_metadata_scan_request(), foreign_storage::TextFileBufferParser::parseBuffer(), partition_by_fragment(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, process_data_blocks(), and run_benchmark_import::result.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

// Worker loop for the metadata scan: repeatedly takes the next pending request, splits
// its rows into per-fragment partitions, parses each partition and feeds the result to
// process_data_blocks, then hands the request back to the pool. The loop ends when
// get_next_metadata_scan_request returns no value.
569  {
570  std::map<int, const ColumnDescriptor*> column_by_id{};
571  while (true) {
572  auto request_opt = get_next_metadata_scan_request(multi_threading_params);
573  if (!request_opt.has_value()) {
574  break;
575  }
576  auto& request = request_opt.value();
577  try {
// The column lookup table is built once, from the first request handled.
578  if (column_by_id.empty()) {
579  for (const auto column : request.getColumns()) {
580  column_by_id[column->columnId] = column;
581  }
582  }
583  auto partitions = partition_by_fragment(
584  request.first_row_index, request.getMaxFragRows(), request.buffer_row_count);
585  request.begin_pos = 0;
586  size_t row_index = request.first_row_index;
587  for (const auto partition : partitions) {
588  request.process_row_count = partition;
// Import buffers are cleared before each partition is parsed.
589  for (const auto& import_buffer : request.import_buffers) {
590  if (import_buffer != nullptr) {
591  import_buffer->clear();
592  }
593  }
594  auto result = parser.parseBuffer(request, true);
// The fragment id is derived from the index of the first row in this partition.
595  int fragment_id = row_index / request.getMaxFragRows();
596  process_data_blocks(multi_threading_params,
597  fragment_id,
598  request,
599  result,
600  column_by_id,
601  fragment_id_to_file_regions_map);
602  row_index += result.row_count;
// The next partition starts at the last reported row offset, made relative to the
// start of the request's buffer.
603  request.begin_pos = result.row_offsets.back() - request.file_offset;
604  }
605  } catch (...) {
606  // Re-add request to pool so we don't block any other threads
607  {
608  std::lock_guard<std::mutex> pending_requests_lock(
609  multi_threading_params.pending_requests_mutex);
610  multi_threading_params.continue_processing = false;
611  }
612  add_request_to_pool(multi_threading_params, request);
613  throw;
614  }
615  add_request_to_pool(multi_threading_params, request);
616  }
617 }
std::vector< size_t > partition_by_fragment(const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count)
void add_request_to_pool(MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request)
std::optional< ParseBufferRequest > get_next_metadata_scan_request(MetadataScanMultiThreadingParams &multi_threading_params)
void process_data_blocks(MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::set_comp ( const ChunkKey left,
const ChunkKey right 
)
void foreign_storage::set_value ( rapidjson::Value &  json_val,
const FileRegion &  file_region,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 26 of file CsvShared.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::FileRegion::filename, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.

// Serialises a FileRegion into json_val as a JSON object.
// NOTE(review): listing lines 30, 32, 34, 36 and 39 are absent from this extract;
// presumably each opens a json_utils::add_value_to_object( call whose arguments appear
// on the following line - verify against CsvShared.cpp.
28  {
29  json_val.SetObject();
31  json_val, file_region.first_row_file_offset, "first_row_file_offset", allocator);
33  json_val, file_region.first_row_index, "first_row_index", allocator);
35  json_val, file_region.region_size, "region_size", allocator);
37  json_val, file_region.row_count, "row_count", allocator);
// "filename" is only emitted when the file name is non-empty.
38  if (file_region.filename.size()) {
40  json_val, file_region.filename, "filename", allocator);
41  }
42 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:149

+ Here is the call graph for this function:

void foreign_storage::set_value ( rapidjson::Value &  json_val,
const RowGroupInterval &  value,
rapidjson::Document::AllocatorType &  allocator 
)

Definition at line 517 of file ParquetDataWrapper.cpp.

References foreign_storage::json_utils::add_value_to_object(), foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, and foreign_storage::RowGroupInterval::start_index.

// Serialises a RowGroupInterval into json_val as a JSON object with the members
// "file_path", "start_index" and "end_index".
519  {
520  json_val.SetObject();
521  json_utils::add_value_to_object(json_val, value.file_path, "file_path", allocator);
522  json_utils::add_value_to_object(json_val, value.start_index, "start_index", allocator);
523  json_utils::add_value_to_object(json_val, value.end_index, "end_index", allocator);
524 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:149

+ Here is the call graph for this function:

void foreign_storage::throw_file_access_error ( const std::string &  file_path,
const std::string &  message 
)
inline

Definition at line 70 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

71  {
72  std::string error_message{"Unable to access file \"" + file_path + "\". " + message};
73  throw ForeignStorageException{error_message};
74 }

+ Here is the caller graph for this function:

void foreign_storage::throw_file_not_found_error ( const std::string &  file_path)
inline

Definition at line 76 of file ForeignStorageException.h.

Referenced by foreign_storage::ParquetImporter::getAllFilePaths().

76  {
77  throw ForeignStorageException{"File or directory \"" + file_path +
78  "\" does not exist."};
79 }

+ Here is the caller graph for this function:

void foreign_storage::throw_number_of_columns_mismatch_error ( size_t  num_table_cols,
size_t  num_file_cols,
const std::string &  file_path 
)
inline

Definition at line 61 of file ForeignStorageException.h.

References to_string().

Referenced by foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::regex_match_columns(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_expected_column_count(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().

63  {
64  throw ForeignStorageException{"Mismatched number of logical columns: (expected " +
65  std::to_string(num_table_cols) + " columns, has " +
66  std::to_string(num_file_cols) + "): in file '" +
67  file_path + "'"};
68 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::throw_parquet_metadata_out_of_bounds_error ( const std::string &  min_value,
const std::string &  max_value,
const std::string &  encountered_value 
)
inline

Definition at line 46 of file ParquetMetadataValidator.h.

Referenced by foreign_storage::TimestampBoundsValidator< T >::validateValue(), foreign_storage::IntegralFixedLengthBoundsValidator< T >::validateValue(), foreign_storage::DateInSecondsBoundsValidator< T >::validateValue(), and foreign_storage::FloatPointValidator< T >::validateValue().

49  {
50  std::stringstream error_message;
51  error_message << "Parquet column contains values that are outside the range of the "
52  "OmniSci column "
53  "type. Consider using a wider column type. Min allowed value: "
54  << min_value << ". Max allowed value: " << max_value
55  << ". Encountered value: " << encountered_value << ".";
56  throw std::runtime_error(error_message.str());
57 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_file_error ( const std::string &  file_path)
inline

Definition at line 54 of file ForeignStorageException.h.

Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

54  {
55  throw ForeignStorageException{
56  "Refresh of foreign table created with \"APPEND\" update type failed as "
57  "file \"" +
58  file_path + "\" was removed."};
59 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_file_error ( const std::string &  file_path)
inline

Definition at line 47 of file ForeignStorageException.h.

Referenced by foreign_storage::SingleTextFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().

47  {
48  throw ForeignStorageException{
49  "Refresh of foreign table created with \"APPEND\" update type failed as file "
50  "reduced in size: \"" +
51  file_path + "\""};
52 }

+ Here is the caller graph for this function:

void foreign_storage::throw_removed_row_in_result_set_error ( const std::string &  select_statement)
inline

Definition at line 39 of file ForeignStorageException.h.

39  {
40  throw ForeignStorageException{
41  "Refresh of foreign table created with \"APPEND\" update type failed as result set "
42  "of select statement "
43  "reduced in size: \"" +
44  select_statement + "\""};
45 }

void foreign_storage::throw_s3_compressed_extension ( const std::string &  file_path,
const std::string &  ext_type 
)
inline

Definition at line 88 of file ForeignStorageException.h.

89  {
90  throw ForeignStorageException{
91  "File \"" + file_path + "\" has extension type \"" + ext_type +
92  "\", compressed file formats are not supported by S3 Foreign Tables."};
93 }

void foreign_storage::throw_s3_compressed_mime_type ( const std::string &  file_path,
const std::string &  mime_type 
)
inline

Definition at line 81 of file ForeignStorageException.h.

82  {
83  throw ForeignStorageException{
84  "File \"" + file_path + "\" has mime type \"" + mime_type +
85  "\", compressed file formats are not supported by S3 Foreign Tables."};
86 }

void foreign_storage::update_stats ( Encoder *  encoder,
const SQLTypeInfo column_type,
DataBlockPtr  data_block,
const size_t  row_count 
)

Updates the statistics metadata encapsulated in the encoder given new data in a data block.

Definition at line 433 of file AbstractTextFileDataWrapper.cpp.

References DataBlockPtr::arraysPtr, SQLTypeInfo::is_array(), SQLTypeInfo::is_varlen(), DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and Encoder::updateStats().

Referenced by process_data_blocks(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata(), and StorageIOFacility::yieldUpdateCallback().

436  {
437  if (column_type.is_array()) {
438  encoder->updateStats(data_block.arraysPtr, 0, row_count);
439  } else if (!column_type.is_varlen()) {
440  encoder->updateStats(data_block.numbersPtr, row_count);
441  } else {
442  encoder->updateStats(data_block.stringsPtr, 0, row_count);
443  }
444 }
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
bool is_varlen() const
Definition: sqltypes.h:545
int8_t * numbersPtr
Definition: sqltypes.h:226
virtual void updateStats(const int64_t val, const bool is_null)=0
bool is_array() const
Definition: sqltypes.h:527

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< parquet::Statistics > foreign_storage::validate_and_get_column_metadata_statistics ( const parquet::ColumnChunkMetaData *  column_metadata)

Definition at line 86 of file ParquetShared.cpp.

References CHECK.

Referenced by foreign_storage::ParquetEncoder::getRowGroupMetadata(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_definition_levels().

87  {
88  CHECK(column_metadata->is_stats_set());
89  std::shared_ptr<parquet::Statistics> stats = column_metadata->statistics();
90  return stats;
91 }
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

void foreign_storage::validate_equal_column_descriptor ( const parquet::ColumnDescriptor *  reference_descriptor,
const parquet::ColumnDescriptor *  new_descriptor,
const std::string &  reference_file_path,
const std::string &  new_file_path 
)

Definition at line 60 of file ParquetShared.cpp.

Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().

64  {
65  if (!reference_descriptor->Equals(*new_descriptor)) {
66  throw std::runtime_error{"Parquet file \"" + new_file_path +
67  "\" has a different schema. Please ensure that all Parquet "
68  "files use the same schema. Reference Parquet file: " +
69  reference_file_path +
70  ", column name: " + reference_descriptor->name() +
71  ". New Parquet file: " + new_file_path +
72  ", column name: " + new_descriptor->name() + "."};
73  }
74 }

+ Here is the caller graph for this function:

void foreign_storage::validate_non_foreign_table_write ( const TableDescriptor *  table_descriptor)
inline

Definition at line 22 of file FsiUtils.h.

References StorageType::FOREIGN_TABLE, and TableDescriptor::storageType.

Referenced by Parser::InsertStmt::analyze(), Parser::InsertValuesStmt::determineLeafIndex(), Parser::TruncateTableStmt::execute(), Parser::InsertValuesStmt::execute(), Parser::InsertIntoTableAsSelectStmt::populateData(), and RelModify::RelModify().

22  {
23  if (table_descriptor && table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
24  throw std::runtime_error{
25  "DELETE, INSERT, TRUNCATE, OR UPDATE commands are not supported for foreign "
26  "tables."};
27  }
28 }
std::string storageType
static constexpr char const * FOREIGN_TABLE

+ Here is the caller graph for this function: