OmniSciDB  8a228a1076
ParquetDataWrapper.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ParquetDataWrapper.h"
18 #include "LazyParquetChunkLoader.h"
19 #include "ParquetShared.h"
20 
21 #include <regex>
22 
23 #include <boost/filesystem.hpp>
24 
25 #include "ImportExport/Importer.h"
26 #include "Utils/DdlUtils.h"
27 
28 namespace foreign_storage {
29 
30 namespace {
31 template <typename T>
32 std::pair<typename std::map<ChunkKey, T>::iterator,
33  typename std::map<ChunkKey, T>::iterator>
34 prefix_range(std::map<ChunkKey, T>& map, const ChunkKey& chunk_key_prefix) {
35  ChunkKey chunk_key_prefix_sentinel = chunk_key_prefix;
36  chunk_key_prefix_sentinel.push_back(std::numeric_limits<int>::max());
37  auto begin = map.lower_bound(chunk_key_prefix);
38  auto end = map.upper_bound(chunk_key_prefix_sentinel);
39  return std::make_pair(begin, end);
40 }
41 } // namespace
42 
43 ParquetDataWrapper::ParquetDataWrapper(const int db_id, const ForeignTable* foreign_table)
44  : db_id_(db_id)
45  , foreign_table_(foreign_table)
46  , last_fragment_index_(0)
47  , last_fragment_row_count_(0)
48  , last_row_group_(0)
49  , schema_(std::make_unique<ForeignTableSchema>(db_id, foreign_table)) {}
50 
// Lightweight constructor (signature on a line not visible in this view) used
// by validateOptions() below; db_id_ gets a -1 sentinel since no catalog
// access is needed for validation-only instances.
52  : db_id_(-1), foreign_table_(foreign_table) {}
53 
// Body of validateOptions (signature line not visible in this view).
// Rejects any foreign table option that is neither a generic ForeignTable
// option nor a Parquet-wrapper-specific option, then runs the remaining
// validations (copy params, file path) on a throwaway wrapper instance.
55  for (const auto& entry : foreign_table->options) {
56  const auto& table_options = foreign_table->supported_options;
// An option is valid if it appears in either the table-level or the
// wrapper-level supported-options list.
57  if (std::find(table_options.begin(), table_options.end(), entry.first) ==
58  table_options.end() &&
59  std::find(supported_options_.begin(), supported_options_.end(), entry.first) ==
60  supported_options_.end()) {
61  throw std::runtime_error{"Invalid foreign table option \"" + entry.first + "\"."};
62  }
63  }
// Uses the validation-only constructor (db_id_ == -1): these checks do not
// touch the catalog.
64  ParquetDataWrapper data_wrapper{foreign_table};
65  data_wrapper.validateAndGetCopyParams();
66  data_wrapper.validateFilePath();
67 }
68 
72 }
73 
77 
78  last_row_group_ = 0;
81 }
82 
83 std::list<const ColumnDescriptor*> ParquetDataWrapper::getColumnsToInitialize(
84  const Interval<ColumnType>& column_interval) {
85  const auto catalog = Catalog_Namespace::Catalog::get(db_id_);
86  CHECK(catalog);
87  const auto& columns = schema_->getLogicalAndPhysicalColumns();
88  auto column_start = column_interval.start;
89  auto column_end = column_interval.end;
90  std::list<const ColumnDescriptor*> columns_to_init;
91  for (const auto column : columns) {
92  auto column_id = column->columnId;
93  if (column_id >= column_start && column_id <= column_end) {
94  columns_to_init.push_back(column);
95  }
96  }
97  return columns_to_init;
98 }
99 
// Body of initializeChunkBuffers (opening signature line not visible in this
// view). Wires the caller-provided AbstractBuffers into Chunk objects for each
// column in the interval, initializes each chunk's encoder, and — when
// reserve_buffers_and_set_stats is set — restores encoder stats and reserves
// buffer capacity from previously-collected chunk metadata.
101  const int fragment_index,
102  const Interval<ColumnType>& column_interval,
103  std::map<ChunkKey, AbstractBuffer*>& required_buffers,
104  const bool reserve_buffers_and_set_stats,
// NOTE(review): physical_byte_size is not referenced anywhere in the visible
// body — confirm whether it is intentionally unused here.
105  const size_t physical_byte_size) {
106  for (const auto column : getColumnsToInitialize(column_interval)) {
107  Chunk_NS::Chunk chunk{column};
108  ChunkKey data_chunk_key;
// Varlen columns use two buffers: data (chunk-key suffix 1) and index
// (chunk-key suffix 2); fixed-length columns use a single 4-element key.
109  if (column->columnType.is_varlen_indeed()) {
110  data_chunk_key = {
111  db_id_, foreign_table_->tableId, column->columnId, fragment_index, 1};
112  auto data_buffer = required_buffers[data_chunk_key];
113  CHECK(data_buffer);
114  chunk.setBuffer(data_buffer);
115 
116  ChunkKey index_chunk_key{
117  db_id_, foreign_table_->tableId, column->columnId, fragment_index, 2};
118  auto index_buffer = required_buffers[index_chunk_key];
119  CHECK(index_buffer);
120  chunk.setIndexBuffer(index_buffer);
121  } else {
122  data_chunk_key = {
123  db_id_, foreign_table_->tableId, column->columnId, fragment_index};
124  auto data_buffer = required_buffers[data_chunk_key];
125  CHECK(data_buffer);
126  chunk.setBuffer(data_buffer);
127  }
128  chunk.initEncoder();
129  if (reserve_buffers_and_set_stats) {
// Metadata for this chunk must already exist (populated by a prior scan).
130  const auto metadata_it = chunk_metadata_map_.find(data_chunk_key);
131  CHECK(metadata_it != chunk_metadata_map_.end());
132  auto buffer = chunk.getBuffer();
133  auto& metadata = metadata_it->second;
134  auto& encoder = buffer->encoder;
135  encoder->resetChunkStats(metadata->chunkStats);
136  encoder->setNumElems(metadata->numElements);
137  if (column->columnType.is_string() &&
138  column->columnType.get_compression() == kENCODING_NONE) {
// Uncompressed strings: reserve the index buffer (one offset per element,
// plus a trailing offset).
139  auto index_buffer = chunk.getIndexBuf();
140  index_buffer->reserve(sizeof(StringOffsetT) * (metadata->numElements + 1));
141  } else {
142  size_t num_bytes_to_reserve =
143  metadata->numElements * column->columnType.get_size();
144  buffer->reserve(num_bytes_to_reserve);
145  }
146  }
147  }
148 }
149 
153 }
154 
155 void ParquetDataWrapper::updateFragmentMap(int fragment_index, int row_group) {
156  CHECK(fragment_index > 0);
157  fragment_to_row_group_interval_map_[fragment_index - 1].end_row_group_index =
158  row_group - 1;
159  fragment_to_row_group_interval_map_[fragment_index] = {row_group, -1};
160 }
161 
// Body of a metadata-scan routine (its signature and the declaration of
// metadata_vector are on lines not visible in this view). Builds a
// LazyParquetImporter with a metadata loader and runs metadataScan() over the
// file identified by getFilePath().
163  auto catalog = Catalog_Namespace::Catalog::get(db_id_);
164  CHECK(catalog);
165 
// NOTE(review): metadata_vector is declared on a missing line; presumably a
// LazyParquetImporter::RowGroupMetadataVector — confirm against full source.
168  LazyParquetImporter importer(getMetadataLoader(*catalog, metadata_vector),
169  getFilePath(),
171  metadata_vector,
172  *schema_);
173  importer.metadataScan();
175 }
176 
// Body of getFilePath (signature line not visible in this view). Joins the
// server's mandatory BASE_PATH with the table's optional FILE_PATH and
// collapses any runs of duplicate path separators.
178  auto& server_options = foreign_table_->foreign_server->options;
179  auto base_path_entry = server_options.find("BASE_PATH");
180  if (base_path_entry == server_options.end()) {
181  throw std::runtime_error{"No base path found in foreign server options."};
182  }
183  auto file_path_entry = foreign_table_->options.find("FILE_PATH");
// FILE_PATH is optional: an empty relative path means the base path itself.
184  std::string file_path{};
185  if (file_path_entry != foreign_table_->options.end()) {
186  file_path = file_path_entry->second;
187  }
188  const std::string separator{boost::filesystem::path::preferred_separator};
// NOTE(review): the separator is interpolated unescaped into a regex; this is
// fine for '/' but would be an invalid pattern if preferred_separator were a
// regex metacharacter (e.g. '\\' on Windows) — confirm target platforms.
189  return std::regex_replace(base_path_entry->second + separator + file_path,
190  std::regex{separator + "{2,}"},
191  separator);
192 }
193 
// Body of validateAndGetCopyParams (signature line not visible in this view).
// Builds CopyParams from the optional ARRAY_DELIMITER (exactly 1 char) and
// ARRAY_MARKER (exactly 2 chars: begin and end) table options; length checks
// happen inside validateAndGetStringWithLength.
195  import_export::CopyParams copy_params{};
196  if (const auto& value = validateAndGetStringWithLength("ARRAY_DELIMITER", 1);
197  !value.empty()) {
198  copy_params.array_delim = value[0];
199  }
200  if (const auto& value = validateAndGetStringWithLength("ARRAY_MARKER", 2);
201  !value.empty()) {
202  copy_params.array_begin = value[0];
203  copy_params.array_end = value[1];
204  }
205  // The file_type argument is never utilized in the context of FSI,
206  // for completeness, set the file_type
207  copy_params.file_type = import_export::FileType::PARQUET;
208  return copy_params;
209 }
210 
// Body of validateAndGetStringWithLength (opening signature line not visible
// in this view). Looks up a foreign table option by name; returns its value
// if present (throwing when the value's length differs from
// expected_num_chars), or an empty string when the option is absent.
212  const std::string& option_name,
213  const size_t expected_num_chars) {
214  if (auto it = foreign_table_->options.find(option_name);
215  it != foreign_table_->options.end()) {
216  if (it->second.length() != expected_num_chars) {
217  throw std::runtime_error{"Value of \"" + option_name +
218  "\" foreign table option has the wrong number of "
219  "characters. Expected " +
220  std::to_string(expected_num_chars) + " character(s)."};
221  }
222  return it->second;
223  }
// Absent option: empty string signals "not set" to the caller.
224  return "";
225 }
226 
// Body of updateStatsForEncoder (opening signature line not visible in this
// view). Feeds import_count elements from the appropriate DataBlockPtr member
// into the encoder's statistics, dispatching on whether the type is varlen
// (arrays vs. strings/geo) or fixed-length, then bumps the element count.
228  const SQLTypeInfo type_info,
229  const DataBlockPtr& data_block,
230  const size_t import_count) {
231  CHECK(encoder);
232  if (type_info.is_varlen()) {
233  switch (type_info.get_type()) {
234  case kARRAY: {
235  encoder->updateStats(data_block.arraysPtr, 0, import_count);
236  break;
237  }
// Strings and geo types share the strings representation in DataBlockPtr.
238  case kTEXT:
239  case kVARCHAR:
240  case kCHAR:
241  case kPOINT:
242  case kLINESTRING:
243  case kPOLYGON:
244  case kMULTIPOLYGON: {
245  encoder->updateStats(data_block.stringsPtr, 0, import_count);
246  break;
247  }
248  default:
249  UNREACHABLE();
250  }
251  } else {
252  encoder->updateStats(data_block.numbersPtr, import_count);
253  }
254  encoder->setNumElems(encoder->getNumElems() + import_count);
255 }
256 
// Body of loadMetadataChunk (opening signature line not visible in this view).
// Accumulates per-chunk metadata (stats, element counts, null flags) for one
// row group into chunk_metadata_map_ using a scratch ForeignStorageBuffer's
// encoder — no actual chunk data is stored.
258  const ChunkKey& chunk_key,
259  DataBlockPtr& data_block,
260  const size_t import_count,
261  const bool has_nulls,
262  const bool is_all_nulls) {
263  auto type_info = column->columnType;
264  ChunkKey data_chunk_key = chunk_key;
// Varlen chunks are keyed with a trailing 1 (the data sub-chunk).
265  if (type_info.is_varlen_indeed()) {
266  data_chunk_key.emplace_back(1);
267  }
268  ForeignStorageBuffer buffer;
269  buffer.initEncoder(type_info);
270  auto encoder = buffer.encoder.get();
// Resume from previously-accumulated metadata if this chunk was seen before;
// otherwise start a fresh metadata entry.
271  if (chunk_metadata_map_.find(data_chunk_key) != chunk_metadata_map_.end()) {
272  encoder->resetChunkStats(chunk_metadata_map_[data_chunk_key]->chunkStats);
273  encoder->setNumElems(chunk_metadata_map_[data_chunk_key]->numElements);
274  buffer.setSize(chunk_metadata_map_[data_chunk_key]->numBytes);
275  } else {
276  chunk_metadata_map_[data_chunk_key] = std::make_shared<ChunkMetadata>();
277  }
278 
279  auto logical_type_info =
280  schema_->getLogicalColumn(chunk_key[CHUNK_KEY_COLUMN_IDX])->columnType;
281  if (is_all_nulls || logical_type_info.is_string() || logical_type_info.is_varlen()) {
282  // Do not attempt to load min/max statistics if entire row group is null or
283  // if the column is a string or variable length column
284  encoder->setNumElems(encoder->getNumElems() + import_count);
285  } else {
286  // Loads min/max statistics for columns with this information
// data_block carries exactly 2 sentinel values (min and max) in this path,
// hence the hard-coded 2 and the "- 2" correction below.
287  updateStatsForEncoder(encoder, type_info, data_block, 2);
288  encoder->setNumElems(encoder->getNumElems() + import_count - 2);
289  }
290  encoder->getMetadata(chunk_metadata_map_[data_chunk_key]);
291  chunk_metadata_map_[data_chunk_key]->chunkStats.has_nulls |= has_nulls;
292 }
293 
// Body of loadChunk (opening signature line not visible in this view).
// Appends import_count elements from data_block into the caller-provided
// buffers for one chunk, handling the split data/index buffers of varlen
// columns; buffers are detached from the Chunk before it is destroyed.
295  const ColumnDescriptor* column,
296  const ChunkKey& chunk_key,
297  DataBlockPtr& data_block,
298  const size_t import_count,
299  std::map<ChunkKey, AbstractBuffer*>& required_buffers) {
300  Chunk_NS::Chunk chunk{column};
301  auto column_id = column->columnId;
302  CHECK(column_id == chunk_key[CHUNK_KEY_COLUMN_IDX]);
303  auto& type_info = column->columnType;
304  if (type_info.is_varlen_indeed()) {
// Extend the 4-element key to 5 elements: suffix 1 = data, 2 = index.
305  ChunkKey data_chunk_key{chunk_key};
306  data_chunk_key.resize(5);
307  data_chunk_key[4] = 1;
308  CHECK(required_buffers.find(data_chunk_key) != required_buffers.end());
309  auto& data_buffer = required_buffers[data_chunk_key];
310  chunk.setBuffer(data_buffer);
311 
312  ChunkKey index_chunk_key{chunk_key};
313  index_chunk_key.resize(5);
314  index_chunk_key[4] = 2;
315  CHECK(required_buffers.find(index_chunk_key) != required_buffers.end());
316  auto& index_buffer = required_buffers[index_chunk_key];
317  chunk.setIndexBuffer(index_buffer);
318  } else {
319  CHECK(required_buffers.find(chunk_key) != required_buffers.end());
320  auto& buffer = required_buffers[chunk_key];
321  chunk.setBuffer(buffer);
322  }
323  chunk.appendData(data_block, import_count, 0);
// Detach the borrowed buffers so the Chunk's destructor does not touch them.
324  chunk.setBuffer(nullptr);
325  chunk.setIndexBuffer(nullptr);
326 }
327 
// Body of getChunkLoader (opening signature lines not visible in this view).
// Builds an import_export::Loader whose callback routes each imported data
// block to loadChunk() for every column id in the interval. Note the Loader
// is heap-allocated and returned as a raw pointer; ownership passes to the
// caller (presumably LazyParquetImporter) — confirm against full source.
330  const Interval<ColumnType>& column_interval,
331  const int db_id,
332  const int fragment_index,
333  std::map<ChunkKey, AbstractBuffer*>& required_buffers) {
// required_buffers is captured by reference: it must outlive the Loader.
334  auto callback =
335  [this, column_interval, db_id, fragment_index, &required_buffers](
336  const std::vector<std::unique_ptr<import_export::TypedImportBuffer>>&
337  import_buffers,
338  std::vector<DataBlockPtr>& data_blocks,
339  size_t import_row_count) {
340  for (int column_id = column_interval.start; column_id <= column_interval.end;
341  column_id++) {
342  // Column ids start at 1, hence the -1 offset
343  auto& import_buffer = import_buffers[column_id - 1];
344  ChunkKey chunk_key{db_id, foreign_table_->tableId, column_id, fragment_index};
345  loadChunk(import_buffer->getColumnDesc(),
346  chunk_key,
347  data_blocks[column_id - 1],
348  import_row_count,
349  required_buffers);
350  }
351  return true;
352  };
353 
354  return new import_export::Loader(catalog, foreign_table_, callback, false);
355 }
356 
// Body of getMetadataLoader (opening signature lines not visible in this
// view). Builds an import_export::Loader whose callback folds per-row-group
// metadata into chunk_metadata_map_ via loadMetadataChunk() and tracks
// fragment row counts. Returned as a raw pointer; ownership passes to the
// caller.
359  const LazyParquetImporter::RowGroupMetadataVector& metadata_vector) {
360  auto callback =
361  [this, &metadata_vector](
362  const std::vector<std::unique_ptr<import_export::TypedImportBuffer>>&
363  import_buffers,
364  std::vector<DataBlockPtr>& data_blocks,
365  size_t import_row_count) {
366  int row_group = metadata_vector[0].row_group_index;
367  last_row_group_ = row_group;
// NOTE(review): the fragment-advance body (original lines 369-371) is not
// visible in this view; presumably it bumps last_fragment_index_ and calls
// updateFragmentMap — confirm against full source.
368  if (moveToNextFragment(import_row_count)) {
372  }
373 
374  for (size_t i = 0; i < import_buffers.size(); i++) {
375  auto& import_buffer = import_buffers[i];
376  const auto column = import_buffer->getColumnDesc();
377  auto column_id = column->columnId;
// Chunk key components continue on a line not visible in this view.
378  ChunkKey chunk_key{
380  const auto& metadata = metadata_vector[i];
// Every per-column metadata entry must refer to the same row group, and this
// loader handles metadata-only scans.
381  CHECK(metadata.row_group_index == row_group);
382  CHECK(metadata.metadata_only);
383  loadMetadataChunk(column,
384  chunk_key,
385  data_blocks[i],
386  metadata.num_elements,
387  metadata.has_nulls,
388  metadata.is_all_nulls);
389  }
390 
391  last_fragment_row_count_ += import_row_count;
392  return true;
393  };
394 
395  return new import_export::Loader(catalog, foreign_table_, callback, false);
396 }
397 
398 bool ParquetDataWrapper::moveToNextFragment(size_t new_rows_count) {
399  return (last_fragment_row_count_ + new_rows_count) >
400  static_cast<size_t>(foreign_table_->maxFragRows);
401 }
402 
// Body of populateChunkMetadata (opening signature line not visible in this
// view). Rebuilds chunk_metadata_map_ and copies it into the output vector.
404  ChunkMetadataVector& chunk_metadata_vector) {
405  chunk_metadata_map_.clear();
// NOTE(review): original line 406 is not visible here; presumably it runs the
// metadata scan that repopulates chunk_metadata_map_ before the copy below —
// confirm against full source.
407  for (const auto& [chunk_key, chunk_metadata] : chunk_metadata_map_) {
408  chunk_metadata_vector.emplace_back(chunk_key, chunk_metadata);
409  }
410 }
411 
// Body of loadBuffersUsingLazyParquetChunkLoader (opening signature line not
// visible in this view). Fills the caller-provided buffers for one logical
// column of one fragment by reading its row-group interval directly with
// LazyParquetChunkLoader, resolving the string dictionary when needed and
// refreshing fragmenter metadata for dictionary-encoded string columns.
413  const int logical_column_id,
414  const int fragment_id,
415  const size_t physical_byte_size,
416  std::map<ChunkKey, AbstractBuffer*>& required_buffers) {
417  auto catalog = Catalog_Namespace::Catalog::get(db_id_);
418  CHECK(catalog);
419 
420  const ColumnDescriptor* logical_column =
421  schema_->getColumnDescriptor(logical_column_id);
422  auto parquet_column_index = schema_->getParquetColumnIndex(logical_column_id);
423 
424  const Interval<ColumnType> column_interval = {logical_column_id, logical_column_id};
// Continuation of an initializeChunkBuffers call whose first line (original
// 425) is not visible in this view; reserve_buffers_and_set_stats is true.
426  fragment_id, column_interval, required_buffers, true, physical_byte_size);
427 
428  const auto& row_group_interval = fragment_to_row_group_interval_map_[fragment_id];
429 
// Dictionary-encoded strings need the catalog's StringDictionary to translate
// values during the load.
430  StringDictionary* string_dictionary = nullptr;
431  if (logical_column->columnType.is_dict_encoded_string()) {
432  auto dict_descriptor = catalog->getMetadataForDictUnlocked(
433  logical_column->columnType.get_comp_param(), true);
434  CHECK(dict_descriptor);
435  string_dictionary = dict_descriptor->stringDict.get();
436  }
437 
438  LazyParquetChunkLoader chunk_loader(getFilePath());
439  Chunk_NS::Chunk chunk{logical_column};
440  if (logical_column->columnType.is_varlen_indeed()) {
441  ChunkKey data_chunk_key = {
442  db_id_, foreign_table_->tableId, logical_column_id, fragment_id, 1};
443  auto buffer = required_buffers[data_chunk_key];
444  CHECK(buffer);
445  chunk.setBuffer(buffer);
446  ChunkKey index_chunk_key = {
447  db_id_, foreign_table_->tableId, logical_column_id, fragment_id, 2};
448  CHECK(required_buffers.find(index_chunk_key) != required_buffers.end());
449  chunk.setIndexBuffer(required_buffers[index_chunk_key]);
450  } else {
451  ChunkKey chunk_key = {
452  db_id_, foreign_table_->tableId, logical_column_id, fragment_id};
453  auto buffer = required_buffers[chunk_key];
454  CHECK(buffer);
455  chunk.setBuffer(buffer);
456  }
457  auto metadata = chunk_loader.loadChunk(
458  {row_group_interval.start_row_group_index, row_group_interval.end_row_group_index},
459  parquet_column_index,
460  chunk,
461  string_dictionary);
462 
463  if (logical_column->columnType
464  .is_dict_encoded_string()) { // update metadata for dictionary encoded strings
465  CHECK(metadata.get());
466  auto fragmenter = foreign_table_->fragmenter;
467  if (fragmenter) {
468  fragmenter->updateColumnChunkMetadata(logical_column, fragment_id, metadata);
469  }
470  }
471 }
472 
// Body of loadBuffersUsingLazyParquetImporter (opening signature line not
// visible in this view). Fallback load path: imports one logical column plus
// its physical companions for one fragment through LazyParquetImporter's
// partialImport, with buffers prepared by initializeChunkBuffers.
474  const int logical_column_id,
475  const int fragment_id,
476  std::map<ChunkKey, AbstractBuffer*>& required_buffers) {
477  auto catalog = Catalog_Namespace::Catalog::get(db_id_);
478  CHECK(catalog);
479 
// The interval spans the logical column and its physical sub-columns
// (e.g. geo types expand into multiple physical columns).
480  const ColumnDescriptor* logical_column =
481  schema_->getColumnDescriptor(logical_column_id);
482  const Interval<ColumnType> column_interval = {
483  logical_column->columnId,
484  logical_column->columnId + logical_column->columnType.get_physical_cols()};
485  initializeChunkBuffers(fragment_id, column_interval, required_buffers);
486 
// NOTE(review): metadata_vector is declared on a line (original 487) not
// visible in this view — confirm its type against full source.
488  LazyParquetImporter importer(
489  getChunkLoader(*catalog, column_interval, db_id_, fragment_id, required_buffers),
490  getFilePath(),
492  metadata_vector,
493  *schema_);
494  const auto& row_group_interval = fragment_to_row_group_interval_map_[fragment_id];
495  importer.partialImport(
496  {row_group_interval.start_row_group_index, row_group_interval.end_row_group_index},
497  column_interval);
498 }
499 
// Body of populateChunkBuffers (opening signature line not visible in this
// view). Fills all required buffers for a single fragment: collects the
// distinct logical columns referenced by the buffer keys, then loads each via
// the fast chunk-loader path when the Parquet/OmniSci column mapping supports
// it, otherwise via the LazyParquetImporter fallback.
501  std::map<ChunkKey, AbstractBuffer*>& required_buffers,
502  std::map<ChunkKey, AbstractBuffer*>& optional_buffers) {
503  std::unique_ptr<parquet::arrow::FileReader> reader;
504  open_parquet_table(getFilePath(), reader);
505 
// All required buffers must belong to the same fragment and start empty.
506  CHECK(!required_buffers.empty());
507  auto fragment_id = required_buffers.begin()->first[CHUNK_KEY_FRAGMENT_IDX];
508 
509  std::set<int> logical_column_ids;
510  for (const auto& [chunk_key, buffer] : required_buffers) {
511  CHECK_EQ(fragment_id, chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
512  CHECK_EQ(buffer->size(), static_cast<size_t>(0));
// Map each (possibly physical or varlen sub-) chunk key back to its logical
// column; the set de-duplicates.
513  const auto column_id =
514  schema_->getLogicalColumn(chunk_key[CHUNK_KEY_COLUMN_IDX])->columnId;
515  logical_column_ids.emplace(column_id);
516  }
517 
518  for (const auto column_id : logical_column_ids) {
519  const ColumnDescriptor* column_descriptor = schema_->getColumnDescriptor(column_id);
520  auto parquet_column_index = schema_->getParquetColumnIndex(column_id);
// Condition continues from a line (original 521, presumably the
// isColumnMappingSupported call) not visible in this view.
522  column_descriptor, get_column_descriptor(reader, parquet_column_index))) {
523  auto physical_byte_size = get_physical_type_byte_size(reader, parquet_column_index);
// Call opening (original line 524, loadBuffersUsingLazyParquetChunkLoader)
// is not visible in this view.
525  column_id, fragment_id, physical_byte_size, required_buffers);
526  } else {
527  loadBuffersUsingLazyParquetImporter(column_id, fragment_id, required_buffers);
528  }
529  }
530 }
531 
532 } // namespace foreign_storage
void initEncoder(const SQLTypeInfo tmp_sql_type)
void partialImport(const Interval< RowGroupType > &row_group_interval, const Interval< ColumnType > &column_interval, const bool metadata_scan=false)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::map< std::string, std::string, std::less<> > options
size_t get_physical_type_byte_size(std::unique_ptr< parquet::arrow::FileReader > &reader, const int logical_column_index)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
void loadMetadataChunk(const ColumnDescriptor *column, const ChunkKey &chunk_key, DataBlockPtr &data_block, const size_t import_count, const bool has_nulls, const bool is_all_nulls)
static bool isColumnMappingSupported(const ColumnDescriptor *omnisci_column, const parquet::ColumnDescriptor *parquet_column)
std::map< int, FragmentToRowGroupInterval > fragment_to_row_group_interval_map_
std::unique_ptr< ForeignTableSchema > schema_
static std::shared_ptr< Catalog > get(const std::string &dbName)
Definition: Catalog.cpp:3671
static constexpr std::array< char const *, 1 > supported_options
Definition: ForeignTable.h:27
void updateStatsForEncoder(Encoder *encoder, const SQLTypeInfo type_info, const DataBlockPtr &data_block, const size_t import_count)
std::vector< RowGroupMetadata > RowGroupMetadataVector
void updateFragmentMap(int fragment_index, int row_group)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:268
std::shared_ptr< ChunkMetadata > loadChunk(const Interval< RowGroupType > &row_group_interval, const int parquet_column_index, Chunk_NS::Chunk &chunk, StringDictionary *string_dictionary=nullptr)
#define UNREACHABLE()
Definition: Logger.h:241
std::string validateAndGetStringWithLength(const std::string &option_name, const size_t expected_num_chars)
ParquetDataWrapper(const int db_id, const ForeignTable *foreign_table)
void initializeChunkBuffers(const int fragment_index, const Interval< ColumnType > &column_interval, std::map< ChunkKey, AbstractBuffer *> &required_buffers, const bool reserve_buffers_and_set_stats=false, const size_t physical_byte_size=0)
bool is_varlen() const
Definition: sqltypes.h:431
const parquet::ColumnDescriptor * get_column_descriptor(std::unique_ptr< parquet::arrow::FileReader > &reader, const int logical_column_index)
void setNumElems(const size_t num_elems)
Definition: Encoder.h:215
std::string to_string(char const *&&v)
int32_t StringOffsetT
Definition: sqltypes.h:867
std::pair< typename std::map< ChunkKey, T >::iterator, typename std::map< ChunkKey, T >::iterator > prefix_range(std::map< ChunkKey, T > &map, const ChunkKey &chunk_key_prefix)
import_export::CopyParams validateAndGetCopyParams()
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > chunk_metadata_map_
bool is_dict_encoded_string() const
Definition: sqltypes.h:443
void open_parquet_table(const std::string &file_path, std::unique_ptr< parquet::arrow::FileReader > &reader)
void loadBuffersUsingLazyParquetChunkLoader(const int logical_column_id, const int fragment_id, const size_t physical_byte_size, std::map< ChunkKey, AbstractBuffer *> &required_buffers)
int get_physical_cols() const
Definition: sqltypes.h:280
specifies the content in-memory of a row in the column metadata table
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
bool moveToNextFragment(size_t new_rows_count)
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:611
void setSize(const size_t num_bytes) override
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:40
std::list< const ColumnDescriptor * > getColumnsToInitialize(const Interval< ColumnType > &column_interval)
Definition: sqltypes.h:54
void loadChunk(const ColumnDescriptor *column, const ChunkKey &chunk_key, DataBlockPtr &data_block, const size_t import_count, std::map< ChunkKey, AbstractBuffer *> &required_buffers)
import_export::Loader * getChunkLoader(Catalog_Namespace::Catalog &catalog, const Interval< ColumnType > &column_interval, const int db_id, const int fragment_index, std::map< ChunkKey, AbstractBuffer *> &required_buffers)
void populateChunkMetadata(ChunkMetadataVector &chunk_metadata_vector) override
Definition: sqltypes.h:43
const ForeignServer * foreign_server
Definition: ForeignTable.h:26
size_t getNumElems() const
Definition: Encoder.h:214
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:39
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
std::vector< int > ChunkKey
Definition: types.h:35
import_export::Loader * getMetadataLoader(Catalog_Namespace::Catalog &catalog, const LazyParquetImporter::RowGroupMetadataVector &metadata_vector)
static void validateOptions(const ForeignTable *foreign_table)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata > >> ChunkMetadataVector
SQLTypeInfo columnType
int8_t * numbersPtr
Definition: sqltypes.h:149
virtual void updateStats(const int64_t val, const bool is_null)=0
void loadBuffersUsingLazyParquetImporter(const int logical_column_id, const int fragment_id, std::map< ChunkKey, AbstractBuffer *> &required_buffers)
std::unique_ptr< Encoder > encoder
bool is_varlen_indeed() const
Definition: sqltypes.h:437
void populateChunkBuffers(std::map< ChunkKey, AbstractBuffer *> &required_buffers, std::map< ChunkKey, AbstractBuffer *> &optional_buffers) override
static constexpr std::array< char const *, 4 > supported_options_