OmniSciDB  a575cb28ea
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParquetDataWrapper.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ParquetDataWrapper.h"
18 
19 #include <regex>
20 
21 #include <arrow/filesystem/localfs.h>
22 #include <boost/filesystem.hpp>
23 
25 #include "FsiJsonUtils.h"
26 #include "ImportExport/Importer.h"
27 #include "LazyParquetChunkLoader.h"
28 #include "ParquetShared.h"
29 #include "Utils/DdlUtils.h"
30 
31 namespace foreign_storage {
32 
33 namespace {
34 void reduce_metadata(std::shared_ptr<ChunkMetadata> reduce_to,
35  std::shared_ptr<ChunkMetadata> reduce_from) {
36  CHECK(reduce_to->sqlType == reduce_from->sqlType);
37  reduce_to->numBytes += reduce_from->numBytes;
38  reduce_to->numElements += reduce_from->numElements;
39  reduce_to->chunkStats.has_nulls |= reduce_from->chunkStats.has_nulls;
40 
41  auto column_type = reduce_to->sqlType;
42  column_type = column_type.is_array() ? column_type.get_elem_type() : column_type;
43 
44  // metadata reducution is done at metadata scan time, both string & geometry
45  // columns have no valid stats to reduce beyond `has_nulls`
46  if (column_type.is_string() || column_type.is_geometry()) {
47  return;
48  }
49 
50  ForeignStorageBuffer buffer_to;
51  buffer_to.initEncoder(column_type);
52  auto encoder_to = buffer_to.getEncoder();
53  encoder_to->resetChunkStats(reduce_to->chunkStats);
54 
55  ForeignStorageBuffer buffer_from;
56  buffer_from.initEncoder(column_type);
57  auto encoder_from = buffer_from.getEncoder();
58  encoder_from->resetChunkStats(reduce_from->chunkStats);
59 
60  encoder_to->reduceStats(*encoder_from);
61  auto updated_metadata = std::make_shared<ChunkMetadata>();
62  encoder_to->getMetadata(updated_metadata);
63  reduce_to->chunkStats = updated_metadata->chunkStats;
64 }
65 
66 } // namespace
67 
// Default constructor: creates an unbound wrapper (no database or foreign table).
ParquetDataWrapper::ParquetDataWrapper() : db_id_(-1), foreign_table_(nullptr) {}
69 
70 ParquetDataWrapper::ParquetDataWrapper(const int db_id, const ForeignTable* foreign_table)
71  : db_id_(db_id)
72  , foreign_table_(foreign_table)
73  , last_fragment_index_(0)
74  , last_fragment_row_count_(0)
75  , total_row_count_(0)
76  , last_row_group_(0)
77  , is_restored_(false)
78  , schema_(std::make_unique<ForeignTableSchema>(db_id, foreign_table))
79  , file_reader_cache_(std::make_unique<FileReaderMap>()) {
80  auto& server_options = foreign_table->foreign_server->options;
81  if (server_options.find(STORAGE_TYPE_KEY)->second == LOCAL_FILE_STORAGE_TYPE) {
82  file_system_ = std::make_shared<arrow::fs::LocalFileSystem>();
83  } else {
84  UNREACHABLE();
85  }
86 }
87 
91 
92  last_row_group_ = 0;
95  total_row_count_ = 0;
96  file_reader_cache_->clear();
97 }
98 
// Returns the logical and physical column descriptors whose column ids fall
// within `column_interval` (inclusive on both ends).
std::list<const ColumnDescriptor*> ParquetDataWrapper::getColumnsToInitialize(
    const Interval<ColumnType>& column_interval) {
  // NOTE(review): `catalog` is presumably obtained just above this check
  // (line elided in this listing) — confirm against the full source.
  CHECK(catalog);
  const auto& columns = schema_->getLogicalAndPhysicalColumns();
  auto column_start = column_interval.start;
  auto column_end = column_interval.end;
  std::list<const ColumnDescriptor*> columns_to_init;
  for (const auto column : columns) {
    auto column_id = column->columnId;
    if (column_id >= column_start && column_id <= column_end) {
      columns_to_init.push_back(column);
    }
  }
  return columns_to_init;
}
115 
    const int fragment_index,
    const Interval<ColumnType>& column_interval,
    std::map<ChunkKey, AbstractBuffer*>& required_buffers,
    const bool reserve_buffers_and_set_stats) {
  // Attach the caller-provided buffers to a Chunk for every column in
  // `column_interval` and initialize each chunk's encoder. When
  // `reserve_buffers_and_set_stats` is set, also seed the encoder's stats
  // from the cached chunk metadata and pre-reserve buffer space.
  for (const auto column : getColumnsToInitialize(column_interval)) {
    Chunk_NS::Chunk chunk{column};
    ChunkKey data_chunk_key;
    if (column->columnType.is_varlen_indeed()) {
      // Variable-length columns use two buffers: data (trailing subkey 1)
      // and index (trailing subkey 2).
      data_chunk_key = {
          db_id_, foreign_table_->tableId, column->columnId, fragment_index, 1};
      auto data_buffer = required_buffers[data_chunk_key];
      CHECK(data_buffer);
      chunk.setBuffer(data_buffer);

      ChunkKey index_chunk_key{
          db_id_, foreign_table_->tableId, column->columnId, fragment_index, 2};
      auto index_buffer = required_buffers[index_chunk_key];
      CHECK(index_buffer);
      chunk.setIndexBuffer(index_buffer);
    } else {
      data_chunk_key = {
          db_id_, foreign_table_->tableId, column->columnId, fragment_index};
      auto data_buffer = required_buffers[data_chunk_key];
      CHECK(data_buffer);
      chunk.setBuffer(data_buffer);
    }
    chunk.initEncoder();
    if (reserve_buffers_and_set_stats) {
      // Restore previously scanned stats and element counts into the fresh
      // encoder, then reserve buffer capacity for the expected data.
      const auto metadata_it = chunk_metadata_map_.find(data_chunk_key);
      CHECK(metadata_it != chunk_metadata_map_.end());
      auto buffer = chunk.getBuffer();
      auto& metadata = metadata_it->second;
      auto encoder = buffer->getEncoder();
      encoder->resetChunkStats(metadata->chunkStats);
      encoder->setNumElems(metadata->numElements);
      if (column->columnType.is_string() &&
          column->columnType.get_compression() == kENCODING_NONE) {
        // None-encoded strings: numElements + 1 string offsets in the index.
        auto index_buffer = chunk.getIndexBuf();
        index_buffer->reserve(sizeof(StringOffsetT) * (metadata->numElements + 1));
      } else if (!column->columnType.is_fixlen_array() && column->columnType.is_array()) {
        // Variable-length arrays: numElements + 1 array offsets in the index.
        auto index_buffer = chunk.getIndexBuf();
        index_buffer->reserve(sizeof(ArrayOffsetT) * (metadata->numElements + 1));
      } else {
        // Fixed-width data: reserve the exact byte count.
        size_t num_bytes_to_reserve =
            metadata->numElements * column->columnType.get_size();
        buffer->reserve(num_bytes_to_reserve);
      }
    }
  }
}
167 
171 }
172 
// Close out the current (last) fragment and start a new one beginning at
// `row_group` within `file_path`.
// NOTE(review): several statements of this function (the map lookup
// initializer and the new-fragment emplace) are elided in this listing.
void ParquetDataWrapper::addNewFragment(int row_group, const std::string& file_path) {
  const auto last_fragment_entry =
  CHECK(last_fragment_entry != fragment_to_row_group_interval_map_.end());

  // Terminate the open interval of the previous fragment at the last row
  // group that was scanned.
  last_fragment_entry->second.back().end_index = last_row_group_;
      RowGroupInterval{file_path, row_group});
}
184 
// Returns true when `file_path` differs from the file the last fragment is
// currently reading from (i.e. a file boundary has been crossed).
// NOTE(review): the map-lookup initializer is elided in this listing.
bool ParquetDataWrapper::isNewFile(const std::string& file_path) const {
  const auto last_fragment_entry =
  CHECK(last_fragment_entry != fragment_to_row_group_interval_map_.end());

  // The entry for the first fragment starts out as an empty vector
  if (last_fragment_entry->second.empty()) {
    return true;
  } else {
    return (last_fragment_entry->second.back().file_path != file_path);
  }
}
198 
// Record that the current (last) fragment continues into a new file. The
// previous file's interval is closed at the last scanned row group, and the
// new file's interval always starts at its row group 0.
// NOTE(review): the map-lookup initializer is elided in this listing.
void ParquetDataWrapper::addNewFile(const std::string& file_path) {
  const auto last_fragment_entry =
  CHECK(last_fragment_entry != fragment_to_row_group_interval_map_.end());

  // The entry for the first fragment starts out as an empty vector
  if (last_fragment_entry->second.empty()) {
  } else {
    // Close the interval for the previous file before appending the new one.
    last_fragment_entry->second.back().end_index = last_row_group_;
  }
  last_fragment_entry->second.emplace_back(RowGroupInterval{file_path, 0});
}
212 
  CHECK(catalog);
  // Determine which files need a metadata scan. In append mode only new (or
  // grown) files are rescanned; otherwise all files are scanned from scratch.
  std::set<std::string> new_file_paths;
  auto processed_file_paths = getProcessedFilePaths();
  if (foreign_table_->isAppendMode() && !processed_file_paths.empty()) {
    auto all_file_paths = getAllFilePaths();
    // A previously processed file that has disappeared is an error in
    // append mode.
    for (const auto& file_path : processed_file_paths) {
      if (all_file_paths.find(file_path) == all_file_paths.end()) {
        throw_removed_file_error(file_path);
      }
    }

    // Any file not yet processed is scheduled for scanning.
    for (const auto& file_path : all_file_paths) {
      if (processed_file_paths.find(file_path) == processed_file_paths.end()) {
        new_file_paths.emplace(file_path);
      }
    }

    // Single file append
    // If an append occurs with multiple files, then we assume any existing files have not
    // been altered. If an append occurs on a single file, then we check to see if it has
    // changed.
    if (new_file_paths.empty() && all_file_paths.size() == 1) {
      CHECK_EQ(processed_file_paths.size(), static_cast<size_t>(1));
      const auto& file_path = *all_file_paths.begin();
      CHECK_EQ(*processed_file_paths.begin(), file_path);

      // Since an existing file is being appended to we need to update the cached
      // FileReader as the existing one will be out of date.
      (*file_reader_cache_)[file_path] = open_parquet_table(file_path, file_system_);
      auto& reader = file_reader_cache_->at(file_path);
      size_t row_count = reader->parquet_reader()->metadata()->num_rows();

      if (row_count < total_row_count_) {
        // Rows may never be removed from a file in append mode.
        throw_removed_row_error(file_path);
      } else if (row_count > total_row_count_) {
        // The file grew: rescan everything and rebuild cached metadata.
        new_file_paths = all_file_paths;
        chunk_metadata_map_.clear();
      }
    }
  } else {
    // Non-append (or first) scan: process every file and reset cached state.
    new_file_paths = getAllFilePaths();
    chunk_metadata_map_.clear();
  }

  if (!new_file_paths.empty()) {
    metadataScanFiles(new_file_paths);
  }
}
265 
  // Collect every file path referenced by any fragment's row-group
  // intervals, i.e. all files that have already been metadata-scanned.
  std::set<std::string> file_paths;
  for (const auto& entry : fragment_to_row_group_interval_map_) {
    for (const auto& row_group_interval : entry.second) {
      file_paths.emplace(row_group_interval.file_path);
    }
  }
  return file_paths;
}
275 
276 std::set<std::string> ParquetDataWrapper::getAllFilePaths() {
277  auto timer = DEBUG_TIMER(__func__);
278  std::set<std::string> file_paths;
279  arrow::fs::FileSelector file_selector{};
280  std::string base_path = getFullFilePath(foreign_table_);
281  file_selector.base_dir = base_path;
282  file_selector.recursive = true;
283 
284  auto file_info_result = file_system_->GetFileInfo(file_selector);
285  if (!file_info_result.ok()) {
286  // This is expected when `base_path` points to a single file.
287  file_paths.emplace(base_path);
288  } else {
289  auto& file_info_vector = file_info_result.ValueOrDie();
290  for (const auto& file_info : file_info_vector) {
291  if (file_info.type() == arrow::fs::FileType::File) {
292  file_paths.emplace(file_info.path());
293  }
294  }
295  if (file_paths.empty()) {
296  throw std::runtime_error{"No file found at given path \"" + base_path + "\"."};
297  }
298  }
299  return file_paths;
300 }
301 
// Scan row-group metadata for `file_paths` and fold it into the cached
// per-fragment chunk metadata, advancing fragment boundaries whenever the
// configured max fragment row count would be exceeded.
// NOTE(review): the chunk-loader declaration and the ChunkKey initializer
// contents are elided in this listing.
void ParquetDataWrapper::metadataScanFiles(const std::set<std::string>& file_paths) {
  auto row_group_metadata = chunk_loader.metadataScan(file_paths, *schema_);
  auto column_interval =
      Interval<ColumnType>{schema_->getLogicalAndPhysicalColumns().front()->columnId,
                           schema_->getLogicalAndPhysicalColumns().back()->columnId};

  for (const auto& row_group_metadata_item : row_group_metadata) {
    const auto& column_chunk_metadata = row_group_metadata_item.column_chunk_metadata;
    CHECK(static_cast<int>(column_chunk_metadata.size()) ==
          schema_->numLogicalAndPhysicalColumns());
    auto column_chunk_metadata_iter = column_chunk_metadata.begin();
    // Every column in a row group has the same element count; use the first.
    const int64_t import_row_count = (*column_chunk_metadata_iter)->numElements;
    int row_group = row_group_metadata_item.row_group_index;
    const auto& file_path = row_group_metadata_item.file_path;
    // Start a new fragment when this row group would overflow the current
    // one; otherwise just record a file boundary if one occurred.
    if (moveToNextFragment(import_row_count)) {
      addNewFragment(row_group, file_path);
    } else if (isNewFile(file_path)) {
      CHECK_EQ(row_group, 0);
      addNewFile(file_path);
    }
    last_row_group_ = row_group;

    // Merge this row group's per-column metadata into the fragment map,
    // reducing with any metadata already accumulated for the fragment.
    for (int column_id = column_interval.start; column_id <= column_interval.end;
         column_id++, column_chunk_metadata_iter++) {
      CHECK(column_chunk_metadata_iter != column_chunk_metadata.end());
      const auto column_descriptor = schema_->getColumnDescriptor(column_id);

      const auto& type_info = column_descriptor->columnType;
      ChunkKey chunk_key{
      ChunkKey data_chunk_key = chunk_key;
      if (type_info.is_varlen_indeed()) {
        // Varlen data chunks carry a trailing subkey of 1.
        data_chunk_key.emplace_back(1);
      }
      std::shared_ptr<ChunkMetadata> chunk_metadata = *column_chunk_metadata_iter;
      if (chunk_metadata_map_.find(data_chunk_key) == chunk_metadata_map_.end()) {
        chunk_metadata_map_[data_chunk_key] = chunk_metadata;
      } else {
        reduce_metadata(chunk_metadata_map_[data_chunk_key], chunk_metadata);
      }
    }
    last_fragment_row_count_ += import_row_count;
    total_row_count_ += import_row_count;
  }
}
349 
350 bool ParquetDataWrapper::moveToNextFragment(size_t new_rows_count) const {
351  return (last_fragment_row_count_ + new_rows_count) >
352  static_cast<size_t>(foreign_table_->maxFragRows);
353 }
354 
    ChunkMetadataVector& chunk_metadata_vector) {
  // Copy every cached chunk metadata entry into the output vector.
  for (const auto& [chunk_key, chunk_metadata] : chunk_metadata_map_) {
    chunk_metadata_vector.emplace_back(chunk_key, chunk_metadata);
  }
}
362 
    const int logical_column_id,
    const int fragment_id,
    std::map<ChunkKey, AbstractBuffer*>& required_buffers) {
  // Load a logical column (plus its physical companion columns) for a single
  // fragment from the Parquet row groups mapped to that fragment.
  // NOTE(review): the `catalog` and chunk-loader declarations are elided in
  // this listing.
  CHECK(catalog);
  const ColumnDescriptor* logical_column =
      schema_->getColumnDescriptor(logical_column_id);
  auto parquet_column_index = schema_->getParquetColumnIndex(logical_column_id);

  // The interval spans the logical column and its physical columns.
  const Interval<ColumnType> column_interval = {
      logical_column_id,
      logical_column_id + logical_column->columnType.get_physical_cols()};
  initializeChunkBuffers(fragment_id, column_interval, required_buffers, true);

  const auto& row_group_intervals = fragment_to_row_group_interval_map_[fragment_id];

  const bool is_dictionary_encoded_string_column =
      logical_column->columnType.is_dict_encoded_string() ||
      (logical_column->columnType.is_array() &&
       logical_column->columnType.get_elem_type().is_dict_encoded_string());

  // Dictionary-encoded strings need the table's string dictionary during the
  // load.
  StringDictionary* string_dictionary = nullptr;
  if (is_dictionary_encoded_string_column) {
    auto dict_descriptor = catalog->getMetadataForDictUnlocked(
        logical_column->columnType.get_comp_param(), true);
    CHECK(dict_descriptor);
    string_dictionary = dict_descriptor->stringDict.get();
  }

  // Wrap each destination buffer in a Chunk for the loader to fill. Varlen
  // columns get both data (subkey 1) and index (subkey 2) buffers.
  std::list<Chunk_NS::Chunk> chunks;
  for (int column_id = column_interval.start; column_id <= column_interval.end;
       ++column_id) {
    auto column_descriptor = schema_->getColumnDescriptor(column_id);
    Chunk_NS::Chunk chunk{column_descriptor};
    if (column_descriptor->columnType.is_varlen_indeed()) {
      ChunkKey data_chunk_key = {
          db_id_, foreign_table_->tableId, column_id, fragment_id, 1};
      auto buffer = required_buffers[data_chunk_key];
      CHECK(buffer);
      chunk.setBuffer(buffer);
      ChunkKey index_chunk_key = {
          db_id_, foreign_table_->tableId, column_id, fragment_id, 2};
      auto index_buffer = required_buffers[index_chunk_key];
      CHECK(index_buffer);
      chunk.setIndexBuffer(index_buffer);
    } else {
      ChunkKey chunk_key = {db_id_, foreign_table_->tableId, column_id, fragment_id};
      auto buffer = required_buffers[chunk_key];
      CHECK(buffer);
      chunk.setBuffer(buffer);
    }
    chunks.emplace_back(chunk);
  }

  auto metadata = chunk_loader.loadChunk(
      row_group_intervals, parquet_column_index, chunks, string_dictionary);
  auto fragmenter = foreign_table_->fragmenter;
  if (fragmenter) {
    // Propagate refreshed per-chunk metadata back to the fragmenter: byte
    // sizes always come from the filled buffers, and for dictionary-encoded
    // string and geometry columns min/max come from the load-time stats.
    auto metadata_iter = metadata.begin();
    for (int column_id = column_interval.start; column_id <= column_interval.end;
         ++column_id, ++metadata_iter) {
      auto column = schema_->getColumnDescriptor(column_id);
      ChunkKey data_chunk_key = {db_id_, foreign_table_->tableId, column_id, fragment_id};
      if (column->columnType.is_varlen_indeed()) {
        data_chunk_key.emplace_back(1);
      }
      CHECK(chunk_metadata_map_.find(data_chunk_key) != chunk_metadata_map_.end());
      auto cached_metadata = chunk_metadata_map_[data_chunk_key];
      auto updated_metadata = std::make_shared<ChunkMetadata>();
      *updated_metadata = *cached_metadata;
      // for certain types, update the metadata statistics
      if (is_dictionary_encoded_string_column ||
          logical_column->columnType.is_geometry()) {
        CHECK(metadata_iter != metadata.end());
        auto& chunk_metadata_ptr = *metadata_iter;
        updated_metadata->chunkStats.max = chunk_metadata_ptr->chunkStats.max;
        updated_metadata->chunkStats.min = chunk_metadata_ptr->chunkStats.min;
      }
      CHECK(required_buffers.find(data_chunk_key) != required_buffers.end());
      updated_metadata->numBytes = required_buffers[data_chunk_key]->size();
      fragmenter->updateColumnChunkMetadata(column, fragment_id, updated_metadata);
    }
  }
}
449 
    std::map<ChunkKey, AbstractBuffer*>& required_buffers,
    std::map<ChunkKey, AbstractBuffer*>& optional_buffers) {
  CHECK(!required_buffers.empty());
  // All requested chunks must belong to the same fragment; take the fragment
  // id from the first key and verify the rest below.
  auto fragment_id = required_buffers.begin()->first[CHUNK_KEY_FRAGMENT_IDX];

  // Deduplicate to logical column ids: loading a logical column also fills
  // the chunks of its physical companion columns.
  std::set<int> logical_column_ids;
  for (const auto& [chunk_key, buffer] : required_buffers) {
    CHECK_EQ(fragment_id, chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
    CHECK_EQ(buffer->size(), static_cast<size_t>(0));  // buffers must be empty
    const auto column_id =
        schema_->getLogicalColumn(chunk_key[CHUNK_KEY_COLUMN_IDX])->columnId;
    logical_column_ids.emplace(column_id);
  }

  // NOTE(review): `optional_buffers` is not used by this implementation.
  for (const auto column_id : logical_column_ids) {
    loadBuffersUsingLazyParquetChunkLoader(column_id, fragment_id, required_buffers);
  }
}
469 
470 void set_value(rapidjson::Value& json_val,
471  const RowGroupInterval& value,
472  rapidjson::Document::AllocatorType& allocator) {
473  json_val.SetObject();
474  json_utils::add_value_to_object(json_val, value.file_path, "file_path", allocator);
475  json_utils::add_value_to_object(json_val, value.start_index, "start_index", allocator);
476  json_utils::add_value_to_object(json_val, value.end_index, "end_index", allocator);
477 }
478 
479 void get_value(const rapidjson::Value& json_val, RowGroupInterval& value) {
480  CHECK(json_val.IsObject());
481  json_utils::get_value_from_object(json_val, value.file_path, "file_path");
482  json_utils::get_value_from_object(json_val, value.start_index, "start_index");
483  json_utils::get_value_from_object(json_val, value.end_index, "end_index");
484 }
485 
    const std::string& file_path) const {
  // Persist the wrapper's scan state (fragment-to-row-group map and row
  // counters) as a JSON document at `file_path` for later recovery.
  // NOTE(review): several add_value_to_object call prefixes are elided in
  // this listing.
  rapidjson::Document d;
  d.SetObject();

      "fragment_to_row_group_interval_map",
      d.GetAllocator());
  json_utils::add_value_to_object(d, last_row_group_, "last_row_group", d.GetAllocator());
      d, last_fragment_index_, "last_fragment_index", d.GetAllocator());
      d, last_fragment_row_count_, "last_fragment_row_count", d.GetAllocator());
      d, total_row_count_, "total_row_count", d.GetAllocator());

  json_utils::write_to_file(d, file_path);
}
505 
    const std::string& file_path,
    const ChunkMetadataVector& chunk_metadata_vector) {
  // Rebuild in-memory state from the serialized JSON document plus the
  // externally recovered chunk metadata, then mark the wrapper restored.
  // NOTE(review): several get_value_from_object call prefixes are elided in
  // this listing.
  auto d = json_utils::read_from_file(file_path);
  CHECK(d.IsObject());

      d, fragment_to_row_group_interval_map_, "fragment_to_row_group_interval_map");
  json_utils::get_value_from_object(d, last_fragment_index_, "last_fragment_index");
      d, last_fragment_row_count_, "last_fragment_row_count");

  // Chunk metadata is supplied by the caller, not the JSON file.
  CHECK(chunk_metadata_map_.empty());
  for (const auto& [chunk_key, chunk_metadata] : chunk_metadata_vector) {
    chunk_metadata_map_[chunk_key] = chunk_metadata;
  }
  is_restored_ = true;
}
526 
528  return is_restored_;
529 }
530 
531 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void loadBuffersUsingLazyParquetChunkLoader(const int logical_column_id, const int fragment_id, std::map< ChunkKey, AbstractBuffer * > &required_buffers)
void populateChunkBuffers(std::map< ChunkKey, AbstractBuffer * > &required_buffers, std::map< ChunkKey, AbstractBuffer * > &optional_buffers) override
std::vector< int > ChunkKey
Definition: types.h:37
std::set< std::string > getProcessedFilePaths()
std::unique_ptr< FileReaderMap > file_reader_cache_
std::set< std::string > getAllFilePaths()
void restoreDataWrapperInternals(const std::string &file_path, const ChunkMetadataVector &chunk_metadata_vector) override
std::unique_ptr< ForeignTableSchema > schema_
void serializeDataWrapperInternals(const std::string &file_path) const override
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
#define UNREACHABLE()
Definition: Logger.h:241
void reduce_metadata(std::shared_ptr< ChunkMetadata > reduce_to, std::shared_ptr< ChunkMetadata > reduce_from)
void get_value(const rapidjson::Value &json_val, FileRegion &file_region)
std::map< const std::string, ReaderPtr > FileReaderMap
Definition: ParquetShared.h:32
std::list< std::unique_ptr< ChunkMetadata > > loadChunk(const std::vector< RowGroupInterval > &row_group_intervals, const int parquet_column_index, std::list< Chunk_NS::Chunk > &chunks, StringDictionary *string_dictionary=nullptr)
void initEncoder(const SQLTypeInfo &tmp_sql_type)
std::map< int, std::vector< RowGroupInterval > > fragment_to_row_group_interval_map_
virtual bool resetChunkStats(const ChunkStats &)
: Reset chunk level stats (min, max, nulls) using new values from the argument.
Definition: Encoder.h:223
void throw_removed_row_error(const std::string &file_path)
int32_t StringOffsetT
Definition: sqltypes.h:919
void throw_removed_file_error(const std::string &file_path)
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > chunk_metadata_map_
void addNewFile(const std::string &file_path)
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: FsiJsonUtils.h:126
int get_physical_cols() const
Definition: sqltypes.h:332
static SysCatalog & instance()
Definition: SysCatalog.h:286
rapidjson::Document read_from_file(const std::string &file_path)
void metadataScanFiles(const std::set< std::string > &file_paths)
void addNewFragment(int row_group, const std::string &file_path)
ReaderPtr open_parquet_table(const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
specifies the content in-memory of a row in the column metadata table
bool isNewFile(const std::string &file_path) const
bool moveToNextFragment(size_t new_rows_count) const
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: FsiJsonUtils.h:111
void write_to_file(const rapidjson::Document &document, const std::string &filepath)
std::shared_ptr< arrow::fs::FileSystem > file_system_
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
bool isAppendMode() const
Checks if the table is in append mode.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
std::list< const ColumnDescriptor * > getColumnsToInitialize(const Interval< ColumnType > &column_interval)
int32_t ArrayOffsetT
Definition: sqltypes.h:920
void populateChunkMetadata(ChunkMetadataVector &chunk_metadata_vector) override
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:320
const ForeignServer * foreign_server
Definition: ForeignTable.h:53
bool g_enable_watchdog false
Definition: Execute.cpp:76
void initializeChunkBuffers(const int fragment_index, const Interval< ColumnType > &column_interval, std::map< ChunkKey, AbstractBuffer * > &required_buffers, const bool reserve_buffers_and_set_stats=false)
#define CHECK(condition)
Definition: Logger.h:197
bool is_geometry() const
Definition: sqltypes.h:490
#define DEBUG_TIMER(name)
Definition: Logger.h:313
bool is_dict_encoded_string() const
Definition: sqltypes.h:512
SQLTypeInfo columnType
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
static std::string getFullFilePath(const ForeignTable *foreign_table)
Returns the path to the source file/dir of the table. Depending on options this may result from a con...
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697
void set_value(rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
bool is_array() const
Definition: sqltypes.h:486
std::list< RowGroupMetadata > metadataScan(const std::set< std::string > &file_paths, const ForeignTableSchema &schema)
Perform a metadata scan for the paths specified.