OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Importer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * @file Importer.h
19  * @author Wei Hong <wei@mapd.com>
20  * @brief Importer class for table import from file
21  */
22 #ifndef _IMPORTER_H_
23 #define _IMPORTER_H_
24 
25 #include <atomic>
26 #include <boost/filesystem.hpp>
27 #include <boost/noncopyable.hpp>
28 #include <boost/tokenizer.hpp>
29 #include <condition_variable>
30 #include <cstdio>
31 #include <cstdlib>
32 #include <iostream>
33 #include <list>
34 #include <map>
35 #include <memory>
36 #include <mutex>
37 #include <set>
38 #include <string>
39 #include <string_view>
40 #include <utility>
41 
42 #include "AbstractImporter.h"
43 #include "Catalog/Catalog.h"
45 #include "DataMgr/Chunk/Chunk.h"
46 #if defined(ENABLE_IMPORT_PARQUET)
48 #endif
49 #include "Fragmenter/Fragmenter.h"
50 #include "Geospatial/GDAL.h"
52 #include "Logger/Logger.h"
54 #include "Shared/checked_alloc.h"
55 #include "Shared/fixautotools.h"
56 
57 // Some builds of boost::geometry require iostream, but don't explicitly include it.
58 // Placing in own section to ensure it's included after iostream.
59 #include <boost/geometry/index/rtree.hpp>
60 
61 class TDatum;
62 class TColumn;
63 
64 namespace arrow {
65 
66 class Array;
67 
68 } // namespace arrow
69 
70 namespace import_export {
71 
72 class Importer;
73 
74 using ArraySliceRange = std::pair<size_t, size_t>;
75 
77  std::mutex mutex;
78  std::set<int64_t> rows;
79  std::atomic<int> nerrors;
80  std::string file_name;
81  int row_group;
83 };
84 
86  public:
87  static ArrayDatum composeNullArray(const SQLTypeInfo& ti);
88  static ArrayDatum composeNullPointCoords(const SQLTypeInfo& coords_ti,
89  const SQLTypeInfo& geo_ti);
90 };
91 
92 class TypedImportBuffer : boost::noncopyable {
93  public:
94  using OptionalStringVector = std::optional<std::vector<std::string>>;
// Constructs the staging buffer for one column.
//
// col_desc    - descriptor of the target column; its columnType selects which
//               vector is allocated below.
// string_dict - stored in string_dict_ as-is; not dereferenced here.
//
// Each case allocates the vector(s) for that SQL type with raw `new`. The
// buffer pointers are members of anonymous unions declared in this class's
// private section, so only the pointer(s) assigned here are meaningful
// afterwards. Column types not listed in the switch hit CHECK(false).
95  TypedImportBuffer(const ColumnDescriptor* col_desc, StringDictionary* string_dict)
96  : column_desc_(col_desc), string_dict_(string_dict) {
97  switch (col_desc->columnType.get_type()) {
98  case kBOOLEAN:
99  bool_buffer_ = new std::vector<int8_t>();
100  break;
101  case kTINYINT:
102  tinyint_buffer_ = new std::vector<int8_t>();
103  break;
104  case kSMALLINT:
105  smallint_buffer_ = new std::vector<int16_t>();
106  break;
107  case kINT:
108  int_buffer_ = new std::vector<int32_t>();
109  break;
// NUMERIC and DECIMAL share the 64-bit integer buffer with BIGINT.
110  case kBIGINT:
111  case kNUMERIC:
112  case kDECIMAL:
113  bigint_buffer_ = new std::vector<int64_t>();
114  break;
115  case kFLOAT:
116  float_buffer_ = new std::vector<float>();
117  break;
118  case kDOUBLE:
119  double_buffer_ = new std::vector<double>();
120  break;
// Text columns always get a raw string buffer. Dictionary-encoded ones also get
// an id buffer whose element width follows columnType.get_size() (1, 2 or 4).
121  case kTEXT:
122  case kVARCHAR:
123  case kCHAR:
124  string_buffer_ = new std::vector<std::string>();
125  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
126  switch (col_desc->columnType.get_size()) {
127  case 1:
128  string_dict_i8_buffer_ = new std::vector<uint8_t>();
129  break;
130  case 2:
131  string_dict_i16_buffer_ = new std::vector<uint16_t>();
132  break;
133  case 4:
134  string_dict_i32_buffer_ = new std::vector<int32_t>();
135  break;
136  default:
137  CHECK(false);
138  }
139  }
140  break;
// Date/time types are staged as 64-bit integers as well.
141  case kDATE:
142  case kTIME:
143  case kTIMESTAMP:
144  bigint_buffer_ = new std::vector<int64_t>();
145  break;
// Arrays with a string subtype keep both the raw strings and an ArrayDatum
// buffer for the dictionary-encoded form; every other array uses a single
// ArrayDatum buffer.
// NOTE(review): listing line 148 is absent from this copy (numbering jumps
// 147 -> 149); check the original header for the statement that belongs there.
146  case kARRAY:
147  if (IS_STRING(col_desc->columnType.get_subtype())) {
149  string_array_buffer_ = new std::vector<OptionalStringVector>();
150  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
151  } else {
152  array_buffer_ = new std::vector<ArrayDatum>();
153  }
154  break;
// Geo columns are staged as strings.
155  case kPOINT:
156  case kLINESTRING:
157  case kPOLYGON:
158  case kMULTIPOLYGON:
159  geo_string_buffer_ = new std::vector<std::string>();
160  break;
161  default:
162  CHECK(false);
163  }
164  }
165 
167  switch (column_desc_->columnType.get_type()) {
168  case kBOOLEAN:
169  delete bool_buffer_;
170  break;
171  case kTINYINT:
172  delete tinyint_buffer_;
173  break;
174  case kSMALLINT:
175  delete smallint_buffer_;
176  break;
177  case kINT:
178  delete int_buffer_;
179  break;
180  case kBIGINT:
181  case kNUMERIC:
182  case kDECIMAL:
183  delete bigint_buffer_;
184  break;
185  case kFLOAT:
186  delete float_buffer_;
187  break;
188  case kDOUBLE:
189  delete double_buffer_;
190  break;
191  case kTEXT:
192  case kVARCHAR:
193  case kCHAR:
194  delete string_buffer_;
196  switch (column_desc_->columnType.get_size()) {
197  case 1:
198  delete string_dict_i8_buffer_;
199  break;
200  case 2:
202  break;
203  case 4:
205  break;
206  }
207  }
208  break;
209  case kDATE:
210  case kTIME:
211  case kTIMESTAMP:
212  delete bigint_buffer_;
213  break;
214  case kARRAY:
216  delete string_array_buffer_;
218  } else {
219  delete array_buffer_;
220  }
221  break;
222  case kPOINT:
223  case kLINESTRING:
224  case kPOLYGON:
225  case kMULTIPOLYGON:
226  delete geo_string_buffer_;
227  break;
228  default:
229  CHECK(false);
230  }
231  }
232 
// Typed appenders: each one pushes a single value onto the buffer for one
// column type. None of them checks the column's type, and the buffer pointers
// share storage in an anonymous union (see the private section of this class),
// so the caller must use the appender that matches the buffer the constructor
// allocated.
233  void addBoolean(const int8_t v) { bool_buffer_->push_back(v); }
234 
235  void addTinyint(const int8_t v) { tinyint_buffer_->push_back(v); }
236 
237  void addSmallint(const int16_t v) { smallint_buffer_->push_back(v); }
238 
239  void addInt(const int32_t v) { int_buffer_->push_back(v); }
240 
241  void addBigint(const int64_t v) { bigint_buffer_->push_back(v); }
242 
243  void addFloat(const float v) { float_buffer_->push_back(v); }
244 
245  void addDouble(const double v) { double_buffer_->push_back(v); }
246 
// The string_view overloads copy the viewed characters into a new std::string
// element, so the caller's storage need not outlive the call.
247  void addString(const std::string_view v) { string_buffer_->emplace_back(v); }
248 
249  void addGeoString(const std::string_view v) { geo_string_buffer_->emplace_back(v); }
250 
// Appends a copy of v to the generic array buffer.
251  void addArray(const ArrayDatum& v) { array_buffer_->push_back(v); }
252 
254  string_array_buffer_->emplace_back(std::vector<std::string>{});
255  return string_array_buffer_->back();
256  }
257 
259  string_array_buffer_->push_back(arr);
260  }
261 
262  void addDictEncodedString(const std::vector<std::string>& string_vec);
263 
265  const std::vector<OptionalStringVector>& string_array_vec) {
267 
268  // first check data is ok
269  for (auto& p : string_array_vec) {
270  if (!p) {
271  continue;
272  }
273  for (const auto& str : *p) {
274  if (str.size() > StringDictionary::MAX_STRLEN) {
275  throw std::runtime_error("String too long for dictionary encoding.");
276  }
277  }
278  }
279 
280  // to avoid copying, create a string view of each string in the
281  // `string_array_vec` where the array holding the string is *not null*
282  std::vector<std::vector<std::string_view>> string_view_array_vec;
283  for (auto& p : string_array_vec) {
284  if (!p) {
285  continue;
286  }
287  auto& array = string_view_array_vec.emplace_back();
288  for (const auto& str : *p) {
289  array.emplace_back(str);
290  }
291  }
292 
293  std::vector<std::vector<int32_t>> ids_array(0);
294  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
295 
296  size_t i, j;
297  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
298  if (!string_array_vec[i]) { // null array
299  string_array_dict_buffer_->push_back(
301  } else { // non-null array
302  auto& p = ids_array[j++];
303  size_t len = p.size() * sizeof(int32_t);
304  auto a = static_cast<int32_t*>(checked_malloc(len));
305  memcpy(a, &p[0], len);
306  string_array_dict_buffer_->push_back(
307  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
308  }
309  }
310  }
311 
// Returns the SQL type of the column this buffer stages, by reference into the
// column descriptor passed to the constructor.
312  const SQLTypeInfo& getTypeInfo() const { return column_desc_->columnType; }
313 
// Returns the column descriptor pointer that was passed to the constructor.
314  const ColumnDescriptor* getColumnDesc() const { return column_desc_; }
315 
317 
// Returns the raw storage of the active fixed-width buffer, viewed as bytes.
// Only boolean, integer, decimal, floating-point and date/time columns are
// handled; any other column type (text, array, geo, ...) falls into the
// default branch and calls abort().
// The pointer comes from std::vector::data(), so it is only usable until that
// vector is reallocated or destroyed.
318  int8_t* getAsBytes() const {
319  switch (column_desc_->columnType.get_type()) {
320  case kBOOLEAN:
321  return reinterpret_cast<int8_t*>(bool_buffer_->data());
322  case kTINYINT:
323  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
324  case kSMALLINT:
325  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
326  case kINT:
327  return reinterpret_cast<int8_t*>(int_buffer_->data());
328  case kBIGINT:
329  case kNUMERIC:
330  case kDECIMAL:
331  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
332  case kFLOAT:
333  return reinterpret_cast<int8_t*>(float_buffer_->data());
334  case kDOUBLE:
335  return reinterpret_cast<int8_t*>(double_buffer_->data());
336  case kDATE:
337  case kTIME:
338  case kTIMESTAMP:
339  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
340  default:
341  abort();
342  }
343  }
344 
// Returns the size in bytes of one element of the active fixed-width buffer.
// Handles the same column types as getAsBytes(); any other type calls abort().
// The operand of sizeof is unevaluated, so `(*buffer)[0]` never actually
// indexes the vector and an empty buffer is fine.
345  size_t getElementSize() const {
346  switch (column_desc_->columnType.get_type()) {
347  case kBOOLEAN:
348  return sizeof((*bool_buffer_)[0]);
349  case kTINYINT:
350  return sizeof((*tinyint_buffer_)[0]);
351  case kSMALLINT:
352  return sizeof((*smallint_buffer_)[0]);
353  case kINT:
354  return sizeof((*int_buffer_)[0]);
355  case kBIGINT:
356  case kNUMERIC:
357  case kDECIMAL:
358  return sizeof((*bigint_buffer_)[0]);
359  case kFLOAT:
360  return sizeof((*float_buffer_)[0]);
361  case kDOUBLE:
362  return sizeof((*double_buffer_)[0]);
363  case kDATE:
364  case kTIME:
365  case kTIMESTAMP:
366  return sizeof((*bigint_buffer_)[0]);
367  default:
368  abort();
369  }
370  }
371 
// Raw accessors for the non-scalar buffers. Each returns the stored member
// pointer unchanged and performs no column-type check. Because these pointers
// share storage in an anonymous union, the result is only meaningful when the
// constructor allocated that particular buffer for this column.
372  std::vector<std::string>* getStringBuffer() const { return string_buffer_; }
373 
374  std::vector<std::string>* getGeoStringBuffer() const { return geo_string_buffer_; }
375 
376  std::vector<ArrayDatum>* getArrayBuffer() const { return array_buffer_; }
377 
// Each element is an optional vector of strings (OptionalStringVector).
378  std::vector<OptionalStringVector>* getStringArrayBuffer() const {
379  return string_array_buffer_;
380  }
381 
382  std::vector<ArrayDatum>* getStringArrayDictBuffer() const {
384  }
385 
// Returns the dictionary-id buffer as raw bytes. The 1-, 2- or 4-byte id vector
// is selected by columnType.get_size(); any other size calls abort().
// NOTE(review): there is no check here that the column is dictionary-encoded
// text. Presumably callers guarantee that, since otherwise the selected pointer
// was never assigned by the constructor - confirm against call sites.
386  int8_t* getStringDictBuffer() const {
387  switch (column_desc_->columnType.get_size()) {
388  case 1:
389  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
390  case 2:
391  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
392  case 4:
393  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
394  default:
395  abort();
396  }
397  }
398 
400  if (string_dict_ == nullptr) {
401  return true;
402  }
403  return string_dict_->checkpoint();
404  }
405 
406  void clear() {
407  switch (column_desc_->columnType.get_type()) {
408  case kBOOLEAN: {
409  bool_buffer_->clear();
410  break;
411  }
412  case kTINYINT: {
413  tinyint_buffer_->clear();
414  break;
415  }
416  case kSMALLINT: {
417  smallint_buffer_->clear();
418  break;
419  }
420  case kINT: {
421  int_buffer_->clear();
422  break;
423  }
424  case kBIGINT:
425  case kNUMERIC:
426  case kDECIMAL: {
427  bigint_buffer_->clear();
428  break;
429  }
430  case kFLOAT: {
431  float_buffer_->clear();
432  break;
433  }
434  case kDOUBLE: {
435  double_buffer_->clear();
436  break;
437  }
438  case kTEXT:
439  case kVARCHAR:
440  case kCHAR: {
441  string_buffer_->clear();
443  switch (column_desc_->columnType.get_size()) {
444  case 1:
445  string_dict_i8_buffer_->clear();
446  break;
447  case 2:
448  string_dict_i16_buffer_->clear();
449  break;
450  case 4:
451  string_dict_i32_buffer_->clear();
452  break;
453  default:
454  CHECK(false);
455  }
456  }
457  break;
458  }
459  case kDATE:
460  case kTIME:
461  case kTIMESTAMP:
462  bigint_buffer_->clear();
463  break;
464  case kARRAY: {
466  string_array_buffer_->clear();
467  string_array_dict_buffer_->clear();
468  } else {
469  array_buffer_->clear();
470  }
471  break;
472  }
473  case kPOINT:
474  case kLINESTRING:
475  case kPOLYGON:
476  case kMULTIPOLYGON:
477  geo_string_buffer_->clear();
478  break;
479  default:
480  CHECK(false);
481  }
482  }
483 
484  size_t add_values(const ColumnDescriptor* cd, const TColumn& data);
485 
486  size_t add_arrow_values(const ColumnDescriptor* cd,
487  const arrow::Array& data,
488  const bool exact_type_match,
489  const ArraySliceRange& slice_range,
490  BadRowsTracker* bad_rows_tracker);
491 
492  void add_value(const ColumnDescriptor* cd,
493  const std::string_view val,
494  const bool is_null,
495  const CopyParams& copy_params,
496  const bool check_not_null = true);
497 
498  void add_value(const ColumnDescriptor* cd, const TDatum& val, const bool is_null);
499 
500  void addDefaultValues(const ColumnDescriptor* cd, size_t num_rows);
501 
502  void pop_value();
503 
504  template <typename DATA_TYPE>
506  const arrow::Array& array,
507  std::vector<DATA_TYPE>& buffer,
508  const ArraySliceRange& slice_range,
509  BadRowsTracker* const bad_rows_tracker);
510  template <typename DATA_TYPE>
511  auto del_values(std::vector<DATA_TYPE>& buffer, BadRowsTracker* const bad_rows_tracker);
512  auto del_values(const SQLTypes type, BadRowsTracker* const bad_rows_tracker);
513 
514  static std::vector<DataBlockPtr> get_data_block_pointers(
515  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers);
516 
517  std::vector<std::unique_ptr<TypedImportBuffer>>* import_buffers;
518  size_t col_idx;
519 
520  private:
521  union {
522  std::vector<int8_t>* bool_buffer_;
523  std::vector<int8_t>* tinyint_buffer_;
524  std::vector<int16_t>* smallint_buffer_;
525  std::vector<int32_t>* int_buffer_;
526  std::vector<int64_t>* bigint_buffer_;
527  std::vector<float>* float_buffer_;
528  std::vector<double>* double_buffer_;
529  std::vector<std::string>* string_buffer_;
530  std::vector<std::string>* geo_string_buffer_;
531  std::vector<ArrayDatum>* array_buffer_;
532  std::vector<OptionalStringVector>* string_array_buffer_;
533  };
534  union {
535  std::vector<uint8_t>* string_dict_i8_buffer_;
536  std::vector<uint16_t>* string_dict_i16_buffer_;
537  std::vector<int32_t>* string_dict_i32_buffer_;
538  std::vector<ArrayDatum>* string_array_dict_buffer_;
539  };
542 };
543 
544 class Loader {
545  using LoadCallbackType =
546  std::function<bool(const std::vector<std::unique_ptr<TypedImportBuffer>>&,
547  std::vector<DataBlockPtr>&,
548  size_t)>;
549 
550  public:
551  // ParquetDataWrapper
553  const TableDescriptor* t,
554  LoadCallbackType load_callback = nullptr)
555  : catalog_(c)
556  , table_desc_(t)
557  , column_descs_(c.getAllColumnMetadataForTable(t->tableId, false, false, true))
558  , load_callback_(load_callback) {
559  init();
560  }
561 
562  virtual ~Loader() {}
563 
// Returns the table descriptor pointer this loader was constructed with.
565  const TableDescriptor* getTableDesc() const { return table_desc_; }
// Returns, by const reference, the column descriptor list that the constructor
// obtained from the catalog via getAllColumnMetadataForTable().
566  const std::list<const ColumnDescriptor*>& get_column_descs() const {
567  return column_descs_;
568  }
569 
571  if ((cd->columnType.get_type() != kARRAY ||
572  !IS_STRING(cd->columnType.get_subtype())) &&
573  (!cd->columnType.is_string() ||
575  return nullptr;
576  }
577  return dict_map_.at(cd->columnId);
578  }
579 
580  virtual bool load(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
581  const size_t row_count,
582  const Catalog_Namespace::SessionInfo* session_info);
583  virtual bool loadNoCheckpoint(
584  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
585  const size_t row_count,
586  const Catalog_Namespace::SessionInfo* session_info);
587  virtual void checkpoint();
588  virtual std::vector<Catalog_Namespace::TableEpochInfo> getTableEpochs() const;
589  virtual void setTableEpochs(
590  const std::vector<Catalog_Namespace::TableEpochInfo>& table_epochs);
591 
// Setter/getter for the adding_columns_ flag (default-initialized to false).
// NOTE(review): how the flag changes loading is not visible in this header.
592  void setAddingColumns(const bool adding_columns) { adding_columns_ = adding_columns; }
593  bool isAddingColumns() const { return adding_columns_; }
// Declared here; defined out of line in Importer.cpp.
594  void dropColumns(const std::vector<int>& columns);
// Returns a copy of error_msg_.
// NOTE(review): this accessor is not const-qualified and does not take
// loader_mutex_; confirm whether error_msg_ can be written concurrently.
// The ';' after the closing brace is a redundant empty declaration.
595  std::string getErrorMessage() { return error_msg_; };
596 
597  protected:
598  void init();
599 
600  virtual bool loadImpl(
601  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
602  size_t row_count,
603  bool checkpoint,
604  const Catalog_Namespace::SessionInfo* session_info);
605 
606  using OneShardBuffers = std::vector<std::unique_ptr<TypedImportBuffer>>;
607  void distributeToShards(std::vector<OneShardBuffers>& all_shard_import_buffers,
608  std::vector<size_t>& all_shard_row_counts,
609  const OneShardBuffers& import_buffers,
610  const size_t row_count,
611  const size_t shard_count,
612  const Catalog_Namespace::SessionInfo* session_info);
613 
616  std::list<const ColumnDescriptor*> column_descs_;
619  std::map<int, StringDictionary*> dict_map_;
620 
621  private:
622  bool loadToShard(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
623  size_t row_count,
624  const TableDescriptor* shard_table,
625  bool checkpoint,
626  const Catalog_Namespace::SessionInfo* session_info);
628  std::vector<OneShardBuffers>& all_shard_import_buffers,
629  std::vector<size_t>& all_shard_row_counts,
630  const OneShardBuffers& import_buffers,
631  const size_t row_count,
632  const size_t shard_count,
633  const Catalog_Namespace::SessionInfo* session_info);
635  std::vector<OneShardBuffers>& all_shard_import_buffers,
636  std::vector<size_t>& all_shard_row_counts,
637  const OneShardBuffers& import_buffers,
638  const size_t row_count,
639  const size_t shard_count,
640  const Catalog_Namespace::SessionInfo* session_info);
641  void fillShardRow(const size_t row_index,
642  OneShardBuffers& shard_output_buffers,
643  const OneShardBuffers& import_buffers);
644 
645  bool adding_columns_ = false;
646  std::mutex loader_mutex_;
647  std::string error_msg_;
648 };
649 
650 struct ImportStatus {
651  std::chrono::steady_clock::time_point start;
652  std::chrono::steady_clock::time_point end;
656  std::chrono::duration<size_t, std::milli> elapsed;
657  bool load_failed = false;
658  std::string load_msg;
659  int thread_id; // to recall thread_id after thread exit
661  : start(std::chrono::steady_clock::now())
662  , rows_completed(0)
663  , rows_estimated(0)
664  , rows_rejected(0)
665  , elapsed(0)
666  , thread_id(0) {}
667 
671  if (is.load_failed) {
672  load_failed = true;
673  load_msg = is.load_msg;
674  }
675 
676  return *this;
677  }
678 };
679 
681  public:
// Stores the copy parameters and the source path for this sink.
// The mem-initializers reuse the parameter names: the name outside the
// parentheses is the member, the one inside is the constructor argument.
// NOTE(review): file_path is taken by const value, so the argument is copied
// into the parameter and then copied again into the const member.
683  DataStreamSink(const CopyParams& copy_params, const std::string file_path)
684  : copy_params(copy_params), file_path(file_path) {}
// Virtual destructor with an empty body; in particular it does not close p_file.
// NOTE(review): if this class owns p_file, it must be closed elsewhere - confirm.
685  virtual ~DataStreamSink() {}
687  const std::string& file_path,
688  const bool decompressed,
689  const Catalog_Namespace::SessionInfo* session_info) = 0;
690 #ifdef ENABLE_IMPORT_PARQUET
691  virtual void import_parquet(std::vector<std::string>& file_paths,
692  const Catalog_Namespace::SessionInfo* session_info);
693  virtual void import_local_parquet(
694  const std::string& file_path,
695  const Catalog_Namespace::SessionInfo* session_info) = 0;
696 #endif
697  const CopyParams& get_copy_params() const { return copy_params; }
698  void import_compressed(std::vector<std::string>& file_paths,
699  const Catalog_Namespace::SessionInfo* session_info);
700 
701  protected:
703 
705  const std::string file_path;
706  FILE* p_file = nullptr;
709  size_t total_file_size{0};
710  std::vector<size_t> file_offsets;
711  std::mutex file_offsets_mutex;
712 };
713 
714 class Detector : public DataStreamSink {
715  public:
716  Detector(const boost::filesystem::path& fp, CopyParams& cp);
717 
718 #ifdef ENABLE_IMPORT_PARQUET
719  void import_local_parquet(const std::string& file_path,
720  const Catalog_Namespace::SessionInfo* session_info) override;
721 #endif
722  static SQLTypes detect_sqltype(const std::string& str);
723  std::vector<std::string> get_headers();
724  std::vector<std::vector<std::string>> raw_rows;
725  std::vector<std::vector<std::string>> get_sample_rows(size_t n);
726  bool has_headers = false;
727 
728  std::vector<SQLTypeInfo> getBestColumnTypes() const;
729 
730  static constexpr size_t kDefaultSampleRowsCount{100};
731 
732  private:
733  void init();
734  void read_file();
735  void detect_row_delimiter();
736  void split_raw_data();
737  std::vector<SQLTypes> detect_column_types(const std::vector<std::string>& row);
738  static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b);
739  void find_best_sqltypes();
740  std::vector<SQLTypes> find_best_sqltypes(
741  const std::vector<std::vector<std::string>>& raw_rows,
742  const CopyParams& copy_params);
743  std::vector<SQLTypes> find_best_sqltypes(
744  const std::vector<std::vector<std::string>>::const_iterator& row_begin,
745  const std::vector<std::vector<std::string>>::const_iterator& row_end,
746  const CopyParams& copy_params);
747 
748  std::vector<EncodingType> find_best_encodings(
749  const std::vector<std::vector<std::string>>::const_iterator& row_begin,
750  const std::vector<std::vector<std::string>>::const_iterator& row_end,
751  const std::vector<SQLTypes>& best_types);
752 
753  bool detect_headers(const std::vector<SQLTypes>& first_types,
754  const std::vector<SQLTypes>& rest_types);
757  const std::string& file_path,
758  const bool decompressed,
759  const Catalog_Namespace::SessionInfo* session_info) override;
760  std::string raw_data;
761  boost::filesystem::path file_path;
762  std::chrono::duration<double> timeout{1};
763  std::string line1;
764 #if defined(ENABLE_IMPORT_PARQUET)
765  std::optional<foreign_storage::DataPreview> data_preview_;
766 #endif
767  std::vector<SQLTypes> best_sqltypes;
768  std::vector<EncodingType> best_encodings;
769 };
770 
771 class Importer : public DataStreamSink, public AbstractImporter {
772  public:
774  const TableDescriptor* t,
775  const std::string& f,
776  const CopyParams& p);
777  Importer(Loader* providedLoader, const std::string& f, const CopyParams& p);
778  ~Importer() override;
779  ImportStatus import(const Catalog_Namespace::SessionInfo* session_info) override;
781  const std::string& file_path,
782  const bool decompressed,
783  const Catalog_Namespace::SessionInfo* session_info) override;
784  ImportStatus importGDAL(const std::map<std::string, std::string>& colname_to_src,
785  const Catalog_Namespace::SessionInfo* session_info,
786  const bool is_raster);
// Returns the stored copy parameters by const reference. This redeclares the
// non-virtual DataStreamSink::get_copy_params() with an identical body.
787  const CopyParams& get_copy_params() const { return copy_params; }
// Forwards to the loader's column descriptor list. `loader` is dereferenced
// without a null check.
788  const std::list<const ColumnDescriptor*>& get_column_descs() const {
789  return loader->get_column_descs();
790  }
791  void load(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
792  size_t row_count,
793  const Catalog_Namespace::SessionInfo* session_info);
// Returns a mutable reference to the whole vector of import buffer sets.
794  std::vector<std::vector<std::unique_ptr<TypedImportBuffer>>>& get_import_buffers_vec() {
795  return import_buffers_vec;
796  }
// Returns a mutable reference to the i-th set of import buffers. Uses
// operator[], so i is not range-checked; a negative or too-large index is
// undefined behaviour.
797  std::vector<std::unique_ptr<TypedImportBuffer>>& get_import_buffers(int i) {
798  return import_buffers_vec[i];
799  }
// Returns the raw pointer held by is_array_a (a std::unique_ptr<bool[]>);
// ownership stays with this object. The array length is not exposed here.
800  const bool* get_is_array() const { return is_array_a.get(); }
801 #ifdef ENABLE_IMPORT_PARQUET
802  void import_local_parquet(const std::string& file_path,
803  const Catalog_Namespace::SessionInfo* session_info) override;
804 #endif
805  static ImportStatus get_import_status(const std::string& id);
806  static void set_import_status(const std::string& id, const ImportStatus is);
807  static const std::list<ColumnDescriptor> gdalToColumnDescriptors(
808  const std::string& fileName,
809  const bool is_raster,
810  const std::string& geoColumnName,
811  const CopyParams& copy_params);
812  static void readMetadataSampleGDAL(
813  const std::string& fileName,
814  const std::string& geoColumnName,
815  std::map<std::string, std::vector<std::string>>& metadata,
816  int rowLimit,
817  const CopyParams& copy_params);
818  static bool gdalFileExists(const std::string& path, const CopyParams& copy_params);
819  static bool gdalFileOrDirectoryExists(const std::string& path,
820  const CopyParams& copy_params);
821  static std::vector<std::string> gdalGetAllFilesInArchive(
822  const std::string& archive_path,
823  const CopyParams& copy_params);
826  GeoFileLayerInfo(const std::string& name_, GeoFileLayerContents contents_)
827  : name(name_), contents(contents_) {}
828  std::string name;
830  };
831  static std::vector<GeoFileLayerInfo> gdalGetLayersInGeoFile(
832  const std::string& file_name,
833  const CopyParams& copy_params);
834  Catalog_Namespace::Catalog& getCatalog() { return loader->getCatalog(); }
835  static void set_geo_physical_import_buffer(
836  const Catalog_Namespace::Catalog& catalog,
837  const ColumnDescriptor* cd,
838  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
839  size_t& col_idx,
840  std::vector<double>& coords,
841  std::vector<double>& bounds,
842  std::vector<int>& ring_sizes,
843  std::vector<int>& poly_rings,
844  int render_group,
845  const bool force_null = false);
847  const Catalog_Namespace::Catalog& catalog,
848  const ColumnDescriptor* cd,
849  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
850  size_t& col_idx,
851  std::vector<std::vector<double>>& coords_column,
852  std::vector<std::vector<double>>& bounds_column,
853  std::vector<std::vector<int>>& ring_sizes_column,
854  std::vector<std::vector<int>>& poly_rings_column,
855  std::vector<int>& render_groups_column);
856  void checkpoint(const std::vector<Catalog_Namespace::TableEpochInfo>& table_epochs);
857  auto getLoader() const { return loader.get(); }
858 
859  private:
860  static bool gdalStatInternal(const std::string& path,
861  const CopyParams& copy_params,
862  bool also_dir);
864  const std::string& fileName,
865  const CopyParams& copy_params);
866 
867  ImportStatus importGDALGeo(const std::map<std::string, std::string>& colname_to_src,
868  const Catalog_Namespace::SessionInfo* session_info);
870 
871  static const std::list<ColumnDescriptor> gdalToColumnDescriptorsGeo(
872  const std::string& fileName,
873  const std::string& geoColumnName,
874  const CopyParams& copy_params);
875  static const std::list<ColumnDescriptor> gdalToColumnDescriptorsRaster(
876  const std::string& fileName,
877  const std::string& geoColumnName,
878  const CopyParams& copy_params);
879 
880  std::string import_id;
881  size_t file_size;
882  size_t max_threads;
883  char* buffer[2];
884  std::vector<std::vector<std::unique_ptr<TypedImportBuffer>>> import_buffers_vec;
885  std::unique_ptr<Loader> loader;
886  std::unique_ptr<bool[]> is_array_a;
887  static std::mutex init_gdal_mutex;
888 };
889 
890 std::vector<std::unique_ptr<TypedImportBuffer>> setup_column_loaders(
891  const TableDescriptor* td,
892  Loader* loader);
893 
894 std::vector<std::unique_ptr<TypedImportBuffer>> fill_missing_columns(
896  Fragmenter_Namespace::InsertData& insert_data);
897 
898 std::unique_ptr<AbstractImporter> create_importer(
900  const TableDescriptor* td,
901  const std::string& copy_from_source,
902  const import_export::CopyParams& copy_params);
903 
904 } // namespace import_export
905 
906 #endif // _IMPORTER_H_
std::pair< size_t, size_t > ArraySliceRange
Definition: Importer.h:74
Loader(Catalog_Namespace::Catalog &c, const TableDescriptor *t, LoadCallbackType load_callback=nullptr)
Definition: Importer.h:552
const std::list< const ColumnDescriptor * > & get_column_descs() const
Definition: Importer.h:566
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
virtual std::vector< Catalog_Namespace::TableEpochInfo > getTableEpochs() const
Definition: Importer.cpp:4661
ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override
Definition: Importer.cpp:4437
ImportStatus importGDAL(const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info, const bool is_raster)
Definition: Importer.cpp:5305
std::mutex loader_mutex_
Definition: Importer.h:646
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:312
StringDictionary * getStringDictionary() const
Definition: Importer.h:316
ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override
Definition: Importer.cpp:3179
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void addBigint(const int64_t v)
Definition: Importer.h:241
std::string cat(Ts &&...args)
OptionalStringVector & addStringArray()
Definition: Importer.h:253
void addSmallint(const int16_t v)
Definition: Importer.h:237
Class for a per-database catalog. Also includes metadata for the current database and the current use...
Definition: Catalog.h:114
Definition: sqltypes.h:49
SQLTypes
Definition: sqltypes.h:38
TypedImportBuffer(const ColumnDescriptor *col_desc, StringDictionary *string_dict)
Definition: Importer.h:95
void import_compressed(std::vector< std::string > &file_paths, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:4168
const TableDescriptor * getTableDesc() const
Definition: Importer.h:565
void dropColumns(const std::vector< int > &columns)
Definition: Importer.cpp:3148
std::vector< std::string > * string_buffer_
Definition: Importer.h:529
void addString(const std::string_view v)
Definition: Importer.h:247
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:531
void find_best_sqltypes_and_headers()
Definition: Importer.cpp:3420
std::vector< SQLTypeInfo > getBestColumnTypes() const
Definition: Importer.cpp:3581
StringDictionary * string_dict_
Definition: Importer.h:541
void load(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3597
std::atomic< int > nerrors
Definition: Importer.h:79
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:94
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:434
static void set_geo_physical_import_buffer_columnar(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, std::vector< int > &render_groups_column)
Definition: Importer.cpp:1757
void addDouble(const double v)
Definition: Importer.h:245
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
Definition: Importer.cpp:6198
Importer(Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
Definition: Importer.cpp:176
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:524
const bool * get_is_array() const
Definition: Importer.h:800
std::vector< ArrayDatum > * getStringArrayDictBuffer() const
Definition: Importer.h:382
const TableDescriptor * table_desc_
Definition: Importer.h:615
virtual void checkpoint()
Definition: Importer.cpp:4653
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:6183
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > & get_import_buffers_vec()
Definition: Importer.h:794
ImportStatus importGDALGeo(const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:5315
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:767
static std::vector< GeoFileLayerInfo > gdalGetLayersInGeoFile(const std::string &file_name, const CopyParams &copy_params)
Definition: Importer.cpp:5236
std::chrono::duration< size_t, std::milli > elapsed
Definition: Importer.h:656
void distributeToShards(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2965
const CopyParams & get_copy_params() const
Definition: Importer.h:697
std::vector< float > * float_buffer_
Definition: Importer.h:527
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< std::unique_ptr< TypedImportBuffer > > & get_import_buffers(int i)
Definition: Importer.h:797
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:5097
GeoFileLayerInfo(const std::string &name_, GeoFileLayerContents contents_)
Definition: Importer.h:826
static bool gdalFileExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5132
std::unique_ptr< AbstractImporter > create_importer(Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
Definition: Importer.cpp:6270
std::vector< double > * double_buffer_
Definition: Importer.h:528
constexpr double f
Definition: Utm.h:31
void addFloat(const float v)
Definition: Importer.h:243
Fragmenter_Namespace::InsertData insert_data_
Definition: Importer.h:618
std::vector< std::string > * getStringBuffer() const
Definition: Importer.h:372
size_t add_values(const ColumnDescriptor *cd, const TColumn &data)
Definition: Importer.cpp:1008
mapd_shared_mutex import_mutex_
Definition: Importer.h:708
DataStreamSink(const CopyParams &copy_params, const std::string file_path)
Definition: Importer.h:683
ImportStatus & operator+=(const ImportStatus &is)
Definition: Importer.h:668
constexpr double a
Definition: Utm.h:32
void addStringArray(const OptionalStringVector &arr)
Definition: Importer.h:258
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
This file contains the class specification and related data structures for Catalog.
void addGeoString(const std::string_view v)
Definition: Importer.h:249
virtual ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info)=0
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:3308
std::vector< EncodingType > find_best_encodings(const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
Definition: Importer.cpp:3496
auto del_values(std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
void setAddingColumns(const bool adding_columns)
Definition: Importer.h:592
std::vector< std::unique_ptr< TypedImportBuffer >> OneShardBuffers
Definition: Importer.h:606
std::vector< int32_t > * int_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:538
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:3030
CONSTEXPR DEVICE bool is_null(const T &value)
DEVICE Array(const int64_t size, const bool is_null=false)
Definition: heavydbTypes.h:127
void addBoolean(const int8_t v)
Definition: Importer.h:233
auto getLoader() const
Definition: Importer.h:857
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:535
void addDictEncodedStringArray(const std::vector< OptionalStringVector > &string_array_vec)
Definition: Importer.h:264
void addTinyint(const int8_t v)
Definition: Importer.h:235
std::shared_timed_mutex mapd_shared_mutex
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:532
static void readMetadataSampleGDAL(const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
Definition: Importer.cpp:4714
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:526
int8_t * getAsBytes() const
Definition: Importer.h:318
std::string error_msg_
Definition: Importer.h:647
void addInt(const int32_t v)
Definition: Importer.h:239
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:834
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:724
virtual bool load(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, const size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2779
specifies the content in-memory of a row in the column metadata table
void fillShardRow(const size_t row_index, OneShardBuffers &shard_output_buffers, const OneShardBuffers &import_buffers)
Definition: Importer.cpp:2834
bool isAddingColumns() const
Definition: Importer.h:593
std::vector< EncodingType > best_encodings
Definition: Importer.h:768
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:522
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:884
static constexpr size_t kDefaultSampleRowsCount
Definition: Importer.h:730
bool g_enable_smem_group_by true
boost::filesystem::path file_path
Definition: Importer.h:761
size_t getElementSize() const
Definition: Importer.h:345
int8_t * getStringDictBuffer() const
Definition: Importer.h:386
static bool gdalFileOrDirectoryExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5137
std::set< int64_t > rows
Definition: Importer.h:78
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:886
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:517
bool checkpoint() noexcept
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:240
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Detector(const boost::filesystem::path &fp, CopyParams &cp)
Definition: Importer.cpp:3754
static Geospatial::GDAL::DataSourceUqPtr openGDALDataSource(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4672
virtual bool loadNoCheckpoint(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, const size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2772
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:537
static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b)
Definition: Importer.cpp:3398
std::list< const ColumnDescriptor * > column_descs_
Definition: Importer.h:616
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
Definition: GDAL.h:48
static const std::list< ColumnDescriptor > gdalToColumnDescriptorsGeo(const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4995
static ArrayDatum composeNullPointCoords(const SQLTypeInfo &coords_ti, const SQLTypeInfo &geo_ti)
Definition: Importer.cpp:438
void addArray(const ArrayDatum &v)
Definition: Importer.h:251
bool loadToShard(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, const TableDescriptor *shard_table, bool checkpoint, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3091
Catalog_Namespace::Catalog & getCatalog() const
Definition: Importer.h:564
ImportStatus archivePlumber(const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3643
std::string getErrorMessage()
Definition: Importer.h:595
std::chrono::duration< double > timeout
Definition: Importer.h:762
Definition: sqltypes.h:41
std::vector< std::string > * getGeoStringBuffer() const
Definition: Importer.h:374
std::vector< std::vector< std::string > > get_sample_rows(size_t n)
Definition: Importer.cpp:3547
bool detect_headers(const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
Definition: Importer.cpp:3532
#define IS_STRING(T)
Definition: sqltypes.h:250
std::string import_id
Definition: Importer.h:880
const ColumnDescriptor * column_desc_
Definition: Importer.h:540
size_t add_arrow_values(const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
Definition: Importer.cpp:907
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3607
std::chrono::steady_clock::time_point start
Definition: Importer.h:651
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:536
virtual bool loadImpl(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, bool checkpoint, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2998
static const std::list< ColumnDescriptor > gdalToColumnDescriptorsRaster(const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4923
std::vector< SQLTypes > detect_column_types(const std::vector< std::string > &row)
Definition: Importer.cpp:3390
std::vector< std::string > get_headers()
Definition: Importer.cpp:3562
Catalog_Namespace::Catalog & catalog_
Definition: Importer.h:614
const CopyParams & get_copy_params() const
Definition: Importer.h:787
bool g_enable_watchdog false
Definition: Execute.cpp:79
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:523
ImportStatus importGDALRaster(const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:5621
#define CHECK(condition)
Definition: Logger.h:223
std::string raw_data
Definition: Importer.h:760
static ImportStatus get_import_status(const std::string &id)
Definition: Importer.cpp:235
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:314
StringDictionary * getStringDict(const ColumnDescriptor *cd) const
Definition: Importer.h:570
void distributeToShardsExistingColumns(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2906
static const std::list< ColumnDescriptor > gdalToColumnDescriptors(const std::string &fileName, const bool is_raster, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4911
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5209
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
static constexpr size_t MAX_STRLEN
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1653
void addDefaultValues(const ColumnDescriptor *cd, size_t num_rows)
Definition: Importer.cpp:1474
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:378
bool is_string() const
Definition: sqltypes.h:510
LoadCallbackType load_callback_
Definition: Importer.h:617
void distributeToShardsNewColumns(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2944
constexpr double n
Definition: Utm.h:38
static std::mutex init_gdal_mutex
Definition: Importer.h:887
std::vector< size_t > file_offsets
Definition: Importer.h:710
void add_value(const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
Definition: Importer.cpp:566
std::map< int, StringDictionary * > dict_map_
Definition: Importer.h:619
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:530
std::chrono::steady_clock::time_point end
Definition: Importer.h:652
std::vector< ArrayDatum > * getArrayBuffer() const
Definition: Importer.h:376
std::unique_ptr< Loader > loader
Definition: Importer.h:885
std::function< bool(const std::vector< std::unique_ptr< TypedImportBuffer >> &, std::vector< DataBlockPtr > &, size_t)> LoadCallbackType
Definition: Importer.h:548
void addDictEncodedString(const std::vector< std::string > &string_vec)
Definition: Importer.cpp:526
const std::list< const ColumnDescriptor * > & get_column_descs() const
Definition: Importer.h:788
const std::string file_path
Definition: Importer.h:705
virtual void setTableEpochs(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:4666