OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Importer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file Importer.h
19  * @brief Importer class for table import from file
20  *
21  */
22 
23 #ifndef _IMPORTER_H_
24 #define _IMPORTER_H_
25 
26 #include <atomic>
27 #include <boost/filesystem.hpp>
28 #include <boost/noncopyable.hpp>
29 #include <boost/tokenizer.hpp>
30 #include <condition_variable>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <iostream>
34 #include <list>
35 #include <map>
36 #include <memory>
37 #include <mutex>
38 #include <set>
39 #include <string>
40 #include <string_view>
41 #include <utility>
42 
43 #include "AbstractImporter.h"
44 #include "Catalog/Catalog.h"
46 #include "DataMgr/Chunk/Chunk.h"
47 #if defined(ENABLE_IMPORT_PARQUET)
49 #endif
50 #include "Fragmenter/Fragmenter.h"
51 #include "Geospatial/GDAL.h"
53 #include "Logger/Logger.h"
55 #include "Shared/checked_alloc.h"
56 #include "Shared/fixautotools.h"
57 
58 // Some builds of boost::geometry require iostream, but don't explicitly include it.
59 // Placing in own section to ensure it's included after iostream.
60 #include <boost/geometry/index/rtree.hpp>
61 
62 class TDatum;
63 class TColumn;
64 
65 namespace arrow {
66 
67 class Array;
68 
69 } // namespace arrow
70 
71 namespace import_export {
72 
73 class Importer;
74 
75 using ArraySliceRange = std::pair<size_t, size_t>;
76 
78  std::mutex mutex;
79  std::set<int64_t> rows;
80  std::atomic<int> nerrors;
81  std::string file_name;
82  int row_group;
84 };
85 
87  public:
88  static ArrayDatum composeNullArray(const SQLTypeInfo& ti);
89  static ArrayDatum composeNullPointCoords(const SQLTypeInfo& coords_ti,
90  const SQLTypeInfo& geo_ti);
91 };
92 
93 class TypedImportBuffer : boost::noncopyable {
94  public:
95  using OptionalStringVector = std::optional<std::vector<std::string>>;
96  TypedImportBuffer(const ColumnDescriptor* col_desc, StringDictionary* string_dict)
97  : column_desc_(col_desc), string_dict_(string_dict) {
98  switch (col_desc->columnType.get_type()) {
99  case kBOOLEAN:
100  bool_buffer_ = new std::vector<int8_t>();
101  break;
102  case kTINYINT:
103  tinyint_buffer_ = new std::vector<int8_t>();
104  break;
105  case kSMALLINT:
106  smallint_buffer_ = new std::vector<int16_t>();
107  break;
108  case kINT:
109  int_buffer_ = new std::vector<int32_t>();
110  break;
111  case kBIGINT:
112  case kNUMERIC:
113  case kDECIMAL:
114  bigint_buffer_ = new std::vector<int64_t>();
115  break;
116  case kFLOAT:
117  float_buffer_ = new std::vector<float>();
118  break;
119  case kDOUBLE:
120  double_buffer_ = new std::vector<double>();
121  break;
122  case kTEXT:
123  case kVARCHAR:
124  case kCHAR:
125  string_buffer_ = new std::vector<std::string>();
126  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
127  switch (col_desc->columnType.get_size()) {
128  case 1:
129  string_dict_i8_buffer_ = new std::vector<uint8_t>();
130  break;
131  case 2:
132  string_dict_i16_buffer_ = new std::vector<uint16_t>();
133  break;
134  case 4:
135  string_dict_i32_buffer_ = new std::vector<int32_t>();
136  break;
137  default:
138  CHECK(false);
139  }
140  }
141  break;
142  case kDATE:
143  case kTIME:
144  case kTIMESTAMP:
145  bigint_buffer_ = new std::vector<int64_t>();
146  break;
147  case kARRAY:
148  if (IS_STRING(col_desc->columnType.get_subtype())) {
150  string_array_buffer_ = new std::vector<OptionalStringVector>();
151  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
152  } else {
153  array_buffer_ = new std::vector<ArrayDatum>();
154  }
155  break;
156  case kPOINT:
157  case kMULTIPOINT:
158  case kLINESTRING:
159  case kMULTILINESTRING:
160  case kPOLYGON:
161  case kMULTIPOLYGON:
162  geo_string_buffer_ = new std::vector<std::string>();
163  break;
164  default:
165  CHECK(false);
166  }
167  }
168 
170  switch (column_desc_->columnType.get_type()) {
171  case kBOOLEAN:
172  delete bool_buffer_;
173  break;
174  case kTINYINT:
175  delete tinyint_buffer_;
176  break;
177  case kSMALLINT:
178  delete smallint_buffer_;
179  break;
180  case kINT:
181  delete int_buffer_;
182  break;
183  case kBIGINT:
184  case kNUMERIC:
185  case kDECIMAL:
186  delete bigint_buffer_;
187  break;
188  case kFLOAT:
189  delete float_buffer_;
190  break;
191  case kDOUBLE:
192  delete double_buffer_;
193  break;
194  case kTEXT:
195  case kVARCHAR:
196  case kCHAR:
197  delete string_buffer_;
199  switch (column_desc_->columnType.get_size()) {
200  case 1:
201  delete string_dict_i8_buffer_;
202  break;
203  case 2:
205  break;
206  case 4:
208  break;
209  }
210  }
211  break;
212  case kDATE:
213  case kTIME:
214  case kTIMESTAMP:
215  delete bigint_buffer_;
216  break;
217  case kARRAY:
219  delete string_array_buffer_;
221  } else {
222  delete array_buffer_;
223  }
224  break;
225  case kPOINT:
226  case kMULTIPOINT:
227  case kLINESTRING:
228  case kMULTILINESTRING:
229  case kPOLYGON:
230  case kMULTIPOLYGON:
231  delete geo_string_buffer_;
232  break;
233  default:
234  CHECK(false);
235  }
236  }
237 
238  void addBoolean(const int8_t v) { bool_buffer_->push_back(v); }
239 
240  void addTinyint(const int8_t v) { tinyint_buffer_->push_back(v); }
241 
242  void addSmallint(const int16_t v) { smallint_buffer_->push_back(v); }
243 
244  void addInt(const int32_t v) { int_buffer_->push_back(v); }
245 
246  void addBigint(const int64_t v) { bigint_buffer_->push_back(v); }
247 
248  void addFloat(const float v) { float_buffer_->push_back(v); }
249 
250  void addDouble(const double v) { double_buffer_->push_back(v); }
251 
252  void addString(const std::string_view v) { string_buffer_->emplace_back(v); }
253 
254  void addGeoString(const std::string_view v) { geo_string_buffer_->emplace_back(v); }
255 
256  void addArray(const ArrayDatum& v) { array_buffer_->push_back(v); }
257 
259  string_array_buffer_->emplace_back(std::vector<std::string>{});
260  return string_array_buffer_->back();
261  }
262 
264  string_array_buffer_->push_back(arr);
265  }
266 
267  void addDictEncodedString(const std::vector<std::string>& string_vec);
268 
270  const std::vector<OptionalStringVector>& string_array_vec) {
272 
273  // first check data is ok
274  for (auto& p : string_array_vec) {
275  if (!p) {
276  continue;
277  }
278  for (const auto& str : *p) {
279  if (str.size() > StringDictionary::MAX_STRLEN) {
280  throw std::runtime_error("String too long for dictionary encoding.");
281  }
282  }
283  }
284 
285  // to avoid copying, create a string view of each string in the
286  // `string_array_vec` where the array holding the string is *not null*
287  std::vector<std::vector<std::string_view>> string_view_array_vec;
288  for (auto& p : string_array_vec) {
289  if (!p) {
290  continue;
291  }
292  auto& array = string_view_array_vec.emplace_back();
293  for (const auto& str : *p) {
294  array.emplace_back(str);
295  }
296  }
297 
298  std::vector<std::vector<int32_t>> ids_array(0);
299  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
300 
301  size_t i, j;
302  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
303  if (!string_array_vec[i]) { // null array
304  string_array_dict_buffer_->push_back(
306  } else { // non-null array
307  auto& p = ids_array[j++];
308  size_t len = p.size() * sizeof(int32_t);
309  auto a = static_cast<int32_t*>(checked_malloc(len));
310  memcpy(a, &p[0], len);
311  string_array_dict_buffer_->push_back(
312  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
313  }
314  }
315  }
316 
317  const SQLTypeInfo& getTypeInfo() const { return column_desc_->columnType; }
318 
319  const ColumnDescriptor* getColumnDesc() const { return column_desc_; }
320 
322 
323  int8_t* getAsBytes() const {
324  switch (column_desc_->columnType.get_type()) {
325  case kBOOLEAN:
326  return reinterpret_cast<int8_t*>(bool_buffer_->data());
327  case kTINYINT:
328  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
329  case kSMALLINT:
330  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
331  case kINT:
332  return reinterpret_cast<int8_t*>(int_buffer_->data());
333  case kBIGINT:
334  case kNUMERIC:
335  case kDECIMAL:
336  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
337  case kFLOAT:
338  return reinterpret_cast<int8_t*>(float_buffer_->data());
339  case kDOUBLE:
340  return reinterpret_cast<int8_t*>(double_buffer_->data());
341  case kDATE:
342  case kTIME:
343  case kTIMESTAMP:
344  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
345  default:
346  abort();
347  }
348  }
349 
350  size_t getElementSize() const {
351  switch (column_desc_->columnType.get_type()) {
352  case kBOOLEAN:
353  return sizeof((*bool_buffer_)[0]);
354  case kTINYINT:
355  return sizeof((*tinyint_buffer_)[0]);
356  case kSMALLINT:
357  return sizeof((*smallint_buffer_)[0]);
358  case kINT:
359  return sizeof((*int_buffer_)[0]);
360  case kBIGINT:
361  case kNUMERIC:
362  case kDECIMAL:
363  return sizeof((*bigint_buffer_)[0]);
364  case kFLOAT:
365  return sizeof((*float_buffer_)[0]);
366  case kDOUBLE:
367  return sizeof((*double_buffer_)[0]);
368  case kDATE:
369  case kTIME:
370  case kTIMESTAMP:
371  return sizeof((*bigint_buffer_)[0]);
372  default:
373  abort();
374  }
375  }
376 
377  std::vector<std::string>* getStringBuffer() const { return string_buffer_; }
378 
379  std::vector<std::string>* getGeoStringBuffer() const { return geo_string_buffer_; }
380 
381  std::vector<ArrayDatum>* getArrayBuffer() const { return array_buffer_; }
382 
383  std::vector<OptionalStringVector>* getStringArrayBuffer() const {
384  return string_array_buffer_;
385  }
386 
387  std::vector<ArrayDatum>* getStringArrayDictBuffer() const {
389  }
390 
391  int8_t* getStringDictBuffer() const {
392  switch (column_desc_->columnType.get_size()) {
393  case 1:
394  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
395  case 2:
396  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
397  case 4:
398  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
399  default:
400  abort();
401  }
402  }
403 
405  if (string_dict_ == nullptr) {
406  return true;
407  }
408  return string_dict_->checkpoint();
409  }
410 
411  void clear() {
412  switch (column_desc_->columnType.get_type()) {
413  case kBOOLEAN: {
414  bool_buffer_->clear();
415  break;
416  }
417  case kTINYINT: {
418  tinyint_buffer_->clear();
419  break;
420  }
421  case kSMALLINT: {
422  smallint_buffer_->clear();
423  break;
424  }
425  case kINT: {
426  int_buffer_->clear();
427  break;
428  }
429  case kBIGINT:
430  case kNUMERIC:
431  case kDECIMAL: {
432  bigint_buffer_->clear();
433  break;
434  }
435  case kFLOAT: {
436  float_buffer_->clear();
437  break;
438  }
439  case kDOUBLE: {
440  double_buffer_->clear();
441  break;
442  }
443  case kTEXT:
444  case kVARCHAR:
445  case kCHAR: {
446  string_buffer_->clear();
448  switch (column_desc_->columnType.get_size()) {
449  case 1:
450  string_dict_i8_buffer_->clear();
451  break;
452  case 2:
453  string_dict_i16_buffer_->clear();
454  break;
455  case 4:
456  string_dict_i32_buffer_->clear();
457  break;
458  default:
459  CHECK(false);
460  }
461  }
462  break;
463  }
464  case kDATE:
465  case kTIME:
466  case kTIMESTAMP:
467  bigint_buffer_->clear();
468  break;
469  case kARRAY: {
471  string_array_buffer_->clear();
472  string_array_dict_buffer_->clear();
473  } else {
474  array_buffer_->clear();
475  }
476  break;
477  }
478  case kPOINT:
479  case kMULTIPOINT:
480  case kLINESTRING:
481  case kMULTILINESTRING:
482  case kPOLYGON:
483  case kMULTIPOLYGON:
484  geo_string_buffer_->clear();
485  break;
486  default:
487  CHECK(false);
488  }
489  }
490 
491  size_t add_values(const ColumnDescriptor* cd, const TColumn& data);
492 
493  size_t add_arrow_values(const ColumnDescriptor* cd,
494  const arrow::Array& data,
495  const bool exact_type_match,
496  const ArraySliceRange& slice_range,
497  BadRowsTracker* bad_rows_tracker);
498 
499  void add_value(const ColumnDescriptor* cd,
500  const std::string_view val,
501  const bool is_null,
502  const CopyParams& copy_params,
503  const bool check_not_null = true);
504 
505  void add_value(const ColumnDescriptor* cd, const TDatum& val, const bool is_null);
506 
507  void addDefaultValues(const ColumnDescriptor* cd, size_t num_rows);
508 
509  void pop_value();
510 
511  template <typename DATA_TYPE>
513  const arrow::Array& array,
514  std::vector<DATA_TYPE>& buffer,
515  const ArraySliceRange& slice_range,
516  BadRowsTracker* const bad_rows_tracker);
517  template <typename DATA_TYPE>
518  auto del_values(std::vector<DATA_TYPE>& buffer, BadRowsTracker* const bad_rows_tracker);
519  auto del_values(const SQLTypes type, BadRowsTracker* const bad_rows_tracker);
520 
521  static std::vector<DataBlockPtr> get_data_block_pointers(
522  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers);
523 
524  std::vector<std::unique_ptr<TypedImportBuffer>>* import_buffers;
525  size_t col_idx;
526 
527  private:
528  union {
529  std::vector<int8_t>* bool_buffer_;
530  std::vector<int8_t>* tinyint_buffer_;
531  std::vector<int16_t>* smallint_buffer_;
532  std::vector<int32_t>* int_buffer_;
533  std::vector<int64_t>* bigint_buffer_;
534  std::vector<float>* float_buffer_;
535  std::vector<double>* double_buffer_;
536  std::vector<std::string>* string_buffer_;
537  std::vector<std::string>* geo_string_buffer_;
538  std::vector<ArrayDatum>* array_buffer_;
539  std::vector<OptionalStringVector>* string_array_buffer_;
540  };
541  union {
542  std::vector<uint8_t>* string_dict_i8_buffer_;
543  std::vector<uint16_t>* string_dict_i16_buffer_;
544  std::vector<int32_t>* string_dict_i32_buffer_;
545  std::vector<ArrayDatum>* string_array_dict_buffer_;
546  };
549 };
550 
551 class Loader {
552  using LoadCallbackType =
553  std::function<bool(const std::vector<std::unique_ptr<TypedImportBuffer>>&,
554  std::vector<DataBlockPtr>&,
555  size_t)>;
556 
557  public:
558  // ParquetDataWrapper
560  const TableDescriptor* t,
561  LoadCallbackType load_callback = nullptr)
562  : catalog_(c)
563  , table_desc_(t)
564  , column_descs_(c.getAllColumnMetadataForTable(t->tableId, false, false, true))
565  , load_callback_(load_callback) {
566  init();
567  }
568 
569  virtual ~Loader() {}
570 
572  const TableDescriptor* getTableDesc() const { return table_desc_; }
573  const std::list<const ColumnDescriptor*>& get_column_descs() const {
574  return column_descs_;
575  }
576 
578  if ((cd->columnType.get_type() != kARRAY ||
579  !IS_STRING(cd->columnType.get_subtype())) &&
580  (!cd->columnType.is_string() ||
582  return nullptr;
583  }
584  return dict_map_.at(cd->columnId);
585  }
586 
587  virtual bool load(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
588  const size_t row_count,
589  const Catalog_Namespace::SessionInfo* session_info);
590  virtual bool loadNoCheckpoint(
591  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
592  const size_t row_count,
593  const Catalog_Namespace::SessionInfo* session_info);
594  virtual void checkpoint();
595  virtual std::vector<Catalog_Namespace::TableEpochInfo> getTableEpochs() const;
596  virtual void setTableEpochs(
597  const std::vector<Catalog_Namespace::TableEpochInfo>& table_epochs);
598 
599  void setAddingColumns(const bool adding_columns) { adding_columns_ = adding_columns; }
600  bool isAddingColumns() const { return adding_columns_; }
601  void dropColumns(const std::vector<int>& columns);
602  std::string getErrorMessage() { return error_msg_; };
603 
604  protected:
605  void init();
606 
607  virtual bool loadImpl(
608  const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
609  size_t row_count,
610  bool checkpoint,
611  const Catalog_Namespace::SessionInfo* session_info);
612 
613  using OneShardBuffers = std::vector<std::unique_ptr<TypedImportBuffer>>;
614  void distributeToShards(std::vector<OneShardBuffers>& all_shard_import_buffers,
615  std::vector<size_t>& all_shard_row_counts,
616  const OneShardBuffers& import_buffers,
617  const size_t row_count,
618  const size_t shard_count,
619  const Catalog_Namespace::SessionInfo* session_info);
620 
623  std::list<const ColumnDescriptor*> column_descs_;
626  std::map<int, StringDictionary*> dict_map_;
627 
628  private:
629  bool loadToShard(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
630  size_t row_count,
631  const TableDescriptor* shard_table,
632  bool checkpoint,
633  const Catalog_Namespace::SessionInfo* session_info);
635  std::vector<OneShardBuffers>& all_shard_import_buffers,
636  std::vector<size_t>& all_shard_row_counts,
637  const OneShardBuffers& import_buffers,
638  const size_t row_count,
639  const size_t shard_count,
640  const Catalog_Namespace::SessionInfo* session_info);
642  std::vector<OneShardBuffers>& all_shard_import_buffers,
643  std::vector<size_t>& all_shard_row_counts,
644  const OneShardBuffers& import_buffers,
645  const size_t row_count,
646  const size_t shard_count,
647  const Catalog_Namespace::SessionInfo* session_info);
648  void fillShardRow(const size_t row_index,
649  OneShardBuffers& shard_output_buffers,
650  const OneShardBuffers& import_buffers);
651 
652  bool adding_columns_ = false;
653  std::mutex loader_mutex_;
654  std::string error_msg_;
655 };
656 
657 struct ImportStatus {
658  std::chrono::steady_clock::time_point start;
659  std::chrono::steady_clock::time_point end;
663  std::chrono::duration<size_t, std::milli> elapsed;
664  bool load_failed = false;
665  std::string load_msg;
666  int thread_id; // to recall thread_id after thread exit
668  : start(std::chrono::steady_clock::now())
669  , rows_completed(0)
670  , rows_estimated(0)
671  , rows_rejected(0)
672  , elapsed(0)
673  , thread_id(0) {}
674 
678  if (is.load_failed) {
679  load_failed = true;
680  load_msg = is.load_msg;
681  }
682 
683  return *this;
684  }
685 };
686 
688  public:
690  DataStreamSink(const CopyParams& copy_params, const std::string file_path)
691  : copy_params(copy_params), file_path(file_path) {}
692  virtual ~DataStreamSink() {}
694  const std::string& file_path,
695  const bool decompressed,
696  const Catalog_Namespace::SessionInfo* session_info) = 0;
697 #ifdef ENABLE_IMPORT_PARQUET
698  virtual void import_parquet(std::vector<std::string>& file_paths,
699  const Catalog_Namespace::SessionInfo* session_info);
700  virtual void import_local_parquet(
701  const std::string& file_path,
702  const Catalog_Namespace::SessionInfo* session_info) = 0;
703 #endif
704  const CopyParams& get_copy_params() const { return copy_params; }
705  void import_compressed(std::vector<std::string>& file_paths,
706  const Catalog_Namespace::SessionInfo* session_info);
707 
708  protected:
710 
712  const std::string file_path;
713  FILE* p_file = nullptr;
716  size_t total_file_size{0};
717  std::vector<size_t> file_offsets;
718  std::mutex file_offsets_mutex;
719 };
720 
721 class Detector : public DataStreamSink {
722  public:
723  Detector(const boost::filesystem::path& fp, CopyParams& cp);
724 
725 #ifdef ENABLE_IMPORT_PARQUET
726  void import_local_parquet(const std::string& file_path,
727  const Catalog_Namespace::SessionInfo* session_info) override;
728 #endif
729  static SQLTypes detect_sqltype(const std::string& str);
730  std::vector<std::string> get_headers();
731  std::vector<std::vector<std::string>> raw_rows;
732  std::vector<std::vector<std::string>> get_sample_rows(size_t n);
733  bool has_headers = false;
734 
735  std::vector<SQLTypeInfo> getBestColumnTypes() const;
736 
737  static constexpr size_t kDefaultSampleRowsCount{100};
738 
739  private:
740  void init();
741  void read_file();
742  void detect_row_delimiter();
743  void split_raw_data();
744  std::vector<SQLTypes> detect_column_types(const std::vector<std::string>& row);
745  static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b);
746  void find_best_sqltypes();
747  std::vector<SQLTypes> find_best_sqltypes(
748  const std::vector<std::vector<std::string>>& raw_rows,
749  const CopyParams& copy_params);
750  std::vector<SQLTypes> find_best_sqltypes(
751  const std::vector<std::vector<std::string>>::const_iterator& row_begin,
752  const std::vector<std::vector<std::string>>::const_iterator& row_end,
753  const CopyParams& copy_params);
754 
755  std::vector<EncodingType> find_best_encodings(
756  const std::vector<std::vector<std::string>>::const_iterator& row_begin,
757  const std::vector<std::vector<std::string>>::const_iterator& row_end,
758  const std::vector<SQLTypes>& best_types);
759 
760  bool detect_headers(const std::vector<SQLTypes>& first_types,
761  const std::vector<SQLTypes>& rest_types);
764  const std::string& file_path,
765  const bool decompressed,
766  const Catalog_Namespace::SessionInfo* session_info) override;
767  std::string raw_data;
768  boost::filesystem::path file_path;
769  std::chrono::duration<double> timeout{1};
770  std::string line1;
771 #if defined(ENABLE_IMPORT_PARQUET)
772  std::optional<foreign_storage::DataPreview> data_preview_;
773 #endif
774  std::vector<SQLTypes> best_sqltypes;
775  std::vector<EncodingType> best_encodings;
776 };
777 
778 class Importer : public DataStreamSink, public AbstractImporter {
779  public:
781  const TableDescriptor* t,
782  const std::string& f,
783  const CopyParams& p);
784  Importer(Loader* providedLoader, const std::string& f, const CopyParams& p);
785  ~Importer() override;
786  ImportStatus import(const Catalog_Namespace::SessionInfo* session_info) override;
788  const std::string& file_path,
789  const bool decompressed,
790  const Catalog_Namespace::SessionInfo* session_info) override;
791  ImportStatus importGDAL(const std::map<std::string, std::string>& colname_to_src,
792  const Catalog_Namespace::SessionInfo* session_info,
793  const bool is_raster);
794  const CopyParams& get_copy_params() const { return copy_params; }
795  const std::list<const ColumnDescriptor*>& get_column_descs() const {
796  return loader->get_column_descs();
797  }
798  void load(const std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
799  size_t row_count,
800  const Catalog_Namespace::SessionInfo* session_info);
801  std::vector<std::vector<std::unique_ptr<TypedImportBuffer>>>& get_import_buffers_vec() {
802  return import_buffers_vec;
803  }
804  std::vector<std::unique_ptr<TypedImportBuffer>>& get_import_buffers(int i) {
805  return import_buffers_vec[i];
806  }
807  const bool* get_is_array() const { return is_array_a.get(); }
808 #ifdef ENABLE_IMPORT_PARQUET
809  void import_local_parquet(const std::string& file_path,
810  const Catalog_Namespace::SessionInfo* session_info) override;
811 #endif
812  static ImportStatus get_import_status(const std::string& id);
813  static void set_import_status(const std::string& id, const ImportStatus is);
814  static const std::list<ColumnDescriptor> gdalToColumnDescriptors(
815  const std::string& fileName,
816  const bool is_raster,
817  const std::string& geoColumnName,
818  const CopyParams& copy_params);
819  static void readMetadataSampleGDAL(
820  const std::string& fileName,
821  const std::string& geoColumnName,
822  std::map<std::string, std::vector<std::string>>& metadata,
823  int rowLimit,
824  const CopyParams& copy_params);
825  static bool gdalFileExists(const std::string& path, const CopyParams& copy_params);
826  static bool gdalFileOrDirectoryExists(const std::string& path,
827  const CopyParams& copy_params);
828  static std::vector<std::string> gdalGetAllFilesInArchive(
829  const std::string& archive_path,
830  const CopyParams& copy_params);
833  GeoFileLayerInfo(const std::string& name_, GeoFileLayerContents contents_)
834  : name(name_), contents(contents_) {}
835  std::string name;
837  };
838  static std::vector<GeoFileLayerInfo> gdalGetLayersInGeoFile(
839  const std::string& file_name,
840  const CopyParams& copy_params);
841  Catalog_Namespace::Catalog& getCatalog() { return loader->getCatalog(); }
842  static void set_geo_physical_import_buffer(
843  const Catalog_Namespace::Catalog& catalog,
844  const ColumnDescriptor* cd,
845  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
846  size_t& col_idx,
847  std::vector<double>& coords,
848  std::vector<double>& bounds,
849  std::vector<int>& ring_sizes,
850  std::vector<int>& poly_rings,
851  int render_group,
852  const bool force_null = false);
854  const Catalog_Namespace::Catalog& catalog,
855  const ColumnDescriptor* cd,
856  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers,
857  size_t& col_idx,
858  std::vector<std::vector<double>>& coords_column,
859  std::vector<std::vector<double>>& bounds_column,
860  std::vector<std::vector<int>>& ring_sizes_column,
861  std::vector<std::vector<int>>& poly_rings_column,
862  std::vector<int>& render_groups_column);
863  void checkpoint(const std::vector<Catalog_Namespace::TableEpochInfo>& table_epochs);
864  auto getLoader() const { return loader.get(); }
865 
866  private:
867  static bool gdalStatInternal(const std::string& path,
868  const CopyParams& copy_params,
869  bool also_dir);
871  const std::string& fileName,
872  const CopyParams& copy_params);
873 
874  ImportStatus importGDALGeo(const std::map<std::string, std::string>& colname_to_src,
875  const Catalog_Namespace::SessionInfo* session_info);
877 
878  static const std::list<ColumnDescriptor> gdalToColumnDescriptorsGeo(
879  const std::string& fileName,
880  const std::string& geoColumnName,
881  const CopyParams& copy_params);
882  static const std::list<ColumnDescriptor> gdalToColumnDescriptorsRaster(
883  const std::string& fileName,
884  const std::string& geoColumnName,
885  const CopyParams& copy_params);
886 
887  std::string import_id;
888  size_t file_size;
889  size_t max_threads;
890  char* buffer[2];
891  std::vector<std::vector<std::unique_ptr<TypedImportBuffer>>> import_buffers_vec;
892  std::unique_ptr<Loader> loader;
893  std::unique_ptr<bool[]> is_array_a;
894  static std::mutex init_gdal_mutex;
895 };
896 
897 std::vector<std::unique_ptr<TypedImportBuffer>> setup_column_loaders(
898  const TableDescriptor* td,
899  Loader* loader);
900 
901 std::vector<std::unique_ptr<TypedImportBuffer>> fill_missing_columns(
903  Fragmenter_Namespace::InsertData& insert_data);
904 
905 std::unique_ptr<AbstractImporter> create_importer(
907  const TableDescriptor* td,
908  const std::string& copy_from_source,
909  const import_export::CopyParams& copy_params);
910 
911 } // namespace import_export
912 
913 #endif // _IMPORTER_H_
std::pair< size_t, size_t > ArraySliceRange
Definition: Importer.h:75
Loader(Catalog_Namespace::Catalog &c, const TableDescriptor *t, LoadCallbackType load_callback=nullptr)
Definition: Importer.h:559
const std::list< const ColumnDescriptor * > & get_column_descs() const
Definition: Importer.h:573
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:382
virtual std::vector< Catalog_Namespace::TableEpochInfo > getTableEpochs() const
Definition: Importer.cpp:4674
ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override
Definition: Importer.cpp:4443
ImportStatus importGDAL(const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info, const bool is_raster)
Definition: Importer.cpp:5326
std::mutex loader_mutex_
Definition: Importer.h:653
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:317
StringDictionary * getStringDictionary() const
Definition: Importer.h:321
ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override
Definition: Importer.cpp:3163
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
void addBigint(const int64_t v)
Definition: Importer.h:246
std::string cat(Ts &&...args)
OptionalStringVector & addStringArray()
Definition: Importer.h:258
void addSmallint(const int16_t v)
Definition: Importer.h:242
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
Definition: sqltypes.h:66
SQLTypes
Definition: sqltypes.h:55
TypedImportBuffer(const ColumnDescriptor *col_desc, StringDictionary *string_dict)
Definition: Importer.h:96
void import_compressed(std::vector< std::string > &file_paths, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:4164
const TableDescriptor * getTableDesc() const
Definition: Importer.h:572
void dropColumns(const std::vector< int > &columns)
Definition: Importer.cpp:3132
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
void addString(const std::string_view v)
Definition: Importer.h:252
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
void find_best_sqltypes_and_headers()
Definition: Importer.cpp:3415
std::vector< SQLTypeInfo > getBestColumnTypes() const
Definition: Importer.cpp:3576
StringDictionary * string_dict_
Definition: Importer.h:548
void load(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3592
std::atomic< int > nerrors
Definition: Importer.h:80
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:394
static void set_geo_physical_import_buffer_columnar(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, std::vector< int > &render_groups_column)
Definition: Importer.cpp:1732
void addDouble(const double v)
Definition: Importer.h:250
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
Definition: Importer.cpp:6230
Importer(Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
Definition: Importer.cpp:171
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
const bool * get_is_array() const
Definition: Importer.h:807
std::vector< ArrayDatum > * getStringArrayDictBuffer() const
Definition: Importer.h:387
const TableDescriptor * table_desc_
Definition: Importer.h:622
virtual void checkpoint()
Definition: Importer.cpp:4666
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:6215
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > & get_import_buffers_vec()
Definition: Importer.h:801
ImportStatus importGDALGeo(const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:5336
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:774
static std::vector< GeoFileLayerInfo > gdalGetLayersInGeoFile(const std::string &file_name, const CopyParams &copy_params)
Definition: Importer.cpp:5257
std::chrono::duration< size_t, std::milli > elapsed
Definition: Importer.h:663
void distributeToShards(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2949
const CopyParams & get_copy_params() const
Definition: Importer.h:704
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
std::vector< std::unique_ptr< TypedImportBuffer > > & get_import_buffers(int i)
Definition: Importer.h:804
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:5118
GeoFileLayerInfo(const std::string &name_, GeoFileLayerContents contents_)
Definition: Importer.h:833
static bool gdalFileExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5153
std::unique_ptr< AbstractImporter > create_importer(Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
Definition: Importer.cpp:6302
std::vector< double > * double_buffer_
Definition: Importer.h:535
constexpr double f
Definition: Utm.h:31
void addFloat(const float v)
Definition: Importer.h:248
Fragmenter_Namespace::InsertData insert_data_
Definition: Importer.h:625
std::vector< std::string > * getStringBuffer() const
Definition: Importer.h:377
size_t add_values(const ColumnDescriptor *cd, const TColumn &data)
Definition: Importer.cpp:976
DataStreamSink(const CopyParams &copy_params, const std::string file_path)
Definition: Importer.h:690
ImportStatus & operator+=(const ImportStatus &is)
Definition: Importer.h:675
constexpr double a
Definition: Utm.h:32
void addStringArray(const OptionalStringVector &arr)
Definition: Importer.h:263
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219
This file contains the class specification and related data structures for Catalog.
void addGeoString(const std::string_view v)
Definition: Importer.h:254
virtual ImportStatus importDelimited(const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info)=0
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:3293
std::vector< EncodingType > find_best_encodings(const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
Definition: Importer.cpp:3491
auto del_values(std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
void setAddingColumns(const bool adding_columns)
Definition: Importer.h:599
std::vector< std::unique_ptr< TypedImportBuffer >> OneShardBuffers
Definition: Importer.h:613
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:3014
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:238
auto getLoader() const
Definition: Importer.h:864
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
void addDictEncodedStringArray(const std::vector< OptionalStringVector > &string_array_vec)
Definition: Importer.h:269
void addTinyint(const int8_t v)
Definition: Importer.h:240
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
static void readMetadataSampleGDAL(const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
Definition: Importer.cpp:4727
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
int8_t * getAsBytes() const
Definition: Importer.h:323
std::string error_msg_
Definition: Importer.h:654
void addInt(const int32_t v)
Definition: Importer.h:244
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:841
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:731
virtual bool load(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, const size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2761
specifies the content in-memory of a row in the column metadata table
void fillShardRow(const size_t row_index, OneShardBuffers &shard_output_buffers, const OneShardBuffers &import_buffers)
Definition: Importer.cpp:2816
DEVICE Array()
Definition: heavydbTypes.h:230
bool isAddingColumns() const
Definition: Importer.h:600
std::vector< EncodingType > best_encodings
Definition: Importer.h:775
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:891
static constexpr size_t kDefaultSampleRowsCount
Definition: Importer.h:737
bool g_enable_smem_group_by true
boost::filesystem::path file_path
Definition: Importer.h:768
size_t getElementSize() const
Definition: Importer.h:350
int8_t * getStringDictBuffer() const
Definition: Importer.h:391
static bool gdalFileOrDirectoryExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5158
std::set< int64_t > rows
Definition: Importer.h:79
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:893
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:524
bool checkpoint() noexcept
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:239
Definition: sqltypes.h:69
Definition: sqltypes.h:70
Detector(const boost::filesystem::path &fp, CopyParams &cp)
Definition: Importer.cpp:3748
static Geospatial::GDAL::DataSourceUqPtr openGDALDataSource(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4685
virtual bool loadNoCheckpoint(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, const size_t row_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2754
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:389
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b)
Definition: Importer.cpp:3391
std::list< const ColumnDescriptor * > column_descs_
Definition: Importer.h:623
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
Definition: GDAL.h:48
static const std::list< ColumnDescriptor > gdalToColumnDescriptorsGeo(const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:5006
static ArrayDatum composeNullPointCoords(const SQLTypeInfo &coords_ti, const SQLTypeInfo &geo_ti)
Definition: Importer.cpp:398
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
bool loadToShard(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, const TableDescriptor *shard_table, bool checkpoint, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3075
Catalog_Namespace::Catalog & getCatalog() const
Definition: Importer.h:571
ImportStatus archivePlumber(const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:3638
std::string getErrorMessage()
Definition: Importer.h:602
std::chrono::duration< double > timeout
Definition: Importer.h:769
Definition: sqltypes.h:58
std::vector< std::string > * getGeoStringBuffer() const
Definition: Importer.h:379
std::vector< std::vector< std::string > > get_sample_rows(size_t n)
Definition: Importer.cpp:3542
bool detect_headers(const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
Definition: Importer.cpp:3527
#define IS_STRING(T)
Definition: sqltypes.h:299
std::string import_id
Definition: Importer.h:887
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
size_t add_arrow_values(const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
Definition: Importer.cpp:873
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3602
std::chrono::steady_clock::time_point start
Definition: Importer.h:658
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
virtual bool loadImpl(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, bool checkpoint, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2982
static const std::list< ColumnDescriptor > gdalToColumnDescriptorsRaster(const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4934
std::vector< SQLTypes > detect_column_types(const std::vector< std::string > &row)
Definition: Importer.cpp:3383
std::vector< std::string > get_headers()
Definition: Importer.cpp:3557
Catalog_Namespace::Catalog & catalog_
Definition: Importer.h:621
const CopyParams & get_copy_params() const
Definition: Importer.h:794
bool g_enable_watchdog false
Definition: Execute.cpp:79
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
ImportStatus importGDALRaster(const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:5649
#define CHECK(condition)
Definition: Logger.h:291
std::string raw_data
Definition: Importer.h:767
static ImportStatus get_import_status(const std::string &id)
Definition: Importer.cpp:230
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:319
StringDictionary * getStringDict(const ColumnDescriptor *cd) const
Definition: Importer.h:577
void distributeToShardsExistingColumns(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2890
static const std::list< ColumnDescriptor > gdalToColumnDescriptors(const std::string &fileName, const bool is_raster, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4922
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5230
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
static constexpr size_t MAX_STRLEN
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1627
void addDefaultValues(const ColumnDescriptor *cd, size_t num_rows)
Definition: Importer.cpp:1446
Definition: sqltypes.h:62
SQLTypeInfo columnType
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:383
bool is_string() const
Definition: sqltypes.h:580
LoadCallbackType load_callback_
Definition: Importer.h:624
void distributeToShardsNewColumns(std::vector< OneShardBuffers > &all_shard_import_buffers, std::vector< size_t > &all_shard_row_counts, const OneShardBuffers &import_buffers, const size_t row_count, const size_t shard_count, const Catalog_Namespace::SessionInfo *session_info)
Definition: Importer.cpp:2928
constexpr double n
Definition: Utm.h:38
std::shared_timed_mutex shared_mutex
static std::mutex init_gdal_mutex
Definition: Importer.h:894
std::vector< size_t > file_offsets
Definition: Importer.h:717
void add_value(const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
Definition: Importer.cpp:528
std::map< int, StringDictionary * > dict_map_
Definition: Importer.h:626
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537
std::chrono::steady_clock::time_point end
Definition: Importer.h:659
std::vector< ArrayDatum > * getArrayBuffer() const
Definition: Importer.h:381
heavyai::shared_mutex import_mutex_
Definition: Importer.h:715
std::unique_ptr< Loader > loader
Definition: Importer.h:892
std::function< bool(const std::vector< std::unique_ptr< TypedImportBuffer >> &, std::vector< DataBlockPtr > &, size_t)> LoadCallbackType
Definition: Importer.h:555
void addDictEncodedString(const std::vector< std::string > &string_vec)
Definition: Importer.cpp:488
const std::list< const ColumnDescriptor * > & get_column_descs() const
Definition: Importer.h:795
const std::string file_path
Definition: Importer.h:712
virtual void setTableEpochs(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:4679