OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ForeignDataWrapperFactory Class Reference

#include <ForeignDataWrapperFactory.h>

Static Public Member Functions

static std::unique_ptr
< ForeignDataWrapper >
create (const std::string &data_wrapper_type, const int db_id, const ForeignTable *foreign_table)
 
static std::unique_ptr
< UserMapping >
createUserMappingProxyIfApplicable (const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
 
static std::unique_ptr
< ForeignServer >
createForeignServerProxy (const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params)
 
static std::unique_ptr
< ForeignTable >
createForeignTableProxy (const int db_id, const TableDescriptor *table, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
 
static std::unique_ptr
< ForeignDataWrapper >
createForImport (const std::string &data_wrapper_type, const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping)
 
static std::unique_ptr
< ForeignDataWrapper >
createForGeneralImport (const std::string &data_wrapper_type, const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping)
 
static const ForeignDataWrapper & createForValidation (const std::string &data_wrapper_type, const ForeignTable *foreign_table=nullptr)
 
static void validateDataWrapperType (const std::string &data_wrapper_type)
 

Static Private Attributes

static std::map< std::string,
std::unique_ptr
< ForeignDataWrapper > > 
validation_data_wrappers_
 

Detailed Description

Definition at line 93 of file ForeignDataWrapperFactory.h.

Member Function Documentation

std::unique_ptr< ForeignDataWrapper > foreign_storage::ForeignDataWrapperFactory::create ( const std::string &  data_wrapper_type,
const int  db_id,
const ForeignTable *  foreign_table 
)
static

Creates an instance of a ForeignDataWrapper for the given data wrapper type using provided database and foreign table details.

Definition at line 307 of file ForeignDataWrapperFactory.cpp.

References foreign_storage::DataWrapperType::CSV, foreign_storage::DataWrapperType::INTERNAL_CATALOG, foreign_storage::DataWrapperType::INTERNAL_MEMORY_STATS, foreign_storage::DataWrapperType::INTERNAL_STORAGE_STATS, foreign_storage::DataWrapperType::PARQUET, foreign_storage::DataWrapperType::REGEX_PARSER, UNREACHABLE, and foreign_storage::CsvDataWrapper::validateAndGetIsS3Select().

310  {
311  std::unique_ptr<ForeignDataWrapper> data_wrapper;
312  if (data_wrapper_type == DataWrapperType::CSV) {
313  if (CsvDataWrapper::validateAndGetIsS3Select(foreign_table)) {
314  UNREACHABLE();
315  } else {
316  data_wrapper = std::make_unique<CsvDataWrapper>(db_id, foreign_table);
317  }
318 #ifdef ENABLE_IMPORT_PARQUET
319  } else if (data_wrapper_type == DataWrapperType::PARQUET) {
320  data_wrapper = std::make_unique<ParquetDataWrapper>(db_id, foreign_table);
321 #endif
322  } else if (data_wrapper_type == DataWrapperType::REGEX_PARSER) {
323  data_wrapper = std::make_unique<RegexParserDataWrapper>(db_id, foreign_table);
324  } else if (data_wrapper_type == DataWrapperType::INTERNAL_CATALOG) {
325  data_wrapper = std::make_unique<InternalCatalogDataWrapper>(db_id, foreign_table);
326  } else if (data_wrapper_type == DataWrapperType::INTERNAL_MEMORY_STATS) {
327  data_wrapper = std::make_unique<InternalMemoryStatsDataWrapper>(db_id, foreign_table);
328  } else if (data_wrapper_type == DataWrapperType::INTERNAL_STORAGE_STATS) {
329  data_wrapper =
330  std::make_unique<InternalStorageStatsDataWrapper>(db_id, foreign_table);
331  } else {
332  throw std::runtime_error("Unsupported data wrapper");
333  }
334  return data_wrapper;
335 }
static constexpr char const * REGEX_PARSER
static constexpr char const * INTERNAL_STORAGE_STATS
#define UNREACHABLE()
Definition: Logger.h:267
static bool validateAndGetIsS3Select(const ForeignTable *foreign_table)
static constexpr char const * INTERNAL_CATALOG
static constexpr char const * INTERNAL_MEMORY_STATS
static constexpr char const * CSV
static constexpr char const * PARQUET

+ Here is the call graph for this function:

std::unique_ptr< ForeignServer > foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy ( const int  db_id,
const int  user_id,
const std::string &  file_path,
const import_export::CopyParams &  copy_params 
)
static

Definition at line 166 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::DataWrapperType::CSV, foreign_storage::is_s3_uri(), foreign_storage::is_valid_source_type(), import_export::kDelimitedFile, import_export::kOdbc, import_export::kParquetFile, import_export::kRegexParsedFile, foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, foreign_storage::DataWrapperType::PARQUET, foreign_storage::DataWrapperType::REGEX_PARSER, import_export::CopyParams::source_type, foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, and UNREACHABLE.

Referenced by foreign_storage::create_proxy_fsi_objects().

170  {
171  CHECK(is_valid_source_type(copy_params));
172 
173  auto foreign_server = std::make_unique<foreign_storage::ForeignServer>();
174 
175  foreign_server->id = -1;
176  foreign_server->user_id = user_id;
177  if (copy_params.source_type == import_export::SourceType::kDelimitedFile) {
178  foreign_server->data_wrapper_type = DataWrapperType::CSV;
179  } else if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
180  foreign_server->data_wrapper_type = DataWrapperType::REGEX_PARSER;
181 #ifdef ENABLE_IMPORT_PARQUET
182  } else if (copy_params.source_type == import_export::SourceType::kParquetFile) {
183  foreign_server->data_wrapper_type = DataWrapperType::PARQUET;
184 #endif
185  } else {
186  UNREACHABLE();
187  }
188  foreign_server->name = "import_proxy_server";
189 
190  if (copy_params.source_type == import_export::SourceType::kOdbc) {
191  throw std::runtime_error("ODBC storage not supported");
192  } else if (is_s3_uri(file_path)) {
193  throw std::runtime_error("AWS storage not supported");
194  } else {
195  foreign_server->options[AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY] =
196  AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE;
197  }
198 
199  return foreign_server;
200 }
static constexpr char const * REGEX_PARSER
#define UNREACHABLE()
Definition: Logger.h:267
import_export::SourceType source_type
Definition: CopyParams.h:57
bool is_valid_source_type(const import_export::CopyParams &copy_params)
bool is_s3_uri(const std::string &file_path)
#define CHECK(condition)
Definition: Logger.h:223
static constexpr char const * CSV
static constexpr char const * PARQUET

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< ForeignTable > foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy ( const int  db_id,
const TableDescriptor *  table,
const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const ForeignServer *  server 
)
static

Definition at line 202 of file ForeignDataWrapperFactory.cpp.

References import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, foreign_storage::CsvFileBufferParser::ARRAY_DELIMITER_KEY, import_export::CopyParams::array_end, foreign_storage::CsvFileBufferParser::ARRAY_MARKER_KEY, foreign_storage::bool_to_option_value(), import_export::CopyParams::buffer_size, foreign_storage::TextFileBufferParser::BUFFER_SIZE_KEY, CHECK, import_export::CopyParams::delimiter, foreign_storage::CsvFileBufferParser::DELIMITER_KEY, import_export::CopyParams::escape, foreign_storage::CsvFileBufferParser::ESCAPE_KEY, import_export::CopyParams::file_sort_order_by, foreign_storage::AbstractFileStorageDataWrapper::FILE_SORT_ORDER_BY_KEY, import_export::CopyParams::file_sort_regex, foreign_storage::AbstractFileStorageDataWrapper::FILE_SORT_REGEX_KEY, import_export::CopyParams::geo_assign_render_groups, foreign_storage::CsvFileBufferParser::GEO_ASSIGN_RENDER_GROUPS_KEY, import_export::CopyParams::geo_explode_collections, foreign_storage::CsvFileBufferParser::GEO_EXPLODE_COLLECTIONS_KEY, Catalog_Namespace::SysCatalog::getCatalog(), import_export::CopyParams::has_header, foreign_storage::CsvFileBufferParser::HEADER_KEY, Catalog_Namespace::SysCatalog::instance(), foreign_storage::is_s3_uri(), foreign_storage::is_valid_source_type(), import_export::kAutoDetect, import_export::kDelimitedFile, import_export::kHasHeader, import_export::kNoHeader, import_export::kOdbc, import_export::kParquetFile, import_export::kRegexParsedFile, import_export::CopyParams::line_delim, foreign_storage::CsvFileBufferParser::LINE_DELIMITER_KEY, import_export::CopyParams::line_regex, foreign_storage::RegexFileBufferParser::LINE_REGEX_KEY, import_export::CopyParams::line_start_regex, foreign_storage::RegexFileBufferParser::LINE_START_REGEX_KEY, import_export::CopyParams::lonlat, foreign_storage::CsvFileBufferParser::LONLAT_KEY, import_export::CopyParams::null_str, foreign_storage::CsvFileBufferParser::NULLS_KEY, foreign_storage::OptionsContainer::options, 
import_export::CopyParams::quote, foreign_storage::CsvFileBufferParser::QUOTE_KEY, import_export::CopyParams::quoted, foreign_storage::CsvFileBufferParser::QUOTED_KEY, import_export::CopyParams::regex_path_filter, foreign_storage::AbstractFileStorageDataWrapper::REGEX_PATH_FILTER_KEY, import_export::CopyParams::source_srid, foreign_storage::CsvFileBufferParser::SOURCE_SRID_KEY, import_export::CopyParams::source_type, import_export::CopyParams::threads, foreign_storage::TextFileBufferParser::THREADS_KEY, and to_string().

Referenced by foreign_storage::create_proxy_fsi_objects().

207  {
208  CHECK(is_valid_source_type(copy_params));
209 
210  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
211  auto foreign_table = std::make_unique<ForeignTable>();
212 
213  *static_cast<TableDescriptor*>(foreign_table.get()) =
214  *table; // copy table related values
215 
216  CHECK(server);
217  foreign_table->foreign_server = server;
218 
219  // populate options for regex filtering of file-paths in supported data types
223  if (copy_params.regex_path_filter.has_value()) {
225  copy_params.regex_path_filter.value();
226  }
227  if (copy_params.file_sort_order_by.has_value()) {
229  copy_params.file_sort_order_by.value();
230  }
231  if (copy_params.file_sort_regex.has_value()) {
233  copy_params.file_sort_regex.value();
234  }
235  }
236 
238  CHECK(!copy_params.line_regex.empty());
239  foreign_table->options[RegexFileBufferParser::LINE_REGEX_KEY] =
240  copy_params.line_regex;
241  if (!copy_params.line_start_regex.empty()) {
242  foreign_table->options[RegexFileBufferParser::LINE_START_REGEX_KEY] =
243  copy_params.line_start_regex;
244  }
245  foreign_table->options[TextFileBufferParser::THREADS_KEY] =
246  std::to_string(copy_params.threads);
247  }
248 
249  // setup data source options based on various criteria
250  if (copy_params.source_type == import_export::SourceType::kOdbc) {
251  throw std::runtime_error("ODBC storage not supported");
252  } else if (is_s3_uri(copy_from_source)) {
253  throw std::runtime_error("AWS storage not supported");
254  } else {
255  foreign_table->options["FILE_PATH"] = copy_from_source;
256  }
257 
258  // for CSV import
260  foreign_table->options[CsvFileBufferParser::DELIMITER_KEY] = copy_params.delimiter;
261  foreign_table->options[CsvFileBufferParser::NULLS_KEY] = copy_params.null_str;
262  switch (copy_params.has_header) {
264  foreign_table->options[CsvFileBufferParser::HEADER_KEY] = "FALSE";
265  break;
268  foreign_table->options[CsvFileBufferParser::HEADER_KEY] = "TRUE";
269  break;
270  default:
271  CHECK(false);
272  }
273  foreign_table->options[CsvFileBufferParser::QUOTED_KEY] =
274  bool_to_option_value(copy_params.quoted);
275  foreign_table->options[CsvFileBufferParser::QUOTE_KEY] = copy_params.quote;
276  foreign_table->options[CsvFileBufferParser::ESCAPE_KEY] = copy_params.escape;
277  foreign_table->options[CsvFileBufferParser::LINE_DELIMITER_KEY] =
278  copy_params.line_delim;
279  foreign_table->options[CsvFileBufferParser::ARRAY_DELIMITER_KEY] =
280  copy_params.array_delim;
281  const std::array<char, 3> array_marker{
282  copy_params.array_begin, copy_params.array_end, 0};
283  foreign_table->options[CsvFileBufferParser::ARRAY_MARKER_KEY] = array_marker.data();
284  foreign_table->options[CsvFileBufferParser::LONLAT_KEY] =
285  bool_to_option_value(copy_params.lonlat);
286  foreign_table->options[CsvFileBufferParser::GEO_ASSIGN_RENDER_GROUPS_KEY] =
288  if (copy_params.geo_explode_collections) {
289  throw std::runtime_error(
290  "geo_explode_collections is not yet supported for FSI CSV import");
291  }
292  foreign_table->options[CsvFileBufferParser::GEO_EXPLODE_COLLECTIONS_KEY] =
294  foreign_table->options[CsvFileBufferParser::SOURCE_SRID_KEY] =
295  std::to_string(copy_params.source_srid);
296 
297  foreign_table->options[TextFileBufferParser::BUFFER_SIZE_KEY] =
298  std::to_string(copy_params.buffer_size);
299  foreign_table->options[TextFileBufferParser::THREADS_KEY] =
300  std::to_string(copy_params.threads);
301  }
302 
303  foreign_table->initializeOptions();
304  return foreign_table;
305 }
static const std::string GEO_EXPLODE_COLLECTIONS_KEY
static const std::string ARRAY_MARKER_KEY
std::string to_string(char const *&&v)
ImportHeaderRow has_header
Definition: CopyParams.h:46
static const std::string SOURCE_SRID_KEY
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:83
static SysCatalog & instance()
Definition: SysCatalog.h:337
std::string bool_to_option_value(const bool value)
static const std::string LINE_DELIMITER_KEY
import_export::SourceType source_type
Definition: CopyParams.h:57
bool is_valid_source_type(const import_export::CopyParams &copy_params)
bool is_s3_uri(const std::string &file_path)
static const std::string DELIMITER_KEY
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
static const std::string ARRAY_DELIMITER_KEY
std::string line_start_regex
Definition: CopyParams.h:104
#define CHECK(condition)
Definition: Logger.h:223
static const std::string GEO_ASSIGN_RENDER_GROUPS_KEY
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:84
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:85

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< ForeignDataWrapper > foreign_storage::ForeignDataWrapperFactory::createForGeneralImport ( const std::string &  data_wrapper_type,
const int  db_id,
const ForeignTable *  foreign_table,
const UserMapping *  user_mapping 
)
static

Definition at line 118 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::DataWrapperType::CSV, anonymous_namespace{ForeignDataWrapperFactory.cpp}::is_valid_data_wrapper(), foreign_storage::DataWrapperType::PARQUET, and foreign_storage::DataWrapperType::REGEX_PARSER.

Referenced by import_export::ForeignDataImporter::importGeneral().

122  {
123  CHECK(is_valid_data_wrapper(data_wrapper_type));
124 
125  if (data_wrapper_type == DataWrapperType::CSV) {
126  return std::make_unique<CsvDataWrapper>(
127  db_id, foreign_table, user_mapping, /*disable_cache=*/true);
128  } else if (data_wrapper_type == DataWrapperType::REGEX_PARSER) {
129  return std::make_unique<RegexParserDataWrapper>(
130  db_id, foreign_table, user_mapping, true);
131  }
132 #ifdef ENABLE_IMPORT_PARQUET
133  else if (data_wrapper_type == DataWrapperType::PARQUET) {
134  return std::make_unique<ParquetDataWrapper>(
135  db_id, foreign_table, /*do_metadata_stats_validation=*/false);
136  }
137 #endif
138 
139  return {};
140 }
bool is_valid_data_wrapper(const std::string &data_wrapper_type)
static constexpr char const * REGEX_PARSER
#define CHECK(condition)
Definition: Logger.h:223
static constexpr char const * CSV
static constexpr char const * PARQUET

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< ForeignDataWrapper > foreign_storage::ForeignDataWrapperFactory::createForImport ( const std::string &  data_wrapper_type,
const int  db_id,
const ForeignTable *  foreign_table,
const UserMapping *  user_mapping 
)
static

Create for the import use-case.

Definition at line 142 of file ForeignDataWrapperFactory.cpp.

References CHECK, and foreign_storage::DataWrapperType::PARQUET.

146  {
147 #ifdef ENABLE_IMPORT_PARQUET
148  // only supported for parquet import path currently
149  CHECK(data_wrapper_type == DataWrapperType::PARQUET);
150  return std::make_unique<ParquetImporter>(db_id, foreign_table, user_mapping);
151 #else
152  return {};
153 #endif
154 }
#define CHECK(condition)
Definition: Logger.h:223
static constexpr char const * PARQUET
const ForeignDataWrapper & foreign_storage::ForeignDataWrapperFactory::createForValidation ( const std::string &  data_wrapper_type,
const ForeignTable *  foreign_table = nullptr 
)
static

Creates an instance (or gets an existing instance) of an immutable ForeignDataWrapper to be used for validation purposes. Returned instance should not be used for any stateful operations, such as fetching foreign table data/metadata.

Definition at line 337 of file ForeignDataWrapperFactory.cpp.

References CHECK, foreign_storage::DataWrapperType::CSV, foreign_storage::DataWrapperType::INTERNAL_CATALOG, foreign_storage::DataWrapperType::INTERNAL_MEMORY_STATS, foreign_storage::DataWrapperType::INTERNAL_STORAGE_STATS, foreign_storage::DataWrapperType::PARQUET, foreign_storage::DataWrapperType::REGEX_PARSER, UNREACHABLE, foreign_storage::CsvDataWrapper::validateAndGetIsS3Select(), and validation_data_wrappers_.

Referenced by foreign_storage::ForeignServer::validateStorageParameters().

339  {
340  bool is_s3_select_wrapper{false};
341  std::string data_wrapper_type_key{data_wrapper_type};
342  constexpr const char* S3_SELECT_WRAPPER_KEY = "CSV_S3_SELECT";
343  if (foreign_table && data_wrapper_type == DataWrapperType::CSV &&
344  CsvDataWrapper::validateAndGetIsS3Select(foreign_table)) {
345  is_s3_select_wrapper = true;
346  data_wrapper_type_key = S3_SELECT_WRAPPER_KEY;
347  }
348 
349  if (validation_data_wrappers_.find(data_wrapper_type_key) ==
350  validation_data_wrappers_.end()) {
351  if (data_wrapper_type == DataWrapperType::CSV) {
352  if (is_s3_select_wrapper) {
353  UNREACHABLE();
354  } else {
355  validation_data_wrappers_[data_wrapper_type_key] =
356  std::make_unique<CsvDataWrapper>();
357  }
358 #ifdef ENABLE_IMPORT_PARQUET
359  } else if (data_wrapper_type == DataWrapperType::PARQUET) {
360  validation_data_wrappers_[data_wrapper_type_key] =
361  std::make_unique<ParquetDataWrapper>();
362 #endif
363  } else if (data_wrapper_type == DataWrapperType::REGEX_PARSER) {
364  validation_data_wrappers_[data_wrapper_type_key] =
365  std::make_unique<RegexParserDataWrapper>();
366  } else if (data_wrapper_type == DataWrapperType::INTERNAL_CATALOG) {
367  validation_data_wrappers_[data_wrapper_type_key] =
368  std::make_unique<InternalCatalogDataWrapper>();
369  } else if (data_wrapper_type == DataWrapperType::INTERNAL_MEMORY_STATS) {
370  validation_data_wrappers_[data_wrapper_type_key] =
371  std::make_unique<InternalMemoryStatsDataWrapper>();
372  } else if (data_wrapper_type == DataWrapperType::INTERNAL_STORAGE_STATS) {
373  validation_data_wrappers_[data_wrapper_type_key] =
374  std::make_unique<InternalStorageStatsDataWrapper>();
375  } else {
376  UNREACHABLE();
377  }
378  }
379  CHECK(validation_data_wrappers_.find(data_wrapper_type_key) !=
380  validation_data_wrappers_.end());
381  return *validation_data_wrappers_[data_wrapper_type_key];
382 }
static constexpr char const * REGEX_PARSER
static constexpr char const * INTERNAL_STORAGE_STATS
#define UNREACHABLE()
Definition: Logger.h:267
static bool validateAndGetIsS3Select(const ForeignTable *foreign_table)
static constexpr char const * INTERNAL_CATALOG
static std::map< std::string, std::unique_ptr< ForeignDataWrapper > > validation_data_wrappers_
static constexpr char const * INTERNAL_MEMORY_STATS
#define CHECK(condition)
Definition: Logger.h:223
static constexpr char const * CSV
static constexpr char const * PARQUET

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< UserMapping > foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable ( const int  db_id,
const int  user_id,
const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const ForeignServer *  server 
)
static

Definition at line 157 of file ForeignDataWrapperFactory.cpp.

Referenced by foreign_storage::create_proxy_fsi_objects().

162  {
163  return {};
164 }

+ Here is the caller graph for this function:

void foreign_storage::ForeignDataWrapperFactory::validateDataWrapperType ( const std::string &  data_wrapper_type)
static

Checks that the given data wrapper type is valid.

Definition at line 384 of file ForeignDataWrapperFactory.cpp.

References shared::contains(), foreign_storage::DataWrapperType::INTERNAL_DATA_WRAPPERS, join(), foreign_storage::DataWrapperType::supported_data_wrapper_types, and run_benchmark_import::type.

Referenced by foreign_storage::ForeignServer::validate().

385  {
386  const auto& supported_wrapper_types = DataWrapperType::supported_data_wrapper_types;
387  if (std::find(supported_wrapper_types.begin(),
388  supported_wrapper_types.end(),
389  data_wrapper_type) == supported_wrapper_types.end()) {
390  std::vector<std::string_view> user_facing_wrapper_types;
391  for (const auto& type : supported_wrapper_types) {
392  if (!shared::contains(DataWrapperType::INTERNAL_DATA_WRAPPERS, type)) {
393  user_facing_wrapper_types.emplace_back(type);
394  }
395  }
396  throw std::runtime_error{"Invalid data wrapper type \"" + data_wrapper_type +
397  "\". Data wrapper type must be one of the following: " +
398  join(user_facing_wrapper_types, ", ") + "."};
399  }
400 }
bool contains(const T &container, const U &element)
Definition: misc.h:196
static constexpr std::array< char const *, 3 > INTERNAL_DATA_WRAPPERS
std::string join(T const &container, std::string const &delim)
static constexpr std::array< std::string_view, 6 > supported_data_wrapper_types

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

std::map< std::string, std::unique_ptr< ForeignDataWrapper > > foreign_storage::ForeignDataWrapperFactory::validation_data_wrappers_
static private

Definition at line 154 of file ForeignDataWrapperFactory.h.

Referenced by createForValidation().


The documentation for this class was generated from the following files: