OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

bool dashboard_exists (const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
TTableRefreshInfo get_refresh_info (const TableDescriptor *td)
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
std::unique_ptr
< Catalog_Namespace::CustomExpression >
create_custom_expr_from_thrift_obj (const TCustomExpression &t_custom_expr, const Catalog &catalog)
 
TCustomExpression create_thrift_obj_from_custom_expr (const CustomExpression &custom_expr, const Catalog &catalog)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 
std::string TTypeInfo_TypeToString (const TDatumType::type &t)
 
std::string get_mismatch_attr_warning_text (const std::string &table_name, const std::string &file_path, const std::string &column_name, const std::string &attr, const std::string &got, const std::string &expected)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 3991 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3991  {
3992  // check for compressed file or file bundle
3993  if (boost::iends_with(path, ".zip")) {
3994  // zip archive
3995  path = "/vsizip/" + path;
3996  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3997  boost::iends_with(path, ".tar.gz")) {
3998  // tar archive (compressed or uncompressed)
3999  path = "/vsitar/" + path;
4000  }
4001 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 3984 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3984  {
3985  // single gzip'd file (not an archive)?
3986  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
3987  path = "/vsigzip/" + path;
3988  }
3989 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 3962 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3962  {
3963  // do we support network file access?
3964  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
3965 
3966  // modify head of filename based on source location
3967  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
3968  if (!gdal_network) {
3969  THROW_DB_EXCEPTION(
3970  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3971  }
3972  // invoke GDAL CURL virtual file reader
3973  path = "/vsicurl/" + path;
3974  } else if (boost::istarts_with(path, "s3://")) {
3975  if (!gdal_network) {
3976  THROW_DB_EXCEPTION(
3977  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3978  }
3979  // invoke GDAL S3 virtual file reader
3980  boost::replace_first(path, "s3://", "/vsis3/");
3981  }
3982 }
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2877 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::prepare_loader_generic().

2877  {
2878  if (td && td->nShards) {
2879  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2880  }
2881 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2883 of file DBHandler.cpp.

References setup::name, THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::prepare_loader_generic().

2884  {
2885  std::unordered_set<std::string> unique_names;
2886  for (const auto& name : column_names) {
2887  auto lower_name = to_lower(name);
2888  if (unique_names.find(lower_name) != unique_names.end()) {
2889  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
2890  } else {
2891  unique_names.insert(lower_name);
2892  }
2893  }
2894  for (const auto& cd : descs) {
2895  auto iter = unique_names.find(to_lower(cd->columnName));
2896  if (iter != unique_names.end()) {
2897  unique_names.erase(iter);
2898  }
2899  }
2900  if (!unique_names.empty()) {
2901  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
2902  }
2903 }
std::string to_lower(const std::string &str)
string name
Definition: setup.in.py:72
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2909 of file DBHandler.cpp.

References THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::loadTableBinaryColumnarInternal().

2910  {
2911  std::vector<int> desc_to_column_ids;
2912  if (column_names.empty()) {
2913  int col_idx = 0;
2914  for (const auto& cd : descs) {
2915  if (!cd->isGeoPhyCol) {
2916  desc_to_column_ids.push_back(col_idx);
2917  ++col_idx;
2918  }
2919  }
2920  } else {
2921  for (const auto& cd : descs) {
2922  if (!cd->isGeoPhyCol) {
2923  bool found = false;
2924  for (size_t j = 0; j < column_names.size(); ++j) {
2925  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
2926  found = true;
2927  desc_to_column_ids.push_back(j);
2928  break;
2929  }
2930  }
2931  if (!found) {
2932  if (!cd->columnType.get_notnull()) {
2933  desc_to_column_ids.push_back(-1);
2934  } else {
2935  THROW_DB_EXCEPTION("Column '" + cd->columnName +
2936  "' cannot be omitted due to NOT NULL constraint");
2937  }
2938  }
2939  }
2940  }
2941  }
2942  return desc_to_column_ids;
2943 }
std::string to_lower(const std::string &str)
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<Catalog_Namespace::CustomExpression> anonymous_namespace{DBHandler.cpp}::create_custom_expr_from_thrift_obj ( const TCustomExpression &  t_custom_expr,
const Catalog &  catalog 
)

Definition at line 4363 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::Catalog::getMetadataForTable(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::create_custom_expression().

4365  {
4366  if (t_custom_expr.data_source_name.empty()) {
4367  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4368  }
4369  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4370  << "Unexpected data source type: "
4371  << static_cast<int>(t_custom_expr.data_source_type);
4372  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4373  if (!td) {
4374  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4375  t_custom_expr.data_source_name + "\" that does not exist.")
4376  }
4377  DataSourceType data_source_type = DataSourceType::TABLE;
4378  return std::make_unique<CustomExpression>(
4379  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4380 }
#define CHECK(condition)
Definition: Logger.h:289
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TCustomExpression anonymous_namespace{DBHandler.cpp}::create_thrift_obj_from_custom_expr ( const CustomExpression &  custom_expr,
const Catalog &  catalog 
)

Definition at line 4382 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::CustomExpression::data_source_id, Catalog_Namespace::CustomExpression::data_source_type, Catalog_Namespace::CustomExpression::expression_json, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::CustomExpression::id, Catalog_Namespace::CustomExpression::is_deleted, LOG, Catalog_Namespace::CustomExpression::name, TableDescriptor::tableName, and logger::WARNING.

Referenced by DBHandler::get_custom_expressions().

4383  {
4384  TCustomExpression t_custom_expr;
4385  t_custom_expr.id = custom_expr.id;
4386  t_custom_expr.name = custom_expr.name;
4387  t_custom_expr.expression_json = custom_expr.expression_json;
4388  t_custom_expr.data_source_id = custom_expr.data_source_id;
4389  t_custom_expr.is_deleted = custom_expr.is_deleted;
4390  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4391  << "Unexpected data source type: "
4392  << static_cast<int>(custom_expr.data_source_type);
4393  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4394  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4395  if (td) {
4396  t_custom_expr.data_source_name = td->tableName;
4397  } else {
4398  LOG(WARNING)
4399  << "Custom expression references a deleted data source. Custom expression id: "
4400  << custom_expr.id << ", name: " << custom_expr.name;
4401  }
4402  return t_custom_expr;
4403 }
std::string tableName
#define LOG(tag)
Definition: Logger.h:283
#define CHECK(condition)
Definition: Logger.h:289
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::dashboard_exists ( const Catalog_Namespace::Catalog &  cat,
const int32_t  user_id,
const std::string &  dashboard_name 
)

Definition at line 149 of file DBHandler.cpp.

References Catalog_Namespace::Catalog::getMetadataForDashboard(), and to_string().

Referenced by DBHandler::create_dashboard().

151  {
152  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
153 }
std::string to_string(char const *&&v)
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 2131 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

2132  {
2133  std::ostringstream oss;
2134  for (const auto& [table_name, col_names] : table_col_names) {
2135  oss << ":" << table_name;
2136  for (const auto& col_name : col_names) {
2137  oss << "," << col_name;
2138  }
2139  }
2140  return oss.str();
2141 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1486 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1487  {
1488  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1489  boost::regex::extended | boost::regex::icase};
1490  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1491  boost::sregex_token_iterator end;
1492  std::unordered_set<std::string> uc_column_names;
1493  std::unordered_set<std::string> uc_column_table_qualifiers;
1494  for (; tok_it != end; ++tok_it) {
1495  std::string column_name = *tok_it;
1496  std::vector<std::string> column_tokens;
1497  boost::split(column_tokens, column_name, boost::is_any_of("."));
1498  if (column_tokens.size() == 2) {
1499  // If the column name is qualified, take user's word.
1500  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1501  } else {
1502  uc_column_names.insert(to_upper(column_name));
1503  }
1504  }
1505  return {uc_column_names, uc_column_table_qualifiers};
1506 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 4070 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4071  {
4072  // get the recursive list of all files in the archive
4073  std::vector<std::string> files =
4074  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4075 
4076  // report the list
4077  LOG(INFO) << "Found " << files.size() << " files in Archive "
4078  << remove_vsi_prefixes(archive_path);
4079  for (const auto& file : files) {
4080  LOG(INFO) << " " << file;
4081  }
4082 
4083  // scan the list for the first candidate file
4084  bool found_suitable_file = false;
4085  std::string file_name;
4086  for (const auto& file : files) {
4087  if (is_a_supported_geo_file(file)) {
4088  file_name = file;
4089  found_suitable_file = true;
4090  break;
4091  }
4092  }
4093 
4094  // if we didn't find anything
4095  if (!found_suitable_file) {
4096  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4097  remove_vsi_prefixes(archive_path);
4098  file_name.clear();
4099  }
4100 
4101  // done
4102  return file_name;
4103 }
#define LOG(tag)
Definition: Logger.h:283
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4003
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5230
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4041

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 3211 of file DBHandler.cpp.

Referenced by DBHandler::loadTableBinaryColumnarInternal().

3211  {
3212  if (!column.nulls.empty()) {
3213  return column.nulls.size();
3214  } else {
3215  // it is a very bold estimate but later we check it against REAL data
3216  // and if this function returns a wrong result (e.g. both int and string
3217  // vectors are filled with values), we get an error
3218  return column.data.int_col.size() + column.data.arr_col.size() +
3219  column.data.real_col.size() + column.data.str_col.size();
3220  }
3221 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::get_mismatch_attr_warning_text ( const std::string &  table_name,
const std::string &  file_path,
const std::string &  column_name,
const std::string &  attr,
const std::string &  got,
const std::string &  expected 
)

Definition at line 5165 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5170  {
5171  return "Issue encountered in geo/raster file '" + file_path +
5172  "' while appending to table '" + table_name + "'. Column '" + column_name +
5173  "' " + attr + " mismatch (got '" + got + "', expected '" + expected + "')";
5174 }

+ Here is the caller graph for this function:

TTableRefreshInfo anonymous_namespace{DBHandler.cpp}::get_refresh_info ( const TableDescriptor *  td)

Definition at line 2296 of file DBHandler.cpp.

References QueryRunner::ALL, foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE, foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE, CHECK, shared::convert_temporal_to_iso_format(), TableDescriptor::isForeignTable(), kTIMESTAMP, foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE, foreign_storage::ForeignTable::NULL_REFRESH_TIME, foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY, foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY, foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY, foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY, foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE, and UNREACHABLE.

Referenced by DBHandler::get_table_details_impl().

2296  {
2297  CHECK(td->isForeignTable());
2298  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2299  CHECK(foreign_table);
2300  TTableRefreshInfo refresh_info;
2301  const auto& update_type =
2302  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY);
2303  CHECK(update_type.has_value());
2304  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2305  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2306  } else if (update_type.value() ==
2307  foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE) {
2308  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2309  } else {
2310  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2311  }
2312 
2313  const auto& timing_type =
2314  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY);
2315  CHECK(timing_type.has_value());
2316  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2317  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2318  refresh_info.interval_count = -1;
2319  } else if (timing_type.value() ==
2320  foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE) {
2321  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2322  const auto& start_date_time = foreign_table->getOption(
2323  foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY);
2324  CHECK(start_date_time.has_value());
2325  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2326  refresh_info.start_date_time =
2327  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2328  const auto& interval =
2329  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2330  CHECK(interval.has_value());
2331  const auto& interval_str = interval.value();
2332  refresh_info.interval_count =
2333  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2334  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2335  if (interval_type == 'H') {
2336  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2337  } else if (interval_type == 'D') {
2338  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2339  } else if (interval_type == 'S') {
2340  // This use case is for development only.
2341  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2342  } else {
2343  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2344  }
2345  } else {
2346  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2347  }
2348  if (foreign_table->last_refresh_time !=
2349  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2350  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2351  {kTIMESTAMP}, foreign_table->last_refresh_time);
2352  }
2353  if (foreign_table->next_refresh_time !=
2354  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2355  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2356  {kTIMESTAMP}, foreign_table->next_refresh_time);
2357  }
2358  return refresh_info;
2359 }
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:109
#define UNREACHABLE()
Definition: Logger.h:333
bool isForeignTable() const
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:50
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
#define CHECK(condition)
Definition: Logger.h:289
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:54
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:52

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 4057 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4057  {
4058  if (!path_has_valid_filename(path)) {
4059  return false;
4060  }
4061  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4062  return true;
4063  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4064  boost::iends_with(path, ".tar.gz")) {
4065  return true;
4066  }
4067  return false;
4068 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4033

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path)

Definition at line 4041 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by find_first_geo_file_in_archive().

4041  {
4042  if (!path_has_valid_filename(path)) {
4043  return false;
4044  }
4045  // this is now just for files that we want to recognize
4046  // as geo when inside an archive (see below)
4047  // @TODO(se) make this more flexible?
4048  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4049  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4050  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4051  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4052  return true;
4053  }
4054  return false;
4055 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4033

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 4105 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), and validate_import_file_path_if_local().

4105  {
4106  return (!boost::istarts_with(file_path, "s3://") &&
4107  !boost::istarts_with(file_path, "http://") &&
4108  !boost::istarts_with(file_path, "https://"));
4109 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 3387 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

3387  {
3388  RecordBatchVector batches;
3389  try {
3390  // TODO(wesm): Make this simpler in general, see ARROW-1600
3391  auto stream_buffer =
3392  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3393  static_cast<int64_t>(stream.size()));
3394 
3395  arrow::io::BufferReader buf_reader(stream_buffer);
3396  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3397  ARROW_ASSIGN_OR_THROW(batch_reader,
3398  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3399 
3400  while (true) {
3401  std::shared_ptr<arrow::RecordBatch> batch;
3402  // Read batch (zero-copy) from the stream
3403  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3404  if (batch == nullptr) {
3405  break;
3406  }
3407  batches.emplace_back(std::move(batch));
3408  }
3409  } catch (const std::exception& e) {
3410  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3411  }
3412  return batches;
3413 }
#define LOG(tag)
Definition: Logger.h:283
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3372
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3374

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 4033 of file DBHandler.cpp.

References nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

4033  {
4034  auto filename = boost::filesystem::path(path).filename().string();
4035  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4036  return false;
4037  }
4038  return true;
4039 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 4025 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4025  {
4026  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4027  boost::istarts_with(path, "https://")) {
4028  return false;
4029  }
4030  return !boost::filesystem::path(path).is_absolute();
4031 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 4003 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

4003  {
4004  std::string path(path_in);
4005 
4006  // these will be first
4007  if (boost::istarts_with(path, "/vsizip/")) {
4008  boost::replace_first(path, "/vsizip/", "");
4009  } else if (boost::istarts_with(path, "/vsitar/")) {
4010  boost::replace_first(path, "/vsitar/", "");
4011  } else if (boost::istarts_with(path, "/vsigzip/")) {
4012  boost::replace_first(path, "/vsigzip/", "");
4013  }
4014 
4015  // then these
4016  if (boost::istarts_with(path, "/vsicurl/")) {
4017  boost::replace_first(path, "/vsicurl/", "");
4018  } else if (boost::istarts_with(path, "/vsis3/")) {
4019  boost::replace_first(path, "/vsis3/", "s3://");
4020  }
4021 
4022  return path;
4023 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 5153 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5153  {
5154  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5155  t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5156  t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5157 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::TTypeInfo_TypeToString ( const TDatumType::type &  t)

Definition at line 5159 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5159  {
5160  std::stringstream ss;
5161  ss << t;
5162  return ss.str();
5163 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 4111 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_table(), and DBHandler::importGeoTableSingle().

4111  {
4112  if (is_local_file(file_path)) {
4113  ddl_utils::validate_allowed_file_path(
4114  file_path, ddl_utils::DataTransferType::IMPORT, true);
4115  }
4116 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:785
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4105

+ Here is the call graph for this function:

+ Here is the caller graph for this function: