OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

bool dashboard_exists (const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
 
SessionMap::iterator get_session_from_map (const TSessionId &session, SessionMap &session_map)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
TTableRefreshInfo get_refresh_info (const TableDescriptor *td)
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
std::unique_ptr
< Catalog_Namespace::CustomExpression >
create_custom_expr_from_thrift_obj (const TCustomExpression &t_custom_expr, const Catalog &catalog)
 
TCustomExpression create_thrift_obj_from_custom_expr (const CustomExpression &custom_expr, const Catalog &catalog)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 
std::string TTypeInfo_TypeToString (const TDatumType::type &t)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 4014 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4014  {
4015  // check for compressed file or file bundle
4016  if (boost::iends_with(path, ".zip")) {
4017  // zip archive
4018  path = "/vsizip/" + path;
4019  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4020  boost::iends_with(path, ".tar.gz")) {
4021  // tar archive (compressed or uncompressed)
4022  path = "/vsitar/" + path;
4023  }
4024 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 4007 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4007  {
4008  // single gzip'd file (not an archive)?
4009  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
4010  path = "/vsigzip/" + path;
4011  }
4012 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 3985 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3985  {
3986  // do we support network file access?
3987  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
3988 
3989  // modify head of filename based on source location
3990  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
3991  if (!gdal_network) {
3992  THROW_DB_EXCEPTION(
3993  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3994  }
3995  // invoke GDAL CURL virtual file reader
3996  path = "/vsicurl/" + path;
3997  } else if (boost::istarts_with(path, "s3://")) {
3998  if (!gdal_network) {
3999  THROW_DB_EXCEPTION(
4000  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
4001  }
4002  // invoke GDAL S3 virtual file reader
4003  boost::replace_first(path, "s3://", "/vsis3/");
4004  }
4005 }
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2918 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::prepare_loader_generic().

2918  {
2919  if (td && td->nShards) {
2920  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2921  }
2922 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2924 of file DBHandler.cpp.

References setup::name, THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::prepare_loader_generic().

2925  {
2926  std::unordered_set<std::string> unique_names;
2927  for (const auto& name : column_names) {
2928  auto lower_name = to_lower(name);
2929  if (unique_names.find(lower_name) != unique_names.end()) {
2930  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
2931  } else {
2932  unique_names.insert(lower_name);
2933  }
2934  }
2935  for (const auto& cd : descs) {
2936  auto iter = unique_names.find(to_lower(cd->columnName));
2937  if (iter != unique_names.end()) {
2938  unique_names.erase(iter);
2939  }
2940  }
2941  if (!unique_names.empty()) {
2942  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
2943  }
2944 }
std::string to_lower(const std::string &str)
string name
Definition: setup.in.py:72
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2950 of file DBHandler.cpp.

References THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::loadTableBinaryColumnarInternal().

2951  {
2952  std::vector<int> desc_to_column_ids;
2953  if (column_names.empty()) {
2954  int col_idx = 0;
2955  for (const auto& cd : descs) {
2956  if (!cd->isGeoPhyCol) {
2957  desc_to_column_ids.push_back(col_idx);
2958  ++col_idx;
2959  }
2960  }
2961  } else {
2962  for (const auto& cd : descs) {
2963  if (!cd->isGeoPhyCol) {
2964  bool found = false;
2965  for (size_t j = 0; j < column_names.size(); ++j) {
2966  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
2967  found = true;
2968  desc_to_column_ids.push_back(j);
2969  break;
2970  }
2971  }
2972  if (!found) {
2973  if (!cd->columnType.get_notnull()) {
2974  desc_to_column_ids.push_back(-1);
2975  } else {
2976  THROW_DB_EXCEPTION("Column '" + cd->columnName +
2977  "' cannot be omitted due to NOT NULL constraint");
2978  }
2979  }
2980  }
2981  }
2982  }
2983  return desc_to_column_ids;
2984 }
std::string to_lower(const std::string &str)
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<Catalog_Namespace::CustomExpression> anonymous_namespace{DBHandler.cpp}::create_custom_expr_from_thrift_obj ( const TCustomExpression &  t_custom_expr,
const Catalog &  catalog 
)

Definition at line 4383 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::Catalog::getMetadataForTable(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::create_custom_expression().

4385  {
4386  if (t_custom_expr.data_source_name.empty()) {
4387  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4388  }
4389  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4390  << "Unexpected data source type: "
4391  << static_cast<int>(t_custom_expr.data_source_type);
4392  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4393  if (!td) {
4394  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4395  t_custom_expr.data_source_name + "\" that does not exist.")
4396  }
4397  DataSourceType data_source_type = DataSourceType::TABLE;
4398  return std::make_unique<CustomExpression>(
4399  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4400 }
#define CHECK(condition)
Definition: Logger.h:223
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TCustomExpression anonymous_namespace{DBHandler.cpp}::create_thrift_obj_from_custom_expr ( const CustomExpression &  custom_expr,
const Catalog catalog 
)

Definition at line 4402 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::CustomExpression::data_source_id, Catalog_Namespace::CustomExpression::data_source_type, Catalog_Namespace::CustomExpression::expression_json, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::CustomExpression::id, Catalog_Namespace::CustomExpression::is_deleted, LOG, Catalog_Namespace::CustomExpression::name, TableDescriptor::tableName, and logger::WARNING.

Referenced by DBHandler::get_custom_expressions().

4403  {
4404  TCustomExpression t_custom_expr;
4405  t_custom_expr.id = custom_expr.id;
4406  t_custom_expr.name = custom_expr.name;
4407  t_custom_expr.expression_json = custom_expr.expression_json;
4408  t_custom_expr.data_source_id = custom_expr.data_source_id;
4409  t_custom_expr.is_deleted = custom_expr.is_deleted;
4410  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4411  << "Unexpected data source type: "
4412  << static_cast<int>(custom_expr.data_source_type);
4413  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4414  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4415  if (td) {
4416  t_custom_expr.data_source_name = td->tableName;
4417  } else {
4418  LOG(WARNING)
4419  << "Custom expression references a deleted data source. Custom expression id: "
4420  << custom_expr.id << ", name: " << custom_expr.name;
4421  }
4422  return t_custom_expr;
4423 }
std::string tableName
#define LOG(tag)
Definition: Logger.h:217
#define CHECK(condition)
Definition: Logger.h:223
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::dashboard_exists ( const Catalog_Namespace::Catalog &  cat,
const int32_t  user_id,
const std::string &  dashboard_name 
)

Definition at line 142 of file DBHandler.cpp.

References Catalog_Namespace::Catalog::getMetadataForDashboard(), and to_string().

Referenced by DBHandler::create_dashboard().

144  {
145  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
146 }
std::string to_string(char const *&&v)
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 2205 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

2206  {
2207  std::ostringstream oss;
2208  for (const auto& [table_name, col_names] : table_col_names) {
2209  oss << ":" << table_name;
2210  for (const auto& col_name : col_names) {
2211  oss << "," << col_name;
2212  }
2213  }
2214  return oss.str();
2215 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1586 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1587  {
1588  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1589  boost::regex::extended | boost::regex::icase};
1590  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1591  boost::sregex_token_iterator end;
1592  std::unordered_set<std::string> uc_column_names;
1593  std::unordered_set<std::string> uc_column_table_qualifiers;
1594  for (; tok_it != end; ++tok_it) {
1595  std::string column_name = *tok_it;
1596  std::vector<std::string> column_tokens;
1597  boost::split(column_tokens, column_name, boost::is_any_of("."));
1598  if (column_tokens.size() == 2) {
1599  // If the column name is qualified, take user's word.
1600  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1601  } else {
1602  uc_column_names.insert(to_upper(column_name));
1603  }
1604  }
1605  return {uc_column_names, uc_column_table_qualifiers};
1606 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 4093 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4094  {
4095  // get the recursive list of all files in the archive
4096  std::vector<std::string> files =
4097  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4098 
4099  // report the list
4100  LOG(INFO) << "Found " << files.size() << " files in Archive "
4101  << remove_vsi_prefixes(archive_path);
4102  for (const auto& file : files) {
4103  LOG(INFO) << " " << file;
4104  }
4105 
4106  // scan the list for the first candidate file
4107  bool found_suitable_file = false;
4108  std::string file_name;
4109  for (const auto& file : files) {
4110  if (is_a_supported_geo_file(file)) {
4111  file_name = file;
4112  found_suitable_file = true;
4113  break;
4114  }
4115  }
4116 
4117  // if we didn't find anything
4118  if (!found_suitable_file) {
4119  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4120  remove_vsi_prefixes(archive_path);
4121  file_name.clear();
4122  }
4123 
4124  // done
4125  return file_name;
4126 }
#define LOG(tag)
Definition: Logger.h:217
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4026
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5209
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4064

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 3252 of file DBHandler.cpp.

Referenced by DBHandler::loadTableBinaryColumnarInternal().

3252  {
3253  if (!column.nulls.empty()) {
3254  return column.nulls.size();
3255  } else {
3256  // it is a very bold estimate but later we check it against REAL data
3257  // and if this function returns a wrong result (e.g. both int and string
3258  // vectors are filled with values), we get an error
3259  return column.data.int_col.size() + column.data.arr_col.size() +
3260  column.data.real_col.size() + column.data.str_col.size();
3261  }
3262 }

+ Here is the caller graph for this function:

TTableRefreshInfo anonymous_namespace{DBHandler.cpp}::get_refresh_info ( const TableDescriptor *  td)

Definition at line 2354 of file DBHandler.cpp.

References QueryRunner::ALL, foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE, foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE, CHECK, shared::convert_temporal_to_iso_format(), TableDescriptor::isForeignTable(), kTIMESTAMP, foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE, foreign_storage::ForeignTable::NULL_REFRESH_TIME, foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY, foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY, foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY, foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY, foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE, and UNREACHABLE.

Referenced by DBHandler::get_table_details_impl().

2354  {
2355  CHECK(td->isForeignTable());
2356  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2357  CHECK(foreign_table);
2358  TTableRefreshInfo refresh_info;
2359  const auto& update_type =
2360  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY);
2361  CHECK(update_type.has_value());
2362  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2363  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2364  } else if (update_type.value() ==
2365  foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE) {
2366  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2367  } else {
2368  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2369  }
2370 
2371  const auto& timing_type =
2372  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY);
2373  CHECK(timing_type.has_value());
2374  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2375  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2376  refresh_info.interval_count = -1;
2377  } else if (timing_type.value() ==
2378  foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE) {
2379  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2380  const auto& start_date_time = foreign_table->getOption(
2381  foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY);
2382  CHECK(start_date_time.has_value());
2383  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2384  refresh_info.start_date_time =
2385  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2386  const auto& interval =
2387  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2388  CHECK(interval.has_value());
2389  const auto& interval_str = interval.value();
2390  refresh_info.interval_count =
2391  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2392  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2393  if (interval_type == 'H') {
2394  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2395  } else if (interval_type == 'D') {
2396  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2397  } else if (interval_type == 'S') {
2398  // This use case is for development only.
2399  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2400  } else {
2401  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2402  }
2403  } else {
2404  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2405  }
2406  if (foreign_table->last_refresh_time !=
2407  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2408  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2409  {kTIMESTAMP}, foreign_table->last_refresh_time);
2410  }
2411  if (foreign_table->next_refresh_time !=
2412  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2413  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2414  {kTIMESTAMP}, foreign_table->next_refresh_time);
2415  }
2416  return refresh_info;
2417 }
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:109
#define UNREACHABLE()
Definition: Logger.h:267
bool isForeignTable() const
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:50
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
#define CHECK(condition)
Definition: Logger.h:223
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:54
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:52

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SessionMap::iterator anonymous_namespace{DBHandler.cpp}::get_session_from_map ( const TSessionId &  session,
SessionMap &  session_map 
)

Definition at line 148 of file DBHandler.cpp.

References THROW_DB_EXCEPTION.

Referenced by DBHandler::expire_idle_sessions_unsafe(), and DBHandler::get_session_it_unsafe().

149  {
150  auto session_it = session_map.find(session);
151  if (session_it == session_map.end()) {
152  THROW_DB_EXCEPTION("Session not valid.");
153  }
154  return session_it;
155 }
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:129

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 4080 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4080  {
4081  if (!path_has_valid_filename(path)) {
4082  return false;
4083  }
4084  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4085  return true;
4086  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4087  boost::iends_with(path, ".tar.gz")) {
4088  return true;
4089  }
4090  return false;
4091 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4056

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path)

Definition at line 4064 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by find_first_geo_file_in_archive().

4064  {
4065  if (!path_has_valid_filename(path)) {
4066  return false;
4067  }
4068  // this is now just for files that we want to recognize
4069  // as geo when inside an archive (see below)
4070  // @TODO(se) make this more flexible?
4071  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4072  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4073  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4074  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4075  return true;
4076  }
4077  return false;
4078 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4056

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 4128 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), and validate_import_file_path_if_local().

4128  {
4129  return (!boost::istarts_with(file_path, "s3://") &&
4130  !boost::istarts_with(file_path, "http://") &&
4131  !boost::istarts_with(file_path, "https://"));
4132 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 3420 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

3420  {
3421  RecordBatchVector batches;
3422  try {
3423  // TODO(wesm): Make this simpler in general, see ARROW-1600
3424  auto stream_buffer =
3425  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3426  static_cast<int64_t>(stream.size()));
3427 
3428  arrow::io::BufferReader buf_reader(stream_buffer);
3429  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3430  ARROW_ASSIGN_OR_THROW(batch_reader,
3431  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3432 
3433  while (true) {
3434  std::shared_ptr<arrow::RecordBatch> batch;
3435  // Read batch (zero-copy) from the stream
3436  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3437  if (batch == nullptr) {
3438  break;
3439  }
3440  batches.emplace_back(std::move(batch));
3441  }
3442  } catch (const std::exception& e) {
3443  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3444  }
3445  return batches;
3446 }
#define LOG(tag)
Definition: Logger.h:217
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3405
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3407

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 4056 of file DBHandler.cpp.

References nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

4056  {
4057  auto filename = boost::filesystem::path(path).filename().string();
4058  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4059  return false;
4060  }
4061  return true;
4062 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 4048 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4048  {
4049  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4050  boost::istarts_with(path, "https://")) {
4051  return false;
4052  }
4053  return !boost::filesystem::path(path).is_absolute();
4054 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 4026 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

4026  {
4027  std::string path(path_in);
4028 
4029  // these will be first
4030  if (boost::istarts_with(path, "/vsizip/")) {
4031  boost::replace_first(path, "/vsizip/", "");
4032  } else if (boost::istarts_with(path, "/vsitar/")) {
4033  boost::replace_first(path, "/vsitar/", "");
4034  } else if (boost::istarts_with(path, "/vsigzip/")) {
4035  boost::replace_first(path, "/vsigzip/", "");
4036  }
4037 
4038  // then these
4039  if (boost::istarts_with(path, "/vsicurl/")) {
4040  boost::replace_first(path, "/vsicurl/", "");
4041  } else if (boost::istarts_with(path, "/vsis3/")) {
4042  boost::replace_first(path, "/vsis3/", "s3://");
4043  }
4044 
4045  return path;
4046 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 5130 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5130  {
5131  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5132  t == TDatumType::LINESTRING || t == TDatumType::POINT);
5133 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::TTypeInfo_TypeToString ( const TDatumType::type &  t)

Definition at line 5135 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5135  {
5136  std::stringstream ss;
5137  ss << t;
5138  return ss.str();
5139 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 4134 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_table(), and DBHandler::importGeoTableSingle().

4134  {
4135  if (is_local_file(file_path)) {
4136  ddl_utils::validate_allowed_file_path(
4137  file_path, ddl_utils::DataTransferType::IMPORT, true);
4138  }
4139 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:771
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4128

+ Here is the call graph for this function:

+ Here is the caller graph for this function: