OmniSciDB  2e3a973ef4
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

SessionMap::iterator get_session_from_map (const TSessionId &session, SessionMap &session_map)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
void fixup_geo_column_descriptor (TColumnType &col_type, const SQLTypes subtype, const int output_srid)
 
void check_table_not_sharded (const TableDescriptor *td)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path, bool include_gz)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 

Function Documentation

◆ add_vsi_archive_prefix()

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 3014 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3014  {
3015  // check for compressed file or file bundle
3016  if (boost::iends_with(path, ".zip")) {
3017  // zip archive
3018  path = "/vsizip/" + path;
3019  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3020  boost::iends_with(path, ".tar.gz")) {
3021  // tar archive (compressed or uncompressed)
3022  path = "/vsitar/" + path;
3023  }
3024 }
+ Here is the caller graph for this function:

◆ add_vsi_geo_prefix()

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 3007 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3007  {
3008  // single gzip'd file (not an archive)?
3009  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
3010  path = "/vsigzip/" + path;
3011  }
3012 }
+ Here is the caller graph for this function:

◆ add_vsi_network_prefix()

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 2985 of file DBHandler.cpp.

References import_export::GDAL::supportsNetworkFileAccess(), and THROW_MAPD_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

2985  {
2986  // do we support network file access?
2987  bool gdal_network = import_export::GDAL::supportsNetworkFileAccess();
2988 
2989  // modify head of filename based on source location
2990  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
2991  if (!gdal_network) {
2992  THROW_MAPD_EXCEPTION(
2993  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
2994  }
2995  // invoke GDAL CURL virtual file reader
2996  path = "/vsicurl/" + path;
2997  } else if (boost::istarts_with(path, "s3://")) {
2998  if (!gdal_network) {
2999  THROW_MAPD_EXCEPTION(
3000  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3001  }
3002  // invoke GDAL S3 virtual file reader
3003  boost::replace_first(path, "s3://", "/vsis3/");
3004  }
3005 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:113
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:97
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ check_table_not_sharded()

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2367 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), and DBHandler::prepare_columnar_loader().

2367  {
2368  if (td && td->nShards) {
2369  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2370  }
2371 }
+ Here is the caller graph for this function:

◆ dump_table_col_names()

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 1827 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

1828  {
1829  std::ostringstream oss;
1830  for (const auto& [table_name, col_names] : table_col_names) {
1831  oss << ":" << table_name;
1832  for (const auto& col_name : col_names) {
1833  oss << "," << col_name;
1834  }
1835  }
1836  return oss.str();
1837 }
+ Here is the caller graph for this function:

◆ extract_projection_tokens_for_completion()

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1260 of file DBHandler.cpp.

References parse_ast::end, split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1261  {
1262  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1263  boost::regex::extended | boost::regex::icase};
1264  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1265  boost::sregex_token_iterator end;
1266  std::unordered_set<std::string> uc_column_names;
1267  std::unordered_set<std::string> uc_column_table_qualifiers;
1268  for (; tok_it != end; ++tok_it) {
1269  std::string column_name = *tok_it;
1270  std::vector<std::string> column_tokens;
1271  boost::split(column_tokens, column_name, boost::is_any_of("."));
1272  if (column_tokens.size() == 2) {
1273  // If the column name is qualified, take user's word.
1274  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1275  } else {
1276  uc_column_names.insert(to_upper(column_name));
1277  }
1278  }
1279  return {uc_column_names, uc_column_table_qualifiers};
1280 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ find_first_geo_file_in_archive()

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 3095 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3096  {
3097  // get the recursive list of all files in the archive
3098  std::vector<std::string> files =
3099  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
3100 
3101  // report the list
3102  LOG(INFO) << "Found " << files.size() << " files in Archive "
3103  << remove_vsi_prefixes(archive_path);
3104  for (const auto& file : files) {
3105  LOG(INFO) << " " << file;
3106  }
3107 
3108  // scan the list for the first candidate file
3109  bool found_suitable_file = false;
3110  std::string file_name;
3111  for (const auto& file : files) {
3112  if (is_a_supported_geo_file(file, false)) {
3113  file_name = file;
3114  found_suitable_file = true;
3115  break;
3116  }
3117  }
3118 
3119  // if we didn't find anything
3120  if (!found_suitable_file) {
3121  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
3122  remove_vsi_prefixes(archive_path);
3123  file_name.clear();
3124  }
3125 
3126  // done
3127  return file_name;
3128 }
#define LOG(tag)
Definition: Logger.h:188
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:3026
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:4638
bool is_a_supported_geo_file(const std::string &path, bool include_gz)
Definition: DBHandler.cpp:3064
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fixup_geo_column_descriptor()

void anonymous_namespace{DBHandler.cpp}::fixup_geo_column_descriptor ( TColumnType &  col_type,
const SQLTypes  subtype,
const int  output_srid 
)
inline

Definition at line 1886 of file DBHandler.cpp.

Referenced by DBHandler::convert_target_metainfo(), and DBHandler::populateThriftColumnType().

1888  {
1889  col_type.col_type.precision = static_cast<int>(subtype);
1890  col_type.col_type.scale = output_srid;
1891 }
+ Here is the caller graph for this function:

◆ get_session_from_map()

SessionMap::iterator anonymous_namespace{DBHandler.cpp}::get_session_from_map ( const TSessionId &  session,
SessionMap &  session_map 
)

Definition at line 126 of file DBHandler.cpp.

References THROW_MAPD_EXCEPTION.

Referenced by DBHandler::get_session_it_unsafe().

127  {
128  auto session_it = session_map.find(session);
129  if (session_it == session_map.end()) {
130  THROW_MAPD_EXCEPTION("Session not valid.");
131  }
132  return session_it;
133 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:113
+ Here is the caller graph for this function:

◆ is_a_supported_archive_file()

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 3082 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3082  {
3083  if (!path_has_valid_filename(path)) {
3084  return false;
3085  }
3086  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
3087  return true;
3088  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3089  boost::iends_with(path, ".tar.gz")) {
3090  return true;
3091  }
3092  return false;
3093 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3056
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_a_supported_geo_file()

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path,
bool  include_gz 
)

Definition at line 3064 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), find_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3064  {
3065  if (!path_has_valid_filename(path)) {
3066  return false;
3067  }
3068  if (include_gz) {
3069  if (boost::iends_with(path, ".geojson.gz") || boost::iends_with(path, ".json.gz")) {
3070  return true;
3071  }
3072  }
3073  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
3074  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
3075  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
3076  boost::iends_with(path, ".gdb.zip")) {
3077  return true;
3078  }
3079  return false;
3080 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3056
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_local_file()

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 3130 of file DBHandler.cpp.

Referenced by validate_import_file_path_if_local().

3130  {
3131  return (!boost::istarts_with(file_path, "s3://") &&
3132  !boost::istarts_with(file_path, "http://") &&
3133  !boost::istarts_with(file_path, "https://"));
3134 }
+ Here is the caller graph for this function:

◆ loadArrowStream()

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 2587 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

2587  {
2588  RecordBatchVector batches;
2589  try {
2590  // TODO(wesm): Make this simpler in general, see ARROW-1600
2591  auto stream_buffer =
2592  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
2593  static_cast<int64_t>(stream.size()));
2594 
2595  arrow::io::BufferReader buf_reader(stream_buffer);
2596  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
2597  ARROW_ASSIGN_OR_THROW(batch_reader,
2598  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
2599 
2600  while (true) {
2601  std::shared_ptr<arrow::RecordBatch> batch;
2602  // Read batch (zero-copy) from the stream
2603  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
2604  if (batch == nullptr) {
2605  break;
2606  }
2607  batches.emplace_back(std::move(batch));
2608  }
2609  } catch (const std::exception& e) {
2610  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
2611  }
2612  return batches;
2613 }
#define LOG(tag)
Definition: Logger.h:188
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch > > RecordBatchVector
Definition: DBHandler.cpp:2572
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:2574
+ Here is the caller graph for this function:

◆ path_has_valid_filename()

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 3056 of file DBHandler.cpp.

References logger::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

3056  {
3057  auto filename = boost::filesystem::path(path).filename().string();
3058  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
3059  return false;
3060  }
3061  return true;
3062 }
std::string filename(char const *path)
Definition: Logger.cpp:62
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ path_is_relative()

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 3048 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3048  {
3049  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
3050  boost::istarts_with(path, "https://")) {
3051  return false;
3052  }
3053  return !boost::filesystem::path(path).is_absolute();
3054 }
+ Here is the caller graph for this function:

◆ remove_vsi_prefixes()

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 3026 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

3026  {
3027  std::string path(path_in);
3028 
3029  // these will be first
3030  if (boost::istarts_with(path, "/vsizip/")) {
3031  boost::replace_first(path, "/vsizip/", "");
3032  } else if (boost::istarts_with(path, "/vsitar/")) {
3033  boost::replace_first(path, "/vsitar/", "");
3034  } else if (boost::istarts_with(path, "/vsigzip/")) {
3035  boost::replace_first(path, "/vsigzip/", "");
3036  }
3037 
3038  // then these
3039  if (boost::istarts_with(path, "/vsicurl/")) {
3040  boost::replace_first(path, "/vsicurl/", "");
3041  } else if (boost::istarts_with(path, "/vsis3/")) {
3042  boost::replace_first(path, "/vsis3/", "s3://");
3043  }
3044 
3045  return path;
3046 }
+ Here is the caller graph for this function:

◆ TTypeInfo_IsGeo()

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 3906 of file DBHandler.cpp.

References kGEOGRAPHY, kGEOMETRY, run_benchmark_import::result, and run_benchmark_import::type.

Referenced by DBHandler::import_geo_table().

3906  {
3907  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
3908  t == TDatumType::LINESTRING || t == TDatumType::POINT);
3909 }
+ Here is the caller graph for this function:

◆ validate_import_file_path_if_local()

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 3136 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_geo_table(), and DBHandler::import_table().

3136  {
3137  if (is_local_file(file_path)) {
3138  ddl_utils::validate_allowed_file_path(file_path, ddl_utils::DataTransferType::IMPORT);
3139  }
3140 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:611
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:3130
+ Here is the call graph for this function:
+ Here is the caller graph for this function: