OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

SessionMap::iterator get_session_from_map (const TSessionId &session, SessionMap &session_map)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path, bool include_gz)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 3206 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3206  {
3207  // check for compressed file or file bundle
3208  if (boost::iends_with(path, ".zip")) {
3209  // zip archive
3210  path = "/vsizip/" + path;
3211  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3212  boost::iends_with(path, ".tar.gz")) {
3213  // tar archive (compressed or uncompressed)
3214  path = "/vsitar/" + path;
3215  }
3216 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 3199 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3199  {
3200  // single gzip'd file (not an archive)?
3201  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
3202  path = "/vsigzip/" + path;
3203  }
3204 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 3177 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_MAPD_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3177  {
3178  // do we support network file access?
3179  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
3180 
3181  // modify head of filename based on source location
3182  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
3183  if (!gdal_network) {
3184  THROW_MAPD_EXCEPTION(
3185  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3186  }
3187  // invoke GDAL CURL virtual file reader
3188  path = "/vsicurl/" + path;
3189  } else if (boost::istarts_with(path, "s3://")) {
3190  if (!gdal_network) {
3191  THROW_MAPD_EXCEPTION(
3192  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3193  }
3194  // invoke GDAL S3 virtual file reader
3195  boost::replace_first(path, "s3://", "/vsis3/");
3196  }
3197 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:114
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:109

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2404 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), and DBHandler::prepare_columnar_loader().

2404  {
2405  if (td && td->nShards) {
2406  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2407  }
2408 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2410 of file DBHandler.cpp.

References setup::name, THROW_MAPD_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), and DBHandler::prepare_columnar_loader().

2411  {
2412  std::unordered_set<std::string> unique_names;
2413  for (const auto& name : column_names) {
2414  auto lower_name = to_lower(name);
2415  if (unique_names.find(lower_name) != unique_names.end()) {
2416  THROW_MAPD_EXCEPTION("Column " + name + " is mentioned multiple times");
2417  } else {
2418  unique_names.insert(lower_name);
2419  }
2420  }
2421  for (const auto& cd : descs) {
2422  auto iter = unique_names.find(to_lower(cd->columnName));
2423  if (iter != unique_names.end()) {
2424  unique_names.erase(iter);
2425  }
2426  }
2427  if (!unique_names.empty()) {
2428  THROW_MAPD_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
2429  }
2430 }
std::string to_lower(const std::string &str)
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:114
string name
Definition: setup.py:35

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2436 of file DBHandler.cpp.

References THROW_MAPD_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::load_table_binary_columnar().

2437  {
2438  std::vector<int> desc_to_column_ids;
2439  if (column_names.empty()) {
2440  int col_idx = 0;
2441  for (const auto& cd : descs) {
2442  if (!cd->isGeoPhyCol) {
2443  desc_to_column_ids.push_back(col_idx);
2444  ++col_idx;
2445  }
2446  }
2447  } else {
2448  for (const auto& cd : descs) {
2449  if (!cd->isGeoPhyCol) {
2450  bool found = false;
2451  for (size_t j = 0; j < column_names.size(); ++j) {
2452  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
2453  found = true;
2454  desc_to_column_ids.push_back(j);
2455  break;
2456  }
2457  }
2458  if (!found) {
2459  if (!cd->columnType.get_notnull()) {
2460  desc_to_column_ids.push_back(-1);
2461  } else {
2462  THROW_MAPD_EXCEPTION("Column '" + cd->columnName +
2463  "' cannot be omitted due to NOT NULL constraint");
2464  }
2465  }
2466  }
2467  }
2468  }
2469  return desc_to_column_ids;
2470 }
std::string to_lower(const std::string &str)
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 1875 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

1876  {
1877  std::ostringstream oss;
1878  for (const auto& [table_name, col_names] : table_col_names) {
1879  oss << ":" << table_name;
1880  for (const auto& col_name : col_names) {
1881  oss << "," << col_name;
1882  }
1883  }
1884  return oss.str();
1885 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1281 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1282  {
1283  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1284  boost::regex::extended | boost::regex::icase};
1285  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1286  boost::sregex_token_iterator end;
1287  std::unordered_set<std::string> uc_column_names;
1288  std::unordered_set<std::string> uc_column_table_qualifiers;
1289  for (; tok_it != end; ++tok_it) {
1290  std::string column_name = *tok_it;
1291  std::vector<std::string> column_tokens;
1292  boost::split(column_tokens, column_name, boost::is_any_of("."));
1293  if (column_tokens.size() == 2) {
1294  // If the column name is qualified, take user's word.
1295  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1296  } else {
1297  uc_column_names.insert(to_upper(column_name));
1298  }
1299  }
1300  return {uc_column_names, uc_column_table_qualifiers};
1301 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 3287 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3288  {
3289  // get the recursive list of all files in the archive
3290  std::vector<std::string> files =
3291  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
3292 
3293  // report the list
3294  LOG(INFO) << "Found " << files.size() << " files in Archive "
3295  << remove_vsi_prefixes(archive_path);
3296  for (const auto& file : files) {
3297  LOG(INFO) << " " << file;
3298  }
3299 
3300  // scan the list for the first candidate file
3301  bool found_suitable_file = false;
3302  std::string file_name;
3303  for (const auto& file : files) {
3304  if (is_a_supported_geo_file(file, false)) {
3305  file_name = file;
3306  found_suitable_file = true;
3307  break;
3308  }
3309  }
3310 
3311  // if we didn't find anything
3312  if (!found_suitable_file) {
3313  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
3314  remove_vsi_prefixes(archive_path);
3315  file_name.clear();
3316  }
3317 
3318  // done
3319  return file_name;
3320 }
#define LOG(tag)
Definition: Logger.h:188
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:3218
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:4676
bool is_a_supported_geo_file(const std::string &path, bool include_gz)
Definition: DBHandler.cpp:3256

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 2614 of file DBHandler.cpp.

Referenced by DBHandler::load_table_binary_columnar().

2614  {
2615  if (!column.nulls.empty()) {
2616  return column.nulls.size();
2617  } else {
2618  // it is a very bold estimate but later we check it against REAL data
2619  // and if this function returns a wrong result (e.g. both int and string
2620  // vectors are filled with values), we get an error
2621  return column.data.int_col.size() + column.data.arr_col.size() +
2622  column.data.real_col.size() + column.data.str_col.size();
2623  }
2624 }

+ Here is the caller graph for this function:

SessionMap::iterator anonymous_namespace{DBHandler.cpp}::get_session_from_map ( const TSessionId &  session,
SessionMap &  session_map 
)

Definition at line 127 of file DBHandler.cpp.

References THROW_MAPD_EXCEPTION.

Referenced by DBHandler::get_session_it_unsafe().

128  {
129  auto session_it = session_map.find(session);
130  if (session_it == session_map.end()) {
131  THROW_MAPD_EXCEPTION("Session not valid.");
132  }
133  return session_it;
134 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:114

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 3274 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3274  {
3275  if (!path_has_valid_filename(path)) {
3276  return false;
3277  }
3278  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
3279  return true;
3280  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3281  boost::iends_with(path, ".tar.gz")) {
3282  return true;
3283  }
3284  return false;
3285 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3248

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path,
bool  include_gz 
)

Definition at line 3256 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), find_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3256  {
3257  if (!path_has_valid_filename(path)) {
3258  return false;
3259  }
3260  if (include_gz) {
3261  if (boost::iends_with(path, ".geojson.gz") || boost::iends_with(path, ".json.gz")) {
3262  return true;
3263  }
3264  }
3265  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
3266  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
3267  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
3268  boost::iends_with(path, ".gdb.zip")) {
3269  return true;
3270  }
3271  return false;
3272 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3248

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 3322 of file DBHandler.cpp.

Referenced by validate_import_file_path_if_local().

3322  {
3323  return (!boost::istarts_with(file_path, "s3://") &&
3324  !boost::istarts_with(file_path, "http://") &&
3325  !boost::istarts_with(file_path, "https://"));
3326 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 2739 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

2739  {
2740  RecordBatchVector batches;
2741  try {
2742  // TODO(wesm): Make this simpler in general, see ARROW-1600
2743  auto stream_buffer =
2744  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
2745  static_cast<int64_t>(stream.size()));
2746 
2747  arrow::io::BufferReader buf_reader(stream_buffer);
2748  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
2749  ARROW_ASSIGN_OR_THROW(batch_reader,
2750  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
2751 
2752  while (true) {
2753  std::shared_ptr<arrow::RecordBatch> batch;
2754  // Read batch (zero-copy) from the stream
2755  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
2756  if (batch == nullptr) {
2757  break;
2758  }
2759  batches.emplace_back(std::move(batch));
2760  }
2761  } catch (const std::exception& e) {
2762  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
2763  }
2764  return batches;
2765 }
#define LOG(tag)
Definition: Logger.h:188
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:2724
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:2726

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 3248 of file DBHandler.cpp.

References logger::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

3248  {
3249  auto filename = boost::filesystem::path(path).filename().string();
3250  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
3251  return false;
3252  }
3253  return true;
3254 }
std::string filename(char const *path)
Definition: Logger.cpp:62

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 3240 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3240  {
3241  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
3242  boost::istarts_with(path, "https://")) {
3243  return false;
3244  }
3245  return !boost::filesystem::path(path).is_absolute();
3246 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 3218 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

3218  {
3219  std::string path(path_in);
3220 
3221  // these will be first
3222  if (boost::istarts_with(path, "/vsizip/")) {
3223  boost::replace_first(path, "/vsizip/", "");
3224  } else if (boost::istarts_with(path, "/vsitar/")) {
3225  boost::replace_first(path, "/vsitar/", "");
3226  } else if (boost::istarts_with(path, "/vsigzip/")) {
3227  boost::replace_first(path, "/vsigzip/", "");
3228  }
3229 
3230  // then these
3231  if (boost::istarts_with(path, "/vsicurl/")) {
3232  boost::replace_first(path, "/vsicurl/", "");
3233  } else if (boost::istarts_with(path, "/vsis3/")) {
3234  boost::replace_first(path, "/vsis3/", "s3://");
3235  }
3236 
3237  return path;
3238 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 4123 of file DBHandler.cpp.

Referenced by DBHandler::import_geo_table().

4123  {
4124  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
4125  t == TDatumType::LINESTRING || t == TDatumType::POINT);
4126 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 3328 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_geo_table(), and DBHandler::import_table().

3328  {
3329  if (is_local_file(file_path)) {
3330  ddl_utils::validate_allowed_file_path(file_path, ddl_utils::DataTransferType::IMPORT);
3331  }
3332 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:613
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:3322

+ Here is the call graph for this function:

+ Here is the caller graph for this function: