OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

SessionMap::iterator get_session_from_map (const TSessionId &session, SessionMap &session_map)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path, bool include_gz)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 3571 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3571  {
3572  // check for compressed file or file bundle
3573  if (boost::iends_with(path, ".zip")) {
3574  // zip archive
3575  path = "/vsizip/" + path;
3576  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3577  boost::iends_with(path, ".tar.gz")) {
3578  // tar archive (compressed or uncompressed)
3579  path = "/vsitar/" + path;
3580  }
3581 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 3564 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3564  {
3565  // single gzip'd file (not an archive)?
3566  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
3567  path = "/vsigzip/" + path;
3568  }
3569 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 3542 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_MAPD_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3542  {
3543  // do we support network file access?
3544  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
3545 
3546  // modify head of filename based on source location
3547  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
3548  if (!gdal_network) {
3549  THROW_MAPD_EXCEPTION(
3550  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3551  }
3552  // invoke GDAL CURL virtual file reader
3553  path = "/vsicurl/" + path;
3554  } else if (boost::istarts_with(path, "s3://")) {
3555  if (!gdal_network) {
3556  THROW_MAPD_EXCEPTION(
3557  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3558  }
3559  // invoke GDAL S3 virtual file reader
3560  boost::replace_first(path, "s3://", "/vsis3/");
3561  }
3562 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:110
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:109

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2709 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::prepare_loader_generic().

2709  {
2710  if (td && td->nShards) {
2711  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2712  }
2713 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2715 of file DBHandler.cpp.

References setup::name, THROW_MAPD_EXCEPTION, and to_lower().

Referenced by DBHandler::prepare_loader_generic().

2716  {
2717  std::unordered_set<std::string> unique_names;
2718  for (const auto& name : column_names) {
2719  auto lower_name = to_lower(name);
2720  if (unique_names.find(lower_name) != unique_names.end()) {
2721  THROW_MAPD_EXCEPTION("Column " + name + " is mentioned multiple times");
2722  } else {
2723  unique_names.insert(lower_name);
2724  }
2725  }
2726  for (const auto& cd : descs) {
2727  auto iter = unique_names.find(to_lower(cd->columnName));
2728  if (iter != unique_names.end()) {
2729  unique_names.erase(iter);
2730  }
2731  }
2732  if (!unique_names.empty()) {
2733  THROW_MAPD_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
2734  }
2735 }
std::string to_lower(const std::string &str)
string name
Definition: setup.in.py:62
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:110

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2741 of file DBHandler.cpp.

References generate_TableFunctionsFactory_init::j, THROW_MAPD_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::load_table_binary_columnar_internal().

2742  {
2743  std::vector<int> desc_to_column_ids;
2744  if (column_names.empty()) {
2745  int col_idx = 0;
2746  for (const auto& cd : descs) {
2747  if (!cd->isGeoPhyCol) {
2748  desc_to_column_ids.push_back(col_idx);
2749  ++col_idx;
2750  }
2751  }
2752  } else {
2753  for (const auto& cd : descs) {
2754  if (!cd->isGeoPhyCol) {
2755  bool found = false;
2756  for (size_t j = 0; j < column_names.size(); ++j) {
2757  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
2758  found = true;
2759  desc_to_column_ids.push_back(j);
2760  break;
2761  }
2762  }
2763  if (!found) {
2764  if (!cd->columnType.get_notnull()) {
2765  desc_to_column_ids.push_back(-1);
2766  } else {
2767  THROW_MAPD_EXCEPTION("Column '" + cd->columnName +
2768  "' cannot be omitted due to NOT NULL constraint");
2769  }
2770  }
2771  }
2772  }
2773  }
2774  return desc_to_column_ids;
2775 }
std::string to_lower(const std::string &str)
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:110

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 2143 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

2144  {
2145  std::ostringstream oss;
2146  for (const auto& [table_name, col_names] : table_col_names) {
2147  oss << ":" << table_name;
2148  for (const auto& col_name : col_names) {
2149  oss << "," << col_name;
2150  }
2151  }
2152  return oss.str();
2153 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1545 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1546  {
1547  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1548  boost::regex::extended | boost::regex::icase};
1549  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1550  boost::sregex_token_iterator end;
1551  std::unordered_set<std::string> uc_column_names;
1552  std::unordered_set<std::string> uc_column_table_qualifiers;
1553  for (; tok_it != end; ++tok_it) {
1554  std::string column_name = *tok_it;
1555  std::vector<std::string> column_tokens;
1556  boost::split(column_tokens, column_name, boost::is_any_of("."));
1557  if (column_tokens.size() == 2) {
1558  // If the column name is qualified, take user's word.
1559  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1560  } else {
1561  uc_column_names.insert(to_upper(column_name));
1562  }
1563  }
1564  return {uc_column_names, uc_column_table_qualifiers};
1565 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 3652 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3653  {
3654  // get the recursive list of all files in the archive
3655  std::vector<std::string> files =
3656  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
3657 
3658  // report the list
3659  LOG(INFO) << "Found " << files.size() << " files in Archive "
3660  << remove_vsi_prefixes(archive_path);
3661  for (const auto& file : files) {
3662  LOG(INFO) << " " << file;
3663  }
3664 
3665  // scan the list for the first candidate file
3666  bool found_suitable_file = false;
3667  std::string file_name;
3668  for (const auto& file : files) {
3669  if (is_a_supported_geo_file(file, false)) {
3670  file_name = file;
3671  found_suitable_file = true;
3672  break;
3673  }
3674  }
3675 
3676  // if we didn't find anything
3677  if (!found_suitable_file) {
3678  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
3679  remove_vsi_prefixes(archive_path);
3680  file_name.clear();
3681  }
3682 
3683  // done
3684  return file_name;
3685 }
#define LOG(tag)
Definition: Logger.h:188
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:3583
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:4812
bool is_a_supported_geo_file(const std::string &path, bool include_gz)
Definition: DBHandler.cpp:3621

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 2901 of file DBHandler.cpp.

Referenced by DBHandler::load_table_binary_columnar_internal().

2901  {
2902  if (!column.nulls.empty()) {
2903  return column.nulls.size();
2904  } else {
2905  // it is a very bold estimate but later we check it against REAL data
2906  // and if this function returns a wrong result (e.g. both int and string
2907  // vectors are filled with values), we get an error
2908  return column.data.int_col.size() + column.data.arr_col.size() +
2909  column.data.real_col.size() + column.data.str_col.size();
2910  }
2911 }

+ Here is the caller graph for this function:

SessionMap::iterator anonymous_namespace{DBHandler.cpp}::get_session_from_map ( const TSessionId &  session,
SessionMap &  session_map 
)

Definition at line 123 of file DBHandler.cpp.

References THROW_MAPD_EXCEPTION.

Referenced by DBHandler::expire_idle_sessions_unsafe(), and DBHandler::get_session_it_unsafe().

124  {
125  auto session_it = session_map.find(session);
126  if (session_it == session_map.end()) {
127  THROW_MAPD_EXCEPTION("Session not valid.");
128  }
129  return session_it;
130 }
#define THROW_MAPD_EXCEPTION(errstr)
Definition: DBHandler.cpp:110

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 3639 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3639  {
3640  if (!path_has_valid_filename(path)) {
3641  return false;
3642  }
3643  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
3644  return true;
3645  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
3646  boost::iends_with(path, ".tar.gz")) {
3647  return true;
3648  }
3649  return false;
3650 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3613

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path,
bool  include_gz 
)

Definition at line 3621 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), find_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3621  {
3622  if (!path_has_valid_filename(path)) {
3623  return false;
3624  }
3625  if (include_gz) {
3626  if (boost::iends_with(path, ".geojson.gz") || boost::iends_with(path, ".json.gz")) {
3627  return true;
3628  }
3629  }
3630  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
3631  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
3632  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
3633  boost::iends_with(path, ".gdb.zip")) {
3634  return true;
3635  }
3636  return false;
3637 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:3613

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 3687 of file DBHandler.cpp.

Referenced by validate_import_file_path_if_local().

3687  {
3688  return (!boost::istarts_with(file_path, "s3://") &&
3689  !boost::istarts_with(file_path, "http://") &&
3690  !boost::istarts_with(file_path, "https://"));
3691 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 3128 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

3128  {
3129  RecordBatchVector batches;
3130  try {
3131  // TODO(wesm): Make this simpler in general, see ARROW-1600
3132  auto stream_buffer =
3133  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3134  static_cast<int64_t>(stream.size()));
3135 
3136  arrow::io::BufferReader buf_reader(stream_buffer);
3137  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3138  ARROW_ASSIGN_OR_THROW(batch_reader,
3139  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3140 
3141  while (true) {
3142  std::shared_ptr<arrow::RecordBatch> batch;
3143  // Read batch (zero-copy) from the stream
3144  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3145  if (batch == nullptr) {
3146  break;
3147  }
3148  batches.emplace_back(std::move(batch));
3149  }
3150  } catch (const std::exception& e) {
3151  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3152  }
3153  return batches;
3154 }
#define LOG(tag)
Definition: Logger.h:188
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3113
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3115

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 3613 of file DBHandler.cpp.

References logger::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

3613  {
3614  auto filename = boost::filesystem::path(path).filename().string();
3615  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
3616  return false;
3617  }
3618  return true;
3619 }
std::string filename(char const *path)
Definition: Logger.cpp:62

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 3605 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

3605  {
3606  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
3607  boost::istarts_with(path, "https://")) {
3608  return false;
3609  }
3610  return !boost::filesystem::path(path).is_absolute();
3611 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 3583 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

3583  {
3584  std::string path(path_in);
3585 
3586  // these will be first
3587  if (boost::istarts_with(path, "/vsizip/")) {
3588  boost::replace_first(path, "/vsizip/", "");
3589  } else if (boost::istarts_with(path, "/vsitar/")) {
3590  boost::replace_first(path, "/vsitar/", "");
3591  } else if (boost::istarts_with(path, "/vsigzip/")) {
3592  boost::replace_first(path, "/vsigzip/", "");
3593  }
3594 
3595  // then these
3596  if (boost::istarts_with(path, "/vsicurl/")) {
3597  boost::replace_first(path, "/vsicurl/", "");
3598  } else if (boost::istarts_with(path, "/vsis3/")) {
3599  boost::replace_first(path, "/vsis3/", "s3://");
3600  }
3601 
3602  return path;
3603 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 4505 of file DBHandler.cpp.

References LINESTRING, MULTIPOLYGON, POINT, and POLYGON.

Referenced by DBHandler::import_geo_table().

4505  {
4506  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
4507  t == TDatumType::LINESTRING || t == TDatumType::POINT);
4508 }
#define LINESTRING
#define MULTIPOLYGON
#define POINT
char * t
#define POLYGON

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 3693 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_geo_table(), and DBHandler::import_table().

3693  {
3694  if (is_local_file(file_path)) {
3695  ddl_utils::validate_allowed_file_path(file_path, ddl_utils::DataTransferType::IMPORT);
3696  }
3697 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:613
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:3687

+ Here is the call graph for this function:

+ Here is the caller graph for this function: