OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

bool dashboard_exists (const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
TTableRefreshInfo get_refresh_info (const TableDescriptor *td)
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
std::unique_ptr
< Catalog_Namespace::CustomExpression > 
create_custom_expr_from_thrift_obj (const TCustomExpression &t_custom_expr, const Catalog &catalog)
 
TCustomExpression create_thrift_obj_from_custom_expr (const CustomExpression &custom_expr, const Catalog &catalog)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 
std::string TTypeInfo_TypeToString (const TDatumType::type &t)
 
std::string get_mismatch_attr_warning_text (const std::string &table_name, const std::string &file_path, const std::string &column_name, const std::string &attr, const std::string &got, const std::string &expected)
 
bool check_and_reset_in_memory_system_table (const Catalog &catalog, const TableDescriptor &td)
 
void check_in_memory_system_table_query (const std::vector< std::vector< std::string >> &selected_tables)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 4001 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4001  {
4002  // check for compressed file or file bundle
4003  if (boost::iends_with(path, ".zip")) {
4004  // zip archive
4005  path = "/vsizip/" + path;
4006  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4007  boost::iends_with(path, ".tar.gz")) {
4008  // tar archive (compressed or uncompressed)
4009  path = "/vsitar/" + path;
4010  }
4011 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 3994 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3994  {
3995  // single gzip'd file (not an archive)?
3996  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
3997  path = "/vsigzip/" + path;
3998  }
3999 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 3972 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

3972  {
3973  // do we support network file access?
3974  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
3975 
3976  // modify head of filename based on source location
3977  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
3978  if (!gdal_network) {
3979  THROW_DB_EXCEPTION(
3980  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
3981  }
3982  // invoke GDAL CURL virtual file reader
3983  path = "/vsicurl/" + path;
3984  } else if (boost::istarts_with(path, "s3://")) {
3985  if (!gdal_network) {
3986  THROW_DB_EXCEPTION(
3987  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
3988  }
3989  // invoke GDAL S3 virtual file reader
3990  boost::replace_first(path, "s3://", "/vsis3/");
3991  }
3992 }
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::check_and_reset_in_memory_system_table ( const Catalog &  catalog,
const TableDescriptor &  td 
)

Definition at line 6639 of file DBHandler.cpp.

References g_enable_system_tables, Catalog_Namespace::Catalog::getMetadataForTable(), lockmgr::TableLockMgrImpl< TableDataLockMgr >::getWriteLockForTable(), lockmgr::TableLockMgrImpl< TableSchemaLockMgr >::getWriteLockForTable(), TableDescriptor::is_in_memory_system_table, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, and TableDescriptor::tableName.

Referenced by check_in_memory_system_table_query(), and DBHandler::checkInMemorySystemTableQuery().

6640  {
6641  if (td.is_in_memory_system_table) {
6642  if (g_enable_system_tables) {
6643  // Reset system table fragmenter in order to force chunk metadata refetch.
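6644  auto table_schema_lock =
6645  lockmgr::TableSchemaLockMgr::getWriteLockForTable(catalog, td.tableName);
6646  auto table_data_lock =
6647  lockmgr::TableDataLockMgr::getWriteLockForTable(catalog, td.tableName);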
6648  catalog.removeFragmenterForTable(td.tableId);
6649  catalog.getMetadataForTable(td.tableId, true);
6650  return true;
6651  } else {
6652  throw std::runtime_error(
6653  "Query cannot be executed because use of system tables is currently "
6654  "disabled.");
6655  }
6656  }
6657  return false;
6658 }
std::string tableName
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
bool is_in_memory_system_table
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4064
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_in_memory_system_table_query ( const std::vector< std::vector< std::string >> &  selected_tables)

Definition at line 6660 of file DBHandler.cpp.

References CHECK, check_and_reset_in_memory_system_table(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and shared::kInfoSchemaDbName.

Referenced by DBHandler::parse_to_ra().

6661  {
6662  const auto info_schema_catalog =
6663  Catalog_Namespace::SysCatalog::instance().getCatalog(shared::kInfoSchemaDbName);
6664  if (info_schema_catalog) {
6665  for (const auto& table : selected_tables) {
6666  if (table[1] == shared::kInfoSchemaDbName) {
6667  auto td = info_schema_catalog->getMetadataForTable(table[0], false);
6668  CHECK(td);
6669  check_and_reset_in_memory_system_table(*info_schema_catalog, *td);
6670  }
6671  }
6672  }
6673 }
const std::string kInfoSchemaDbName
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
bool check_and_reset_in_memory_system_table(const Catalog &catalog, const TableDescriptor &td)
Definition: DBHandler.cpp:6639

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 2887 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::prepare_loader_generic().

2887  {
2888  if (td && td->nShards) {
2889  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
2890  }
2891 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2893 of file DBHandler.cpp.

References setup::name, THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::prepare_loader_generic().

2894  {
2895  std::unordered_set<std::string> unique_names;
2896  for (const auto& name : column_names) {
2897  auto lower_name = to_lower(name);
2898  if (unique_names.find(lower_name) != unique_names.end()) {
2899  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
2900  } else {
2901  unique_names.insert(lower_name);
2902  }
2903  }
2904  for (const auto& cd : descs) {
2905  auto iter = unique_names.find(to_lower(cd->columnName));
2906  if (iter != unique_names.end()) {
2907  unique_names.erase(iter);
2908  }
2909  }
2910  if (!unique_names.empty()) {
2911  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
2912  }
2913 }
std::string to_lower(const std::string &str)
string name
Definition: setup.in.py:72
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 2919 of file DBHandler.cpp.

References THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::loadTableBinaryColumnarInternal().

2920  {
2921  std::vector<int> desc_to_column_ids;
2922  if (column_names.empty()) {
2923  int col_idx = 0;
2924  for (const auto& cd : descs) {
2925  if (!cd->isGeoPhyCol) {
2926  desc_to_column_ids.push_back(col_idx);
2927  ++col_idx;
2928  }
2929  }
2930  } else {
2931  for (const auto& cd : descs) {
2932  if (!cd->isGeoPhyCol) {
2933  bool found = false;
2934  for (size_t j = 0; j < column_names.size(); ++j) {
2935  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
2936  found = true;
2937  desc_to_column_ids.push_back(j);
2938  break;
2939  }
2940  }
2941  if (!found) {
2942  if (!cd->columnType.get_notnull()) {
2943  desc_to_column_ids.push_back(-1);
2944  } else {
2945  THROW_DB_EXCEPTION("Column '" + cd->columnName +
2946  "' cannot be omitted due to NOT NULL constraint");
2947  }
2948  }
2949  }
2950  }
2951  }
2952  return desc_to_column_ids;
2953 }
std::string to_lower(const std::string &str)
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<Catalog_Namespace::CustomExpression> anonymous_namespace{DBHandler.cpp}::create_custom_expr_from_thrift_obj ( const TCustomExpression &  t_custom_expr,
const Catalog &  catalog 
)

Definition at line 4373 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::Catalog::getMetadataForTable(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::create_custom_expression().

4375  {
4376  if (t_custom_expr.data_source_name.empty()) {
4377  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4378  }
4379  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4380  << "Unexpected data source type: "
4381  << static_cast<int>(t_custom_expr.data_source_type);
4382  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4383  if (!td) {
4384  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4385  t_custom_expr.data_source_name + "\" that does not exist.")
4386  }
4387  DataSourceType data_source_type = DataSourceType::TABLE;
4388  return std::make_unique<CustomExpression>(
4389  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4390 }
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TCustomExpression anonymous_namespace{DBHandler.cpp}::create_thrift_obj_from_custom_expr ( const CustomExpression &  custom_expr,
const Catalog &  catalog 
)

Definition at line 4392 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::CustomExpression::data_source_id, Catalog_Namespace::CustomExpression::data_source_type, Catalog_Namespace::CustomExpression::expression_json, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::CustomExpression::id, Catalog_Namespace::CustomExpression::is_deleted, LOG, Catalog_Namespace::CustomExpression::name, TableDescriptor::tableName, and logger::WARNING.

Referenced by DBHandler::get_custom_expressions().

4393  {
4394  TCustomExpression t_custom_expr;
4395  t_custom_expr.id = custom_expr.id;
4396  t_custom_expr.name = custom_expr.name;
4397  t_custom_expr.expression_json = custom_expr.expression_json;
4398  t_custom_expr.data_source_id = custom_expr.data_source_id;
4399  t_custom_expr.is_deleted = custom_expr.is_deleted;
4400  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4401  << "Unexpected data source type: "
4402  << static_cast<int>(custom_expr.data_source_type);
4403  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4404  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4405  if (td) {
4406  t_custom_expr.data_source_name = td->tableName;
4407  } else {
4408  LOG(WARNING)
4409  << "Custom expression references a deleted data source. Custom expression id: "
4410  << custom_expr.id << ", name: " << custom_expr.name;
4411  }
4412  return t_custom_expr;
4413 }
std::string tableName
#define LOG(tag)
Definition: Logger.h:285
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::dashboard_exists ( const Catalog_Namespace::Catalog &  cat,
const int32_t  user_id,
const std::string &  dashboard_name 
)

Definition at line 149 of file DBHandler.cpp.

References Catalog_Namespace::Catalog::getMetadataForDashboard(), and to_string().

Referenced by DBHandler::create_dashboard().

151  {
152  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
153 }
std::string to_string(char const *&&v)
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 2141 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

2142  {
2143  std::ostringstream oss;
2144  for (const auto& [table_name, col_names] : table_col_names) {
2145  oss << ":" << table_name;
2146  for (const auto& col_name : col_names) {
2147  oss << "," << col_name;
2148  }
2149  }
2150  return oss.str();
2151 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1496 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1497  {
1498  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1499  boost::regex::extended | boost::regex::icase};
1500  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1501  boost::sregex_token_iterator end;
1502  std::unordered_set<std::string> uc_column_names;
1503  std::unordered_set<std::string> uc_column_table_qualifiers;
1504  for (; tok_it != end; ++tok_it) {
1505  std::string column_name = *tok_it;
1506  std::vector<std::string> column_tokens;
1507  boost::split(column_tokens, column_name, boost::is_any_of("."));
1508  if (column_tokens.size() == 2) {
1509  // If the column name is qualified, take user's word.
1510  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1511  } else {
1512  uc_column_names.insert(to_upper(column_name));
1513  }
1514  }
1515  return {uc_column_names, uc_column_table_qualifiers};
1516 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 4080 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4081  {
4082  // get the recursive list of all files in the archive
4083  std::vector<std::string> files =
4084  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4085 
4086  // report the list
4087  LOG(INFO) << "Found " << files.size() << " files in Archive "
4088  << remove_vsi_prefixes(archive_path);
4089  for (const auto& file : files) {
4090  LOG(INFO) << " " << file;
4091  }
4092 
4093  // scan the list for the first candidate file
4094  bool found_suitable_file = false;
4095  std::string file_name;
4096  for (const auto& file : files) {
4097  if (is_a_supported_geo_file(file)) {
4098  file_name = file;
4099  found_suitable_file = true;
4100  break;
4101  }
4102  }
4103 
4104  // if we didn't find anything
4105  if (!found_suitable_file) {
4106  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4107  remove_vsi_prefixes(archive_path);
4108  file_name.clear();
4109  }
4110 
4111  // done
4112  return file_name;
4113 }
#define LOG(tag)
Definition: Logger.h:285
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4013
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5230
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4051

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 3221 of file DBHandler.cpp.

Referenced by DBHandler::loadTableBinaryColumnarInternal().

3221  {
3222  if (!column.nulls.empty()) {
3223  return column.nulls.size();
3224  } else {
3225  // it is a very bold estimate but later we check it against REAL data
3226  // and if this function returns a wrong result (e.g. both int and string
3227  // vectors are filled with values), we get an error
3228  return column.data.int_col.size() + column.data.arr_col.size() +
3229  column.data.real_col.size() + column.data.str_col.size();
3230  }
3231 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::get_mismatch_attr_warning_text ( const std::string &  table_name,
const std::string &  file_path,
const std::string &  column_name,
const std::string &  attr,
const std::string &  got,
const std::string &  expected 
)

Definition at line 5175 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5180  {
5181  return "Issue encountered in geo/raster file '" + file_path +
5182  "' while appending to table '" + table_name + "'. Column '" + column_name +
5183  "' " + attr + " mismatch (got '" + got + "', expected '" + expected + "')";
5184 }

+ Here is the caller graph for this function:

TTableRefreshInfo anonymous_namespace{DBHandler.cpp}::get_refresh_info ( const TableDescriptor *  td)

Definition at line 2306 of file DBHandler.cpp.

References QueryRunner::ALL, foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE, foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE, CHECK, shared::convert_temporal_to_iso_format(), TableDescriptor::isForeignTable(), kTIMESTAMP, foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE, foreign_storage::ForeignTable::NULL_REFRESH_TIME, foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY, foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY, foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY, foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY, foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE, and UNREACHABLE.

Referenced by DBHandler::get_table_details_impl().

2306  {
2307  CHECK(td->isForeignTable());
2308  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2309  CHECK(foreign_table);
2310  TTableRefreshInfo refresh_info;
2311  const auto& update_type =
2312  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY);
2313  CHECK(update_type.has_value());
2314  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2315  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2316  } else if (update_type.value() ==
2317  foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE) {
2318  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2319  } else {
2320  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2321  }
2322 
2323  const auto& timing_type =
2324  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY);
2325  CHECK(timing_type.has_value());
2326  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2327  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2328  refresh_info.interval_count = -1;
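2329  } else if (timing_type.value() ==
2330  foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE) {
2331  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2332  const auto& start_date_time = foreign_table->getOption(
2333  foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY);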
2334  CHECK(start_date_time.has_value());
2335  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2336  refresh_info.start_date_time =
2337  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2338  const auto& interval =
2339  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2340  CHECK(interval.has_value());
2341  const auto& interval_str = interval.value();
2342  refresh_info.interval_count =
2343  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2344  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2345  if (interval_type == 'H') {
2346  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2347  } else if (interval_type == 'D') {
2348  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2349  } else if (interval_type == 'S') {
2350  // This use case is for development only.
2351  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2352  } else {
2353  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2354  }
2355  } else {
2356  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2357  }
2358  if (foreign_table->last_refresh_time !=
2359  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2360  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2361  {kTIMESTAMP}, foreign_table->last_refresh_time);
2362  }
2363  if (foreign_table->next_refresh_time !=
2364  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2365  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2366  {kTIMESTAMP}, foreign_table->next_refresh_time);
2367  }
2368  return refresh_info;
2369 }
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:109
#define UNREACHABLE()
Definition: Logger.h:337
bool isForeignTable() const
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:50
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
#define CHECK(condition)
Definition: Logger.h:291
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:54
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:52

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 4067 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4067  {
4068  if (!path_has_valid_filename(path)) {
4069  return false;
4070  }
4071  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4072  return true;
4073  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4074  boost::iends_with(path, ".tar.gz")) {
4075  return true;
4076  }
4077  return false;
4078 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4043

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path)

Definition at line 4051 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by find_first_geo_file_in_archive().

4051  {
4052  if (!path_has_valid_filename(path)) {
4053  return false;
4054  }
4055  // this is now just for files that we want to recognize
4056  // as geo when inside an archive (see below)
4057  // @TODO(se) make this more flexible?
4058  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4059  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4060  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4061  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4062  return true;
4063  }
4064  return false;
4065 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4043

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 4115 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), and validate_import_file_path_if_local().

4115  {
4116  return (!boost::istarts_with(file_path, "s3://") &&
4117  !boost::istarts_with(file_path, "http://") &&
4118  !boost::istarts_with(file_path, "https://"));
4119 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 3397 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

3397  {
3398  RecordBatchVector batches;
3399  try {
3400  // TODO(wesm): Make this simpler in general, see ARROW-1600
3401  auto stream_buffer =
3402  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3403  static_cast<int64_t>(stream.size()));
3404 
3405  arrow::io::BufferReader buf_reader(stream_buffer);
3406  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3407  ARROW_ASSIGN_OR_THROW(batch_reader,
3408  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3409 
3410  while (true) {
3411  std::shared_ptr<arrow::RecordBatch> batch;
3412  // Read batch (zero-copy) from the stream
3413  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3414  if (batch == nullptr) {
3415  break;
3416  }
3417  batches.emplace_back(std::move(batch));
3418  }
3419  } catch (const std::exception& e) {
3420  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3421  }
3422  return batches;
3423 }
#define LOG(tag)
Definition: Logger.h:285
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3382
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3384

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 4043 of file DBHandler.cpp.

References nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

4043  {
4044  auto filename = boost::filesystem::path(path).filename().string();
4045  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4046  return false;
4047  }
4048  return true;
4049 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 4035 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4035  {
4036  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4037  boost::istarts_with(path, "https://")) {
4038  return false;
4039  }
4040  return !boost::filesystem::path(path).is_absolute();
4041 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 4013 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

4013  {
4014  std::string path(path_in);
4015 
4016  // these will be first
4017  if (boost::istarts_with(path, "/vsizip/")) {
4018  boost::replace_first(path, "/vsizip/", "");
4019  } else if (boost::istarts_with(path, "/vsitar/")) {
4020  boost::replace_first(path, "/vsitar/", "");
4021  } else if (boost::istarts_with(path, "/vsigzip/")) {
4022  boost::replace_first(path, "/vsigzip/", "");
4023  }
4024 
4025  // then these
4026  if (boost::istarts_with(path, "/vsicurl/")) {
4027  boost::replace_first(path, "/vsicurl/", "");
4028  } else if (boost::istarts_with(path, "/vsis3/")) {
4029  boost::replace_first(path, "/vsis3/", "s3://");
4030  }
4031 
4032  return path;
4033 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 5163 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5163  {
5164  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5165  t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5166  t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5167 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::TTypeInfo_TypeToString ( const TDatumType::type &  t)

Definition at line 5169 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5169  {
5170  std::stringstream ss;
5171  ss << t;
5172  return ss.str();
5173 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 4121 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_table(), and DBHandler::importGeoTableSingle().

4121  {
4122  if (is_local_file(file_path)) {
4123  ddl_utils::validate_allowed_file_path(
4124  file_path, ddl_utils::DataTransferType::IMPORT, true);
4125  }
4126 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:785
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4115

+ Here is the call graph for this function:

+ Here is the caller graph for this function: