OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{DBHandler.cpp} Namespace Reference

Classes

struct  ForceDisconnect
 
struct  ProjectionTokensForCompletion
 

Functions

bool dashboard_exists (const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
 
ProjectionTokensForCompletion extract_projection_tokens_for_completion (const std::string &sql)
 
std::string dump_table_col_names (const std::map< std::string, std::vector< std::string >> &table_col_names)
 
TTableRefreshInfo get_refresh_info (const TableDescriptor *td)
 
ScopeGuard pause_and_resume_executor_queue ()
 
void check_table_not_sharded (const TableDescriptor *td)
 
void check_valid_column_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
std::vector< int > column_ids_by_names (const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
 
void log_cache_size (const Catalog_Namespace::Catalog &cat)
 
void log_system_cpu_memory_status (std::string const &query, const Catalog_Namespace::Catalog &cat)
 
std::string get_load_tag (const std::string &load_tag, const std::string &table_name)
 
std::string get_import_tag (const std::string &import_tag, const std::string &table_name, const std::string &file_path)
 
size_t get_column_size (const TColumn &column)
 
RecordBatchVector loadArrowStream (const std::string &stream)
 
void add_vsi_network_prefix (std::string &path)
 
void add_vsi_geo_prefix (std::string &path)
 
void add_vsi_archive_prefix (std::string &path)
 
std::string remove_vsi_prefixes (const std::string &path_in)
 
bool path_is_relative (const std::string &path)
 
bool path_has_valid_filename (const std::string &path)
 
bool is_a_supported_geo_file (const std::string &path)
 
bool is_a_supported_archive_file (const std::string &path)
 
std::string find_first_geo_file_in_archive (const std::string &archive_path, const import_export::CopyParams &copy_params)
 
bool is_local_file (const std::string &file_path)
 
void validate_import_file_path_if_local (const std::string &file_path)
 
std::unique_ptr< Catalog_Namespace::CustomExpression > create_custom_expr_from_thrift_obj (const TCustomExpression &t_custom_expr, const Catalog &catalog)
 
TCustomExpression create_thrift_obj_from_custom_expr (const CustomExpression &custom_expr, const Catalog &catalog)
 
bool TTypeInfo_IsGeo (const TDatumType::type &t)
 
std::string TTypeInfo_TypeToString (const TDatumType::type &t)
 
std::string get_mismatch_attr_warning_text (const std::string &table_name, const std::string &file_path, const std::string &column_name, const std::string &attr, const std::string &got, const std::string &expected)
 
bool check_and_reset_in_memory_system_table (const Catalog &catalog, const TableDescriptor &td)
 
void check_in_memory_system_table_query (const std::vector< std::vector< std::string >> &selected_tables)
 

Function Documentation

void anonymous_namespace{DBHandler.cpp}::add_vsi_archive_prefix ( std::string &  path)

Definition at line 4210 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4210  {
4211  // check for compressed file or file bundle
4212  if (boost::iends_with(path, ".zip")) {
4213  // zip archive
4214  path = "/vsizip/" + path;
4215  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4216  boost::iends_with(path, ".tar.gz")) {
4217  // tar archive (compressed or uncompressed)
4218  path = "/vsitar/" + path;
4219  }
4220 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_geo_prefix ( std::string &  path)

Definition at line 4203 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4203  {
4204  // single gzip'd file (not an archive)?
4205  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
4206  path = "/vsigzip/" + path;
4207  }
4208 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::add_vsi_network_prefix ( std::string &  path)

Definition at line 4181 of file DBHandler.cpp.

References Geospatial::GDAL::supportsNetworkFileAccess(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4181  {
4182  // do we support network file access?
4183  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
4184 
4185  // modify head of filename based on source location
4186  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
4187  if (!gdal_network) {
4188  THROW_DB_EXCEPTION(
4189  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
4190  }
4191  // invoke GDAL CURL virtual file reader
4192  path = "/vsicurl/" + path;
4193  } else if (boost::istarts_with(path, "s3://")) {
4194  if (!gdal_network) {
4195  THROW_DB_EXCEPTION(
4196  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
4197  }
4198  // invoke GDAL S3 virtual file reader
4199  boost::replace_first(path, "s3://", "/vsis3/");
4200  }
4201 }
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::check_and_reset_in_memory_system_table ( const Catalog &  catalog,
const TableDescriptor &  td 
)

Definition at line 6825 of file DBHandler.cpp.

References g_enable_system_tables, Catalog_Namespace::Catalog::getMetadataForTable(), lockmgr::TableLockMgrImpl< TableDataLockMgr >::getWriteLockForTable(), lockmgr::TableLockMgrImpl< TableSchemaLockMgr >::getWriteLockForTable(), TableDescriptor::is_in_memory_system_table, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, and TableDescriptor::tableName.

Referenced by check_in_memory_system_table_query(), and DBHandler::checkInMemorySystemTableQuery().

6826  {
6827  if (td.is_in_memory_system_table) {
6828  if (g_enable_system_tables) {
6829  // Reset system table fragmenter in order to force chunk metadata refetch.
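6830  auto table_schema_lock =
6831  lockmgr::TableSchemaLockMgr::getWriteLockForTable(catalog, td.tableName);
6832  auto table_data_lock =
6833  lockmgr::TableDataLockMgr::getWriteLockForTable(catalog, td.tableName);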
6834  catalog.removeFragmenterForTable(td.tableId);
6835  catalog.getMetadataForTable(td.tableId, true);
6836  return true;
6837  } else {
6838  throw std::runtime_error(
6839  "Query cannot be executed because use of system tables is currently "
6840  "disabled.");
6841  }
6842  }
6843  return false;
6844 }
std::string tableName
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
bool is_in_memory_system_table
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4260
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_in_memory_system_table_query ( const std::vector< std::vector< std::string >> &  selected_tables)

Definition at line 6846 of file DBHandler.cpp.

References CHECK, check_and_reset_in_memory_system_table(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and shared::kInfoSchemaDbName.

Referenced by DBHandler::parse_to_ra().

6847  {
6848  const auto info_schema_catalog =
6849  SysCatalog::instance().getCatalog(shared::kInfoSchemaDbName);
6850  if (info_schema_catalog) {
6851  for (const auto& table : selected_tables) {
6852  if (table[1] == shared::kInfoSchemaDbName) {
6853  auto td = info_schema_catalog->getMetadataForTable(table[0], false);
6854  CHECK(td);
6855  check_and_reset_in_memory_system_table(*info_schema_catalog, *td);
6856  }
6857  }
6858  }
6859 }
const std::string kInfoSchemaDbName
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK(condition)
Definition: Logger.h:291
bool check_and_reset_in_memory_system_table(const Catalog &catalog, const TableDescriptor &td)
Definition: DBHandler.cpp:6825

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_table_not_sharded ( const TableDescriptor *  td)

Definition at line 3099 of file DBHandler.cpp.

References TableDescriptor::nShards.

Referenced by DBHandler::prepare_loader_generic().

3099  {
3100  if (td && td->nShards) {
3101  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
3102  }
3103 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::check_valid_column_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 3105 of file DBHandler.cpp.

References setup::name, THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::prepare_loader_generic().

3106  {
3107  std::unordered_set<std::string> unique_names;
3108  for (const auto& name : column_names) {
3109  auto lower_name = to_lower(name);
3110  if (unique_names.find(lower_name) != unique_names.end()) {
3111  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
3112  } else {
3113  unique_names.insert(lower_name);
3114  }
3115  }
3116  for (const auto& cd : descs) {
3117  auto iter = unique_names.find(to_lower(cd->columnName));
3118  if (iter != unique_names.end()) {
3119  unique_names.erase(iter);
3120  }
3121  }
3122  if (!unique_names.empty()) {
3123  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
3124  }
3125 }
std::string to_lower(const std::string &str)
string name
Definition: setup.in.py:72
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<int> anonymous_namespace{DBHandler.cpp}::column_ids_by_names ( const std::list< const ColumnDescriptor * > &  descs,
const std::vector< std::string > &  column_names 
)

Definition at line 3131 of file DBHandler.cpp.

References THROW_DB_EXCEPTION, and to_lower().

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::load_table_binary_columnar().

3132  {
3133  std::vector<int> desc_to_column_ids;
3134  if (column_names.empty()) {
3135  int col_idx = 0;
3136  for (const auto& cd : descs) {
3137  if (!cd->isGeoPhyCol) {
3138  desc_to_column_ids.push_back(col_idx);
3139  ++col_idx;
3140  }
3141  }
3142  } else {
3143  for (const auto& cd : descs) {
3144  if (!cd->isGeoPhyCol) {
3145  bool found = false;
3146  for (size_t j = 0; j < column_names.size(); ++j) {
3147  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
3148  found = true;
3149  desc_to_column_ids.push_back(j);
3150  break;
3151  }
3152  }
3153  if (!found) {
3154  if (!cd->columnType.get_notnull()) {
3155  desc_to_column_ids.push_back(-1);
3156  } else {
3157  THROW_DB_EXCEPTION("Column '" + cd->columnName +
3158  "' cannot be omitted due to NOT NULL constraint");
3159  }
3160  }
3161  }
3162  }
3163  }
3164  return desc_to_column_ids;
3165 }
std::string to_lower(const std::string &str)
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<Catalog_Namespace::CustomExpression> anonymous_namespace{DBHandler.cpp}::create_custom_expr_from_thrift_obj ( const TCustomExpression &  t_custom_expr,
const Catalog &  catalog 
)

Definition at line 4580 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::Catalog::getMetadataForTable(), and THROW_DB_EXCEPTION.

Referenced by DBHandler::create_custom_expression().

4582  {
4583  if (t_custom_expr.data_source_name.empty()) {
4584  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4585  }
4586  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4587  << "Unexpected data source type: "
4588  << static_cast<int>(t_custom_expr.data_source_type);
4589  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4590  if (!td) {
4591  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4592  t_custom_expr.data_source_name + "\" that does not exist.")
4593  }
4594  DataSourceType data_source_type = DataSourceType::TABLE;
4595  return std::make_unique<CustomExpression>(
4596  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4597 }
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

TCustomExpression anonymous_namespace{DBHandler.cpp}::create_thrift_obj_from_custom_expr ( const CustomExpression &  custom_expr,
const Catalog &  catalog 
)

Definition at line 4599 of file DBHandler.cpp.

References CHECK, Catalog_Namespace::CustomExpression::data_source_id, Catalog_Namespace::CustomExpression::data_source_type, Catalog_Namespace::CustomExpression::expression_json, Catalog_Namespace::Catalog::getMetadataForTable(), Catalog_Namespace::CustomExpression::id, Catalog_Namespace::CustomExpression::is_deleted, LOG, Catalog_Namespace::CustomExpression::name, TableDescriptor::tableName, and logger::WARNING.

Referenced by DBHandler::get_custom_expressions().

4600  {
4601  TCustomExpression t_custom_expr;
4602  t_custom_expr.id = custom_expr.id;
4603  t_custom_expr.name = custom_expr.name;
4604  t_custom_expr.expression_json = custom_expr.expression_json;
4605  t_custom_expr.data_source_id = custom_expr.data_source_id;
4606  t_custom_expr.is_deleted = custom_expr.is_deleted;
4607  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4608  << "Unexpected data source type: "
4609  << static_cast<int>(custom_expr.data_source_type);
4610  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4611  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4612  if (td) {
4613  t_custom_expr.data_source_name = td->tableName;
4614  } else {
4615  LOG(WARNING)
4616  << "Custom expression references a deleted data source. Custom expression id: "
4617  << custom_expr.id << ", name: " << custom_expr.name;
4618  }
4619  return t_custom_expr;
4620 }
std::string tableName
#define LOG(tag)
Definition: Logger.h:285
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::dashboard_exists ( const Catalog_Namespace::Catalog &  cat,
const int32_t  user_id,
const std::string &  dashboard_name 
)

Definition at line 152 of file DBHandler.cpp.

References Catalog_Namespace::Catalog::getMetadataForDashboard(), and to_string().

Referenced by DBHandler::create_dashboard().

154  {
155  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
156 }
std::string to_string(char const *&&v)
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::dump_table_col_names ( const std::map< std::string, std::vector< std::string >> &  table_col_names)

Definition at line 2304 of file DBHandler.cpp.

Referenced by DBHandler::get_result_row_for_pixel().

2305  {
2306  std::ostringstream oss;
2307  for (const auto& [table_name, col_names] : table_col_names) {
2308  oss << ":" << table_name;
2309  for (const auto& col_name : col_names) {
2310  oss << "," << col_name;
2311  }
2312  }
2313  return oss.str();
2314 }

+ Here is the caller graph for this function:

ProjectionTokensForCompletion anonymous_namespace{DBHandler.cpp}::extract_projection_tokens_for_completion ( const std::string &  sql)

Definition at line 1637 of file DBHandler.cpp.

References split(), and to_upper().

Referenced by DBHandler::get_completion_hints().

1638  {
1639  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1640  boost::regex::extended | boost::regex::icase};
1641  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1642  boost::sregex_token_iterator end;
1643  std::unordered_set<std::string> uc_column_names;
1644  std::unordered_set<std::string> uc_column_table_qualifiers;
1645  for (; tok_it != end; ++tok_it) {
1646  std::string column_name = *tok_it;
1647  std::vector<std::string> column_tokens;
1648  boost::split(column_tokens, column_name, boost::is_any_of("."));
1649  if (column_tokens.size() == 2) {
1650  // If the column name is qualified, take user's word.
1651  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1652  } else {
1653  uc_column_names.insert(to_upper(column_name));
1654  }
1655  }
1656  return {uc_column_names, uc_column_table_qualifiers};
1657 }
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::string to_upper(const std::string &str)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive ( const std::string &  archive_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 4289 of file DBHandler.cpp.

References import_export::Importer::gdalGetAllFilesInArchive(), logger::INFO, is_a_supported_geo_file(), LOG, and remove_vsi_prefixes().

Referenced by DBHandler::detect_column_types(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4290  {
4291  // get the recursive list of all files in the archive
4292  std::vector<std::string> files =
4293  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4294 
4295  // report the list
4296  LOG(INFO) << "Found " << files.size() << " files in Archive "
4297  << remove_vsi_prefixes(archive_path);
4298  for (const auto& file : files) {
4299  LOG(INFO) << " " << file;
4300  }
4301 
4302  // scan the list for the first candidate file
4303  bool found_suitable_file = false;
4304  std::string file_name;
4305  for (const auto& file : files) {
4306  if (is_a_supported_geo_file(file)) {
4307  file_name = file;
4308  found_suitable_file = true;
4309  break;
4310  }
4311  }
4312 
4313  // if we didn't find anything
4314  if (!found_suitable_file) {
4315  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4316  remove_vsi_prefixes(archive_path);
4317  file_name.clear();
4318  }
4319 
4320  // done
4321  return file_name;
4322 }
#define LOG(tag)
Definition: Logger.h:285
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4222
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5130
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4260

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t anonymous_namespace{DBHandler.cpp}::get_column_size ( const TColumn &  column)

Definition at line 3454 of file DBHandler.cpp.

Referenced by DBHandler::load_table_binary_columnar().

3454  {
3455  if (!column.nulls.empty()) {
3456  return column.nulls.size();
3457  } else {
3458  // it is a very bold estimate but later we check it against REAL data
3459  // and if this function returns a wrong result (e.g. both int and string
3460  // vectors are filled with values), we get an error
3461  return column.data.int_col.size() + column.data.arr_col.size() +
3462  column.data.real_col.size() + column.data.str_col.size();
3463  }
3464 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::get_import_tag ( const std::string &  import_tag,
const std::string &  table_name,
const std::string &  file_path 
)

Definition at line 3315 of file DBHandler.cpp.

Referenced by DBHandler::import_table(), and DBHandler::importGeoTableSingle().

3317  {
3318  std::ostringstream oss;
3319  oss << import_tag << "(" << table_name << ", file_path:" << file_path << ")";
3320  return oss.str();
3321 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::get_load_tag ( const std::string &  load_tag,
const std::string &  table_name 
)

Definition at line 3309 of file DBHandler.cpp.

Referenced by DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), and DBHandler::load_table_binary_columnar().

3309  {
3310  std::ostringstream oss;
3311  oss << load_tag << "(" << table_name << ")";
3312  return oss.str();
3313 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::get_mismatch_attr_warning_text ( const std::string &  table_name,
const std::string &  file_path,
const std::string &  column_name,
const std::string &  attr,
const std::string &  got,
const std::string &  expected 
)

Definition at line 5383 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5388  {
5389  return "Issue encountered in geo/raster file '" + file_path +
5390  "' while appending to table '" + table_name + "'. Column '" + column_name +
5391  "' " + attr + " mismatch (got '" + got + "', expected '" + expected + "')";
5392 }

+ Here is the caller graph for this function:

TTableRefreshInfo anonymous_namespace{DBHandler.cpp}::get_refresh_info ( const TableDescriptor *  td)

Definition at line 2463 of file DBHandler.cpp.

References QueryRunner::ALL, foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE, foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE, CHECK, shared::convert_temporal_to_iso_format(), TableDescriptor::isForeignTable(), kTIMESTAMP, foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE, foreign_storage::ForeignTable::NULL_REFRESH_TIME, foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY, foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY, foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY, foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY, foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE, and UNREACHABLE.

Referenced by DBHandler::get_table_details_impl().

2463  {
2464  CHECK(td->isForeignTable());
2465  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2466  CHECK(foreign_table);
2467  TTableRefreshInfo refresh_info;
2468  const auto& update_type =
2469  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_UPDATE_TYPE_KEY);
2470  CHECK(update_type.has_value());
2471  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2472  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2473  } else if (update_type.value() ==
2474  foreign_storage::ForeignTable::APPEND_REFRESH_UPDATE_TYPE) {
2475  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2476  } else {
2477  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2478  }
2479 
2480  const auto& timing_type =
2481  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_TIMING_TYPE_KEY);
2482  CHECK(timing_type.has_value());
2483  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2484  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2485  refresh_info.interval_count = -1;
2486  } else if (timing_type.value() ==
2487  foreign_storage::ForeignTable::SCHEDULE_REFRESH_TIMING_TYPE) {
2488  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2489  const auto& start_date_time = foreign_table->getOption(
2490  foreign_storage::ForeignTable::REFRESH_START_DATE_TIME_KEY);
2491  CHECK(start_date_time.has_value());
2492  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2493  refresh_info.start_date_time =
2494  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2495  const auto& interval =
2496  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2497  CHECK(interval.has_value());
2498  const auto& interval_str = interval.value();
2499  refresh_info.interval_count =
2500  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2501  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2502  if (interval_type == 'H') {
2503  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2504  } else if (interval_type == 'D') {
2505  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2506  } else if (interval_type == 'S') {
2507  // This use case is for development only.
2508  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2509  } else {
2510  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2511  }
2512  } else {
2513  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2514  }
2515  if (foreign_table->last_refresh_time !=
2516  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2517  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2518  {kTIMESTAMP}, foreign_table->last_refresh_time);
2519  }
2520  if (foreign_table->next_refresh_time !=
2521  foreign_storage::ForeignTable::NULL_REFRESH_TIME) {
2522  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2523  {kTIMESTAMP}, foreign_table->next_refresh_time);
2524  }
2525  return refresh_info;
2526 }
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:109
#define UNREACHABLE()
Definition: Logger.h:338
bool isForeignTable() const
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:54
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:52
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
#define CHECK(condition)
Definition: Logger.h:291
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:55
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_archive_file ( const std::string &  path)

Definition at line 4276 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4276  {
4277  if (!path_has_valid_filename(path)) {
4278  return false;
4279  }
4280  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4281  return true;
4282  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4283  boost::iends_with(path, ".tar.gz")) {
4284  return true;
4285  }
4286  return false;
4287 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4252

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_a_supported_geo_file ( const std::string &  path)

Definition at line 4260 of file DBHandler.cpp.

References path_has_valid_filename().

Referenced by find_first_geo_file_in_archive().

4260  {
4261  if (!path_has_valid_filename(path)) {
4262  return false;
4263  }
4264  // this is now just for files that we want to recognize
4265  // as geo when inside an archive (see below)
4266  // @TODO(se) make this more flexible?
4267  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4268  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4269  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4270  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4271  return true;
4272  }
4273  return false;
4274 }
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4252

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::is_local_file ( const std::string &  file_path)

Definition at line 4324 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), and validate_import_file_path_if_local().

4324  {
4325  return (!boost::istarts_with(file_path, "s3://") &&
4326  !boost::istarts_with(file_path, "http://") &&
4327  !boost::istarts_with(file_path, "https://"));
4328 }

+ Here is the caller graph for this function:

RecordBatchVector anonymous_namespace{DBHandler.cpp}::loadArrowStream ( const std::string &  stream)

Definition at line 3576 of file DBHandler.cpp.

References ARROW_ASSIGN_OR_THROW, ARROW_THRIFT_THROW_NOT_OK, logger::ERROR, and LOG.

Referenced by DBHandler::load_table_binary_arrow().

3576  {
3577  RecordBatchVector batches;
3578  try {
3579  // TODO(wesm): Make this simpler in general, see ARROW-1600
3580  auto stream_buffer =
3581  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3582  static_cast<int64_t>(stream.size()));
3583 
3584  arrow::io::BufferReader buf_reader(stream_buffer);
3585  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3586  ARROW_ASSIGN_OR_THROW(batch_reader,
3587  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3588 
3589  while (true) {
3590  std::shared_ptr<arrow::RecordBatch> batch;
3591  // Read batch (zero-copy) from the stream
3592  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3593  if (batch == nullptr) {
3594  break;
3595  }
3596  batches.emplace_back(std::move(batch));
3597  }
3598  } catch (const std::exception& e) {
3599  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3600  }
3601  return batches;
3602 }
#define LOG(tag)
Definition: Logger.h:285
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3561
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3563

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::log_cache_size ( const Catalog_Namespace::Catalog &  cat)

Definition at line 3167 of file DBHandler.cpp.

References BASELINE_HT, BBOX_INTERSECT_AUTO_TUNER_PARAM, BBOX_INTERSECT_HT, CHUNK_METADATA, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, BoundingBoxIntersectJoinHashTable::getBoundingBoxIntersectTuningParamCache(), DataRecycler< CACHED_ITEM_TYPE, META_INFO_TYPE >::getCurrentCacheSizeForDevice(), Executor::getExecutor(), BoundingBoxIntersectJoinHashTable::getHashTableCache(), PerfectJoinHashTable::getHashTableCache(), BaselineJoinHashTable::getHashTableCache(), QueryEngine::getInstance(), Catalog_Namespace::Catalog::getTotalMemorySizeForDictionariesForDatabase(), logger::INFO, LOG, PERFECT_HT, and Executor::UNITARY_EXECUTOR_ID.

Referenced by log_system_cpu_memory_status().

3167  {
3168  std::ostringstream oss;
3169  oss << "Cache size information {";
3170  auto executor = Executor::getExecutor(Executor::UNITARY_EXECUTOR_ID);
3171  // 1. Data recycler
3172  // 1.a Resultset Recycler
3173  auto resultset_cache_size =
3174  executor->getResultSetRecyclerHolder()
3175  .getResultSetRecycler()
3176  ->getResultSetRecyclerMetricTracker()
3177  .getCurrentCacheSize(DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3178  if (resultset_cache_size) {
3179  oss << "\"query_resultset\": " << *resultset_cache_size << " bytes, ";
3180  }
3181 
3182  // 1.b Join Hash Table Recycler
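3183  auto perfect_join_ht_cache_size =
3184  PerfectJoinHashTable::getHashTableCache()->getCurrentCacheSizeForDevice(
3185  CacheItemType::PERFECT_HT, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3186  auto baseline_join_ht_cache_size =
3187  BaselineJoinHashTable::getHashTableCache()->getCurrentCacheSizeForDevice(
3188  CacheItemType::BASELINE_HT, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3189  auto bbox_intersect_ht_cache_size =
3190  BoundingBoxIntersectJoinHashTable::getHashTableCache()
3191  ->getCurrentCacheSizeForDevice(CacheItemType::BBOX_INTERSECT_HT,
3192  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3193  auto bbox_intersect_ht_tuner_cache_size =
3194  BoundingBoxIntersectJoinHashTable::getBoundingBoxIntersectTuningParamCache()
3195  ->getCurrentCacheSizeForDevice(CacheItemType::BBOX_INTERSECT_AUTO_TUNER_PARAM,
3196  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3197  auto sum_hash_table_cache_size =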
3198  perfect_join_ht_cache_size + baseline_join_ht_cache_size +
3199  bbox_intersect_ht_cache_size + bbox_intersect_ht_tuner_cache_size;
3200  oss << "\"hash_tables\": " << sum_hash_table_cache_size << " bytes, ";
3201 
3202  // 1.c Chunk Metadata Recycler
3203  auto chunk_metadata_cache_size =
3204  executor->getResultSetRecyclerHolder()
3205  .getChunkMetadataRecycler()
3206  ->getCurrentCacheSizeForDevice(CacheItemType::CHUNK_METADATA,
3207  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
3208  oss << "\"chunk_metadata\": " << chunk_metadata_cache_size << " bytes, ";
3209 
3210  // 2. Query Plan Dag
3211  auto query_plan_dag_cache_size =
3212  executor->getQueryPlanDagCache().getCurrentNodeMapSize();
3213  oss << "\"query_plan_dag\": " << query_plan_dag_cache_size << " bytes, ";
3214 
3215  // 3. Compiled (GPU) Code
3216  oss << "\"compiled_GPU code\": "
3217  << QueryEngine::getInstance()->gpu_code_accessor->getCacheSize() << " bytes, ";
3218 
3219  // 4. String Dictionary
3220  oss << "\"string_dictionary\": " << cat.getTotalMemorySizeForDictionariesForDatabase()
3221  << " bytes";
3222  oss << "}";
3223  LOG(INFO) << oss.str();
3224 }
#define LOG(tag)
Definition: Logger.h:285
size_t getCurrentCacheSizeForDevice(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:590
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:509
static HashtableRecycler * getHashTableCache()
size_t getTotalMemorySizeForDictionariesForDatabase() const
Definition: Catalog.cpp:2370
static HashtableRecycler * getHashTableCache()
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
static BoundingBoxIntersectTuningParamRecycler * getBoundingBoxIntersectTuningParamCache()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::log_system_cpu_memory_status ( std::string const &  query,
const Catalog_Namespace::Catalog &  cat 
)

Definition at line 3226 of file DBHandler.cpp.

References g_allow_memory_status_log, Catalog_Namespace::Catalog::getDataMgr(), Data_Namespace::DataMgr::getSystemMemoryUsage(), logger::INFO, LOG, and log_cache_size().

Referenced by DBHandler::import_table(), DBHandler::importGeoTableSingle(), DBHandler::load_table(), DBHandler::load_table_binary(), DBHandler::load_table_binary_arrow(), DBHandler::load_table_binary_columnar(), and DBHandler::sql_execute_impl().

3227  {
3228  if (g_allow_memory_status_log) {
3229  std::ostringstream oss;
3230  oss << query << "\n" << cat.getDataMgr().getSystemMemoryUsage();
3231  LOG(INFO) << oss.str();
3232  log_cache_size(cat);
3233  }
3234 }
bool g_allow_memory_status_log
Definition: Execute.cpp:123
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
#define LOG(tag)
Definition: Logger.h:285
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:123
void log_cache_size(const Catalog_Namespace::Catalog &cat)
Definition: DBHandler.cpp:3167

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_has_valid_filename ( const std::string &  path)

Definition at line 4252 of file DBHandler.cpp.

References nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename().

Referenced by is_a_supported_archive_file(), and is_a_supported_geo_file().

4252  {
4253  auto filename = boost::filesystem::path(path).filename().string();
4254  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4255  return false;
4256  }
4257  return true;
4258 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::path_is_relative ( const std::string &  path)

Definition at line 4244 of file DBHandler.cpp.

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().

4244  {
4245  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4246  boost::istarts_with(path, "https://")) {
4247  return false;
4248  }
4249  return !boost::filesystem::path(path).is_absolute();
4250 }

+ Here is the caller graph for this function:

ScopeGuard anonymous_namespace{DBHandler.cpp}::pause_and_resume_executor_queue ( )

Definition at line 2873 of file DBHandler.cpp.

References g_enable_executor_resource_mgr, Executor::pause_executor_queue(), and Executor::resume_executor_queue().

Referenced by DBHandler::clear_cpu_memory(), DBHandler::clear_gpu_memory(), and DBHandler::clearRenderMemory().

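2873  {
2874  if (g_enable_executor_resource_mgr) {
2875  Executor::pause_executor_queue();
2876  return [] {
2877  // we need to resume erm queue if we throw any exception
2878  // that heavydb server can handle w/o shutting it down
2879  Executor::resume_executor_queue();
2880  };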
2881  }
2882  return [] {};
2883 }
static void resume_executor_queue()
Definition: Execute.cpp:5395
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:174
static void pause_executor_queue()
Definition: Execute.cpp:5386

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::remove_vsi_prefixes ( const std::string &  path_in)

Definition at line 4222 of file DBHandler.cpp.

Referenced by find_first_geo_file_in_archive().

4222  {
4223  std::string path(path_in);
4224 
4225  // these will be first
4226  if (boost::istarts_with(path, "/vsizip/")) {
4227  boost::replace_first(path, "/vsizip/", "");
4228  } else if (boost::istarts_with(path, "/vsitar/")) {
4229  boost::replace_first(path, "/vsitar/", "");
4230  } else if (boost::istarts_with(path, "/vsigzip/")) {
4231  boost::replace_first(path, "/vsigzip/", "");
4232  }
4233 
4234  // then these
4235  if (boost::istarts_with(path, "/vsicurl/")) {
4236  boost::replace_first(path, "/vsicurl/", "");
4237  } else if (boost::istarts_with(path, "/vsis3/")) {
4238  boost::replace_first(path, "/vsis3/", "s3://");
4239  }
4240 
4241  return path;
4242 }

+ Here is the caller graph for this function:

bool anonymous_namespace{DBHandler.cpp}::TTypeInfo_IsGeo ( const TDatumType::type &  t)

Definition at line 5371 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5371  {
5372  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5373  t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5374  t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5375 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{DBHandler.cpp}::TTypeInfo_TypeToString ( const TDatumType::type &  t)

Definition at line 5377 of file DBHandler.cpp.

Referenced by DBHandler::importGeoTableSingle().

5377  {
5378  std::stringstream ss;
5379  ss << t;
5380  return ss.str();
5381 }

+ Here is the caller graph for this function:

void anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local ( const std::string &  file_path)

Definition at line 4330 of file DBHandler.cpp.

References ddl_utils::IMPORT, is_local_file(), and ddl_utils::validate_allowed_file_path().

Referenced by DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), DBHandler::import_table(), and DBHandler::importGeoTableSingle().

4330  {
4331  if (is_local_file(file_path)) {
4332  ddl_utils::validate_allowed_file_path(
4333  file_path, ddl_utils::DataTransferType::IMPORT, true);
4334  }
4335 }
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:822
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4324

+ Here is the call graph for this function:

+ Here is the caller graph for this function: