OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ForeignTable.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ForeignTable.h"
18 #include <boost/algorithm/string/predicate.hpp>
19 #include <regex>
23 #include "Shared/DateTimeParser.h"
24 
26 bool g_enable_s3_fsi{false};
27 
28 namespace foreign_storage {
34 
35 std::vector<std::string_view> ForeignTable::getSupportedDataWrapperOptions() const {
38  } else if (foreign_server->data_wrapper_type ==
41  }
42  return {};
43 }
44 
49 }
50 
51 bool ForeignTable::isAppendMode() const {
52  auto update_mode = options.find(REFRESH_UPDATE_TYPE_KEY);
53  return (update_mode != options.end() &&
54  update_mode->second == APPEND_REFRESH_UPDATE_TYPE);
55 }
56 
57 std::string ForeignTable::getFullFilePath() const {
58  auto file_path = getOption(FILE_PATH_KEY);
59  std::optional<std::string> base_path{};
61  CHECK(storage_type);
62 
63  if (*storage_type == ForeignServer::LOCAL_FILE_STORAGE_TYPE) {
65  }
66 
67  // If both base_path and file_path are present, then concatenate. Otherwise we are just
68  // taking the one as the path. One of the two must exist, or we have failed validation.
69  CHECK(file_path || base_path);
70  const std::string separator{boost::filesystem::path::preferred_separator};
71  return std::regex_replace(
72  (base_path ? *base_path + separator : "") + (file_path ? *file_path : ""),
73  std::regex{separator + "{2,}"},
74  separator);
75 }
76 
77 // A valid path is a concatenation of the file_path and the base_path (for local storage).
78 // One of the two must be present.
80  auto file_path = getOption(FILE_PATH_KEY);
82  CHECK(storage_type) << "No storage type found in parent server. Server \""
83  << foreign_server->name << "\" is not valid.";
84 
85  if (!file_path) {
86  if (*storage_type == ForeignServer::LOCAL_FILE_STORAGE_TYPE) {
89  }
90  } else {
91  UNREACHABLE() << "Unknown foreign storage type.";
92  }
93  }
94 }
95 
97  OptionsMap options_map;
98  populateOptionsMap(std::move(options_map));
100 }
101 
102 void ForeignTable::initializeOptions(const rapidjson::Value& options) {
103  // Create the options map first because the json version is not guaranteed to be
104  // upper-case, which we need to compare reliably with alterable_options.
105  auto options_map = create_options_map(options);
106  validateSupportedOptionKeys(options_map);
107  populateOptionsMap(std::move(options_map));
109 }
110 
111 // This function can't be static because it needs to know the data wrapper type.
112 void ForeignTable::validateSupportedOptionKeys(const OptionsMap& options_map) const {
113  const auto data_wrapper_options = getSupportedDataWrapperOptions();
114  for (const auto& [key, value] : options_map) {
115  if (!contains(supported_options, key) && !contains(data_wrapper_options, key)) {
116  throw std::runtime_error{"Invalid foreign table option \"" + key + "\"."};
117  }
118  }
119 }
120 
122  auto update_type_entry =
124  CHECK(update_type_entry != options.end());
125  auto update_type_value = update_type_entry->second;
126  if (update_type_value != ALL_REFRESH_UPDATE_TYPE &&
127  update_type_value != APPEND_REFRESH_UPDATE_TYPE) {
128  std::string error_message =
129  "Invalid value \"" + update_type_value + "\" for " + REFRESH_UPDATE_TYPE_KEY +
130  " option." + " Value must be \"" + std::string{APPEND_REFRESH_UPDATE_TYPE} +
131  "\" or \"" + std::string{ALL_REFRESH_UPDATE_TYPE} + "\".";
132  throw std::runtime_error{error_message};
133  }
134 
135  auto refresh_timing_entry =
137  CHECK(refresh_timing_entry != options.end());
138  if (auto refresh_timing_value = refresh_timing_entry->second;
139  refresh_timing_value == SCHEDULE_REFRESH_TIMING_TYPE) {
140  auto start_date_entry = options.find(REFRESH_START_DATE_TIME_KEY);
141  if (start_date_entry == options.end()) {
142  throw std::runtime_error{std::string{REFRESH_START_DATE_TIME_KEY} +
143  " option must be provided for scheduled refreshes."};
144  }
145  auto start_date_time = dateTimeParse<kTIMESTAMP>(start_date_entry->second, 0);
146  int64_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
147  std::chrono::system_clock::now().time_since_epoch())
148  .count();
149  if (start_date_time < current_time) {
150  throw std::runtime_error{std::string{REFRESH_START_DATE_TIME_KEY} +
151  " cannot be a past date time."};
152  }
153 
154  auto interval_entry = options.find(REFRESH_INTERVAL_KEY);
155  if (interval_entry != options.end()) {
156  std::string interval_types{"HD"};
158  interval_types += "S";
159  }
160  boost::regex interval_regex{"^\\d{1,}[" + interval_types + "]$",
161  boost::regex::extended | boost::regex::icase};
162  if (!boost::regex_match(interval_entry->second, interval_regex)) {
163  throw std::runtime_error{"Invalid value provided for the " +
164  std::string{REFRESH_INTERVAL_KEY} + " option."};
165  }
166  }
167  } else if (refresh_timing_value != MANUAL_REFRESH_TIMING_TYPE) {
168  throw std::runtime_error{"Invalid value provided for the " +
169  std::string{REFRESH_TIMING_TYPE_KEY} +
170  " option. Value must be \"" + MANUAL_REFRESH_TIMING_TYPE +
171  "\" or \"" + SCHEDULE_REFRESH_TIMING_TYPE + "\"."};
172  }
173 }
174 
178  throw std::runtime_error(
179  "Cannot create S3 backed foreign table as AWS S3 support is currently disabled.");
180  }
181  const auto wrapper_type = foreign_server->data_wrapper_type;
182  if (wrapper_type == foreign_storage::DataWrapperType::CSV) {
184  UNREACHABLE();
185  } else {
187  }
188  } else if (wrapper_type == foreign_storage::DataWrapperType::PARQUET) {
190  } else {
191  UNREACHABLE() << "Unknown data wrapper type";
192  }
193 }
194 
195 OptionsMap ForeignTable::create_options_map(const rapidjson::Value& json_options) {
196  OptionsMap options_map;
197  CHECK(json_options.IsObject());
198  for (const auto& member : json_options.GetObject()) {
199  auto key = to_upper(member.name.GetString());
200  if (std::find(upper_case_options.begin(), upper_case_options.end(), key) !=
201  upper_case_options.end()) {
202  options_map[key] = to_upper(member.value.GetString());
203  } else {
204  options_map[key] = member.value.GetString();
205  }
206  }
207  return options_map;
208 }
209 
210 void ForeignTable::validate_alter_options(const OptionsMap& options_map) {
211  for (const auto& [key, value] : options_map) {
212  if (!contains(alterable_options, key)) {
213  throw std::runtime_error{std::string("Altering foreign table option \"") + key +
214  "\" is not currently supported."};
215  }
216  }
217 }
218 
219 void ForeignTable::throwFilePathError(const std::string_view& missing_path) const {
220  std::stringstream ss;
221  ss << "No file_path found for Foreign Table \"" << tableName
222  << "\". Table must have either set a \"" << FILE_PATH_KEY << "\" option, or its "
223  << "parent server must have set a \"" << missing_path << "\" option.";
224  throw std::runtime_error(ss.str());
225 }
226 
227 } // namespace foreign_storage
void validateDataWrapperOptions() const
std::string tableName
static const std::set< const char * > supported_options
Definition: ForeignTable.h:65
static constexpr std::string_view S3_STORAGE_TYPE
Definition: ForeignServer.h:49
static std::vector< std::string_view > getSupportedOptions()
static std::vector< std::string_view > getSupportedOptions()
static void validateOptions(const ForeignTable *foreign_table)
void initializeOptions()
Creates an empty option map for the table. Verifies that the required option keys are present and tha...
#define UNREACHABLE()
Definition: Logger.h:241
void validateOptionValues() const
Verifies the values for mapped options are valid.
void validateSupportedOptionKeys(const OptionsMap &options_map) const
Verifies that the options_map contains the keys required by a foreign table; including those specifie...
bool contains(const T &set, const std::string_view element)
Definition: ForeignTable.h:26
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:60
void throwFilePathError(const std::string_view &missing_path) const
static void validate_alter_options(const OptionsMap &options_map)
Verifies that the given options map only contains options that can be legally altered.
static constexpr std::string_view STORAGE_TYPE_KEY
Definition: ForeignServer.h:45
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:53
bool g_enable_s3_fsi
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:55
void populateOptionsMap(OptionsMap &&options_map, bool clear=false)
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:54
void validateRefreshOptionValues() const
std::optional< std::string > getOption(const std::string_view &key) const
bool isAppendMode() const
Checks if the table is in append mode.
std::vector< std::string_view > getSupportedDataWrapperOptions() const
Returns the list of required data wrapper options based on the type of data wrapper.
std::string to_upper(const std::string &str)
static const std::set< const char * > alterable_options
Definition: ForeignTable.h:79
bool g_enable_seconds_refresh
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:57
static constexpr std::string_view LOCAL_FILE_STORAGE_TYPE
Definition: ForeignServer.h:47
static OptionsMap create_options_map(const rapidjson::Value &json_options)
Creates an options map from given options. Converts options that must be upper case appropriately...
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:58
static constexpr const char * FILE_PATH_KEY
Definition: ForeignTable.h:50
std::string getFullFilePath() const
Returns the path to the source file/dir of the table. Depending on options this may result from a con...
const ForeignServer * foreign_server
Definition: ForeignTable.h:63
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:52
static constexpr std::string_view BASE_PATH_KEY
Definition: ForeignServer.h:46
#define CHECK(condition)
Definition: Logger.h:197
static constexpr char const * CSV
Definition: ForeignServer.h:37
std::map< std::string, std::string, std::less<>> OptionsMap
static void validateOptions(const ForeignTable *foreign_table)
bool validate_and_get_is_s3_select(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:59
static constexpr char const * PARQUET
Definition: ForeignServer.h:38
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:59
void validateFilePathOptionKey() const
static const std::set< const char * > upper_case_options
Definition: ForeignTable.h:72