OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
/home/jenkins-slave/workspace/core-os-doxygen/initdb.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <thrift/Thrift.h>
18 #include <array>
19 #include <boost/filesystem.hpp>
20 #include <boost/program_options.hpp>
21 #include <exception>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 
26 #include "Catalog/Catalog.h"
27 #include "Logger/Logger.h"
29 #include "Shared/SysDefinitions.h"
31 
// Port value passed to the Calcite constructor in main() while the system catalog is
// being initialized. Note that loadGeo() configures a different calcite_port (3280).
32 #define CALCITEPORT 3279
33 
// File names of the sample geo datasets that loadGeo() looks up under
// <root>/ThirdParty/geo_samples. Entry i is imported into SampleGeoTableNames[i].
34 static const std::array<std::string, 3> SampleGeoFileNames{"us-states.json",
35  "us-counties.json",
36  "countries.json"};
// Destination table names for the sample geo datasets, index-aligned with
// SampleGeoFileNames (loadGeo() iterates both arrays with the same index).
37 static const std::array<std::string, 3> SampleGeoTableNames{"heavyai_us_states",
38  "heavyai_us_counties",
39  "heavyai_countries"};
40 
42 
// Imports the sample geo datasets into the database rooted at `base_path`.
//
// Builds a DBHandler from the hard-coded settings below, opens an internal session as
// shared::kRootUsername on shared::kDefaultDbName, switches that session to CPU
// execution, and then issues one `COPY <table> FROM '<file>' WITH (GEO='true');`
// statement per entry of SampleGeoFileNames / SampleGeoTableNames.
//
// base_path: directory passed to FilePathWhitelist::initialize() and to the DBHandler.
// Throws std::runtime_error when a sample file does not exist under
// <heavyai::get_root_abs_path()>/ThirdParty/geo_samples.
43 static void loadGeo(std::string base_path) {
44  TSessionId session_id{};
45  SystemParameters system_parameters{};
46  AuthMetadata auth_metadata{};
47  std::string udf_filename{};
48  std::string udf_compiler_path{};
49  std::vector<std::string> udf_compiler_options{};
50 #ifdef ENABLE_GEOS
51  std::string libgeos_so_filename{};
52 #endif
    // No leaves are configured: both leaf lists stay empty.
53  std::vector<LeafHostInfo> db_leaves{};
54  std::vector<LeafHostInfo> string_leaves{};
55 
56  // Whitelist root path for tests by default
58  ddl_utils::FilePathWhitelist::initialize(base_path, "[\"/\"]", "[\"/\"]");
59 
60  // Based on default values observed from starting up an OmniSci DB server.
61  const bool allow_multifrag{true};
62  const bool jit_debug{false};
63  const bool intel_jit_profile{false};
64  const bool read_only{false};
65  const bool allow_loop_joins{false};
66  const bool enable_rendering{false};
67  const bool renderer_use_ppll_polys{false};
68  const bool renderer_prefer_igpu{false};
69  const unsigned renderer_vulkan_timeout_ms{300000};
70  const bool renderer_use_parallel_executors{false};
71  const bool enable_auto_clear_render_mem{false};
72  const int render_oom_retry_threshold{0};
73  const size_t render_mem_bytes{500000000};
74  const size_t max_concurrent_render_sessions{500};
75  const bool render_compositor_use_last_gpu{false};
76  const size_t reserved_gpu_mem{134217728};
77  const size_t num_reader_threads{0};
78  const bool legacy_syntax{true};
79  const int idle_session_duration{60};
80  const int max_session_duration{43200};
    // NOTE(review): the right-hand side of the next assignment is not present in this
    // listing (the statement runs straight into the following one) -- restore it from
    // the original file before building.
81  system_parameters.runtime_udf_registration_policy =
83  system_parameters.omnisci_server_port = -1;
    // Differs from the CALCITEPORT value that main() passes to its own Calcite instance.
84  system_parameters.calcite_port = 3280;
85 
    // Configure a non-aggregator, non-clustered setup with zero leaves.
86  system_parameters.aggregator = false;
87  g_leaf_count = 0;
88  g_cluster = false;
89 
    // NOTE(review): `cache_level` is not declared in the visible part of this function
    // and the initializer list looks truncated -- confirm against the original file.
91  File_Namespace::DiskCacheConfig disk_cache_config{
93  cache_level};
94 
    // All arguments are positional. Note that reserved_gpu_mem is passed before
    // render_compositor_use_last_gpu, the reverse of their declaration order above.
95  auto db_handler = std::make_unique<DBHandler>(db_leaves,
96  string_leaves,
97  base_path,
98  allow_multifrag,
99  jit_debug,
100  intel_jit_profile,
101  read_only,
102  allow_loop_joins,
103  enable_rendering,
104  renderer_use_ppll_polys,
105  renderer_prefer_igpu,
106  renderer_vulkan_timeout_ms,
107  renderer_use_parallel_executors,
108  enable_auto_clear_render_mem,
109  render_oom_retry_threshold,
110  render_mem_bytes,
111  max_concurrent_render_sessions,
112  reserved_gpu_mem,
113  render_compositor_use_last_gpu,
114  num_reader_threads,
115  auth_metadata,
116  system_parameters,
117  legacy_syntax,
118  idle_session_duration,
119  max_session_duration,
120  udf_filename,
121  udf_compiler_path,
122  udf_compiler_options,
123 #ifdef ENABLE_GEOS
124  libgeos_so_filename,
125 #endif
126  disk_cache_config,
    // NOTE(review): the meaning of this trailing `false` is not visible here -- check
    // the DBHandler constructor signature.
127  false);
    // Open an internal session as the root user on the default database; session_id is
    // passed in and then reused for every call below.
128  db_handler->internal_connect(session_id, shared::kRootUsername, shared::kDefaultDbName);
129 
130  // Execute on CPU by default
131  db_handler->set_execution_mode(session_id, TExecuteMode::CPU);
132  TQueryResult res;
133 
    // Import each sample file into the table that shares its index.
134  const size_t num_samples = SampleGeoFileNames.size();
135  for (size_t i = 0; i < num_samples; i++) {
136  const std::string table_name = SampleGeoTableNames[i];
137  const std::string file_name = SampleGeoFileNames[i];
138 
139  auto file_path = boost::filesystem::path(heavyai::get_root_abs_path()) /
140  "ThirdParty" / "geo_samples" / file_name;
141 
    // Fail fast with the full path so a missing sample data set is easy to diagnose.
142  if (!boost::filesystem::exists(file_path)) {
143  throw std::runtime_error(
144  "Unable to populate geo sample data. File does not exist: " +
145  file_path.string());
146  }
    // On Windows the path is rendered in Boost's generic (forward-slash) format before
    // being embedded in the SQL string; elsewhere the native string is used.
147 #ifdef _WIN32
148  std::string sql_string = "COPY " + table_name + " FROM '" +
149  file_path.generic_string() + "' WITH (GEO='true');";
150 #else
151  std::string sql_string =
152  "COPY " + table_name + " FROM '" + file_path.string() + "' WITH (GEO='true');";
153 #endif
    // NOTE(review): the trailing arguments (true, "", -1, -1) are positional options of
    // sql_execute whose meaning is not visible in this file -- confirm against DBHandler.
154  db_handler->sql_execute(res, session_id, sql_string, true, "", -1, -1);
155  }
156 }
157 
158 int main(int argc, char* argv[]) {
159  std::string base_path;
160  bool force = false;
161  bool skip_geo = false;
162  namespace po = boost::program_options;
163 
164  po::options_description desc("Options");
165  desc.add_options()("help,h", "Print help messages ")(
166  "data",
167  po::value<std::string>(&base_path)->required(),
168  "Directory path to HeavyDB catalogs")("force,f",
169  "Force overwriting of existing HeavyDB "
170  "instance")("skip-geo",
171  "Skip inserting sample geo data");
172 
173  desc.add_options()("enable-thrift-logs",
174  po::value<bool>(&g_enable_thrift_logs)
175  ->default_value(g_enable_thrift_logs)
176  ->implicit_value(true),
177  "Enable writing messages directly from thrift to stdout/stderr.");
178 
179  logger::LogOptions log_options(argv[0]);
180  desc.add(log_options.get_options());
181 
182  po::positional_options_description positionalOptions;
183  positionalOptions.add("data", 1);
184 
185  po::variables_map vm;
186 
187  try {
188  po::store(po::command_line_parser(argc, argv)
189  .options(desc)
190  .positional(positionalOptions)
191  .run(),
192  vm);
193  if (vm.count("help")) {
194  std::cout << desc;
195  return 0;
196  }
197  if (vm.count("force")) {
198  force = true;
199  }
200  if (vm.count("skip-geo")) {
201  skip_geo = true;
202  }
203  po::notify(vm);
204  } catch (boost::program_options::error& e) {
205  std::cerr << "Usage Error: " << e.what() << std::endl;
206  return 1;
207  }
208 
209  if (!g_enable_thrift_logs) {
210  apache::thrift::GlobalOutput.setOutputFunction([](const char* msg) {});
211  }
212 
213  if (!boost::filesystem::exists(base_path)) {
214  std::cerr << "Catalog basepath " + base_path + " does not exist.\n";
215  return 1;
216  }
217  std::string catalogs_path = base_path + "/" + shared::kCatalogDirectoryName;
218  if (boost::filesystem::exists(catalogs_path)) {
219  if (force) {
220  boost::filesystem::remove_all(catalogs_path);
221  } else {
222  std::cerr << "HeavyDB catalogs directory already exists at " + catalogs_path +
223  ". Use -f to force reinitialization.\n";
224  return 1;
225  }
226  }
227  std::string data_path = base_path + "/" + shared::kDataDirectoryName;
228  if (boost::filesystem::exists(data_path)) {
229  if (force) {
230  boost::filesystem::remove_all(data_path);
231  } else {
232  std::cerr << "HeavyDB data directory already exists at " + data_path +
233  ". Use -f to force reinitialization.\n";
234  return 1;
235  }
236  }
237  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
238  if (boost::filesystem::exists(lockfiles_path)) {
239  if (force) {
240  boost::filesystem::remove_all(lockfiles_path);
241  } else {
242  std::cerr << "HeavyDB lockfiles directory already exists at " + lockfiles_path +
243  ". Use -f to force reinitialization.\n";
244  return 1;
245  }
246  }
247  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
248  if (boost::filesystem::exists(lockfiles_path2)) {
249  if (force) {
250  boost::filesystem::remove_all(lockfiles_path2);
251  } else {
252  std::cerr << "HeavyDB lockfiles catalogs directory already exists at " +
253  lockfiles_path2 + ". Use -f to force reinitialization.\n";
254  return 1;
255  }
256  }
257  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
258  if (boost::filesystem::exists(lockfiles_path3)) {
259  if (force) {
260  boost::filesystem::remove_all(lockfiles_path3);
261  } else {
262  std::cerr << "HeavyDB lockfiles data directory already exists at " +
263  lockfiles_path3 + ". Use -f to force reinitialization.\n";
264  return 1;
265  }
266  }
267  std::string export_path = base_path + "/" + shared::kDefaultExportDirName;
268  if (boost::filesystem::exists(export_path)) {
269  if (force) {
270  boost::filesystem::remove_all(export_path);
271  } else {
272  std::cerr << "HeavyDB export directory already exists at " + export_path +
273  ". Use -f to force reinitialization.\n";
274  return 1;
275  }
276  }
277  std::string disk_cache_path = base_path + "/" + shared::kDefaultDiskCacheDirName;
278  if (boost::filesystem::exists(disk_cache_path)) {
279  if (force) {
280  boost::filesystem::remove_all(disk_cache_path);
281  } else {
282  std::cerr << "HeavyDB disk cache already exists at " + disk_cache_path +
283  ". Use -f to force reinitialization.\n";
284  return 1;
285  }
286  }
287 
288  if (!boost::filesystem::create_directory(catalogs_path)) {
289  std::cerr << "Cannot create " + shared::kCatalogDirectoryName + " subdirectory under "
290  << base_path << std::endl;
291  }
292  if (!boost::filesystem::create_directory(lockfiles_path)) {
293  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
294  " subdirectory under "
295  << base_path << std::endl;
296  }
297  if (!boost::filesystem::create_directory(lockfiles_path2)) {
298  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
299  shared::kCatalogDirectoryName + " subdirectory under "
300  << base_path << std::endl;
301  }
302  if (!boost::filesystem::create_directory(lockfiles_path3)) {
303  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
304  shared::kDataDirectoryName + " subdirectory under "
305  << base_path << std::endl;
306  }
307  if (!boost::filesystem::create_directory(export_path)) {
308  std::cerr << "Cannot create " + shared::kDefaultExportDirName + " subdirectory under "
309  << base_path << std::endl;
310  }
311 
312  log_options.set_base_path(base_path);
313  logger::init(log_options);
314 
315  try {
316  SystemParameters sys_parms;
317  auto dummy = std::make_shared<Data_Namespace::DataMgr>(
318  data_path, sys_parms, nullptr, false, 0);
319  auto calcite =
320  std::make_shared<Calcite>(-1, CALCITEPORT, base_path, 1024, 5000, true, "");
321  g_base_path = base_path;
322  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
323  sys_cat.init(base_path, dummy, {}, calcite, true, false, {});
324 
325  } catch (std::exception& e) {
326  std::cerr << "Exception: " << e.what() << "\n";
327  }
328 
329  if (!skip_geo) {
330  loadGeo(base_path);
331  } else {
333  }
334 
335  return 0;
336 }
const std::string kDataDirectoryName
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:841
std::string get_root_abs_path()
const std::string kDefaultDiskCacheDirName
const std::string kDefaultExportDirName
This file contains the class specification and related data structures for Catalog.
static const std::array< std::string, 3 > SampleGeoFileNames
Definition: initdb.cpp:34
static SysCatalog & instance()
Definition: SysCatalog.h:341
const std::string kDefaultDbName
std::string g_base_path
Definition: SysCatalog.cpp:62
void init(LogOptions const &log_opts)
Definition: Logger.cpp:360
static void loadGeo(std::string base_path)
Definition: initdb.cpp:43
static const std::array< std::string, 3 > SampleGeoTableNames
Definition: initdb.cpp:37
const std::string kRootUsername
#define CALCITEPORT
Definition: initdb.cpp:32
const std::string kCatalogDirectoryName
boost::program_options::options_description const & get_options() const
void set_base_path(std::string const &base_path)
bool g_cluster
const std::string kLockfilesDirectoryName
static std::string getDefaultPath(const std::string &base_path)
static bool run
size_t g_leaf_count
Definition: ParserNode.cpp:76
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:289