OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
/home/jenkins-slave/workspace/core-os-doxygen/initdb.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <thrift/Thrift.h>
18 #include <array>
19 #include <boost/filesystem.hpp>
20 #include <boost/program_options.hpp>
21 #include <exception>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 
26 #include "Catalog/Catalog.h"
27 #include "Logger/Logger.h"
29 #include "Shared/SysDefinitions.h"
31 
// Port value that main() passes as the second constructor argument of Calcite when it
// bootstraps the system catalog. Note that loadGeo() separately assigns 3280 to
// system_parameters.calcite_port, so the two code paths do not share this value.
32 #define CALCITEPORT 3279
33 
// File names of the bundled sample geo datasets. loadGeo() resolves each one under
// <heavyai::get_root_abs_path()>/ThirdParty/geo_samples/ and imports it with COPY.
// Entry i is loaded into the table named by entry i of SampleGeoTableNames, so the two
// arrays must stay the same length and in the same order.
34 static const std::array<std::string, 3> SampleGeoFileNames{"us-states.json",
35  "us-counties.json",
36  "countries.json"};
// Destination table names for the sample geo datasets. Entry i is the COPY target for
// the file named by entry i of SampleGeoFileNames; keep both arrays index-aligned.
37 static const std::array<std::string, 3> SampleGeoTableNames{"omnisci_states",
38  "omnisci_counties",
39  "omnisci_countries"};
40 
42 
/**
 * Imports the bundled sample geo datasets into a freshly initialized instance.
 *
 * Builds an in-process DBHandler rooted at base_path, opens an internal session as
 * shared::kRootUsername on shared::kDefaultDbName, switches that session to CPU
 * execution, and then issues one "COPY <table> FROM '<file>' WITH (GEO='true');"
 * statement per entry of SampleGeoFileNames / SampleGeoTableNames.
 *
 * @param base_path  Data directory handed to FilePathWhitelist::initialize and to the
 *                   DBHandler constructor.
 * @throws std::runtime_error if a sample file is not found under
 *         <heavyai::get_root_abs_path()>/ThirdParty/geo_samples/.
 */
43 static void loadGeo(std::string base_path) {
44  TSessionId session_id{};
45  SystemParameters system_parameters{};
46  AuthMetadata auth_metadata{};
47  std::string udf_filename{};
48  std::string udf_compiler_path{};
49  std::vector<std::string> udf_compiler_options{};
50 #ifdef ENABLE_GEOS
51  std::string libgeos_so_filename{};
52 #endif
53  std::vector<LeafHostInfo> db_leaves{};
54  std::vector<LeafHostInfo> string_leaves{};
55 
56  // Whitelist root path for tests by default
58  ddl_utils::FilePathWhitelist::initialize(base_path, "[\"/\"]", "[\"/\"]");
59 
60  // Based on default values observed from starting up an OmniSci DB server.
61  const bool allow_multifrag{true};
62  const bool jit_debug{false};
63  const bool intel_jit_profile{false};
64  const bool read_only{false};
65  const bool allow_loop_joins{false};
66  const bool enable_rendering{false};
67  const bool renderer_use_ppll_polys{false};
68  const bool renderer_prefer_igpu{false};
69  const unsigned renderer_vulkan_timeout_ms{300000};
70  const bool enable_auto_clear_render_mem{false};
71  const int render_oom_retry_threshold{0};
72  const size_t render_mem_bytes{500000000};
73  const size_t max_concurrent_render_sessions{500};
74  const bool render_compositor_use_last_gpu{false};
75  const size_t reserved_gpu_mem{134217728};
76  const size_t num_reader_threads{0};
77  const bool legacy_syntax{true};
78  const int idle_session_duration{60};
79  const int max_session_duration{43200};
// NOTE(review): the listing numbering jumps from 80 to 82 here, so the right-hand side
// of the assignment below appears to have been lost in extraction. As shown, the
// statement would chain into the omnisci_server_port assignment — confirm against the
// upstream file.
80  system_parameters.runtime_udf_registration_policy =
82  system_parameters.omnisci_server_port = -1;
83  system_parameters.calcite_port = 3280;
84 
85  system_parameters.aggregator = false;
86  g_leaf_count = 0;
87  g_cluster = false;
88 
// NOTE(review): listing lines 89 and 91 are missing; cache_level is not declared in the
// visible text and the initializer list is presumably incomplete — confirm against the
// upstream file.
90  File_Namespace::DiskCacheConfig disk_cache_config{
92  cache_level};
93 
94  auto db_handler = std::make_unique<DBHandler>(db_leaves,
95  string_leaves,
96  base_path,
97  allow_multifrag,
98  jit_debug,
99  intel_jit_profile,
100  read_only,
101  allow_loop_joins,
102  enable_rendering,
103  renderer_use_ppll_polys,
104  renderer_prefer_igpu,
105  renderer_vulkan_timeout_ms,
106  enable_auto_clear_render_mem,
107  render_oom_retry_threshold,
108  render_mem_bytes,
109  max_concurrent_render_sessions,
110  reserved_gpu_mem,
111  render_compositor_use_last_gpu,
112  num_reader_threads,
113  auth_metadata,
114  system_parameters,
115  legacy_syntax,
116  idle_session_duration,
117  max_session_duration,
118  udf_filename,
119  udf_compiler_path,
120  udf_compiler_options,
121 #ifdef ENABLE_GEOS
122  libgeos_so_filename,
123 #endif
124  disk_cache_config,
125  false);
126  db_handler->internal_connect(session_id, shared::kRootUsername, shared::kDefaultDbName);
127 
128  // Execute on CPU by default
129  db_handler->set_execution_mode(session_id, TExecuteMode::CPU);
130  TQueryResult res;
131 
// One COPY statement per sample dataset; the two arrays are walked with a shared index.
132  const size_t num_samples = SampleGeoFileNames.size();
133  for (size_t i = 0; i < num_samples; i++) {
134  const std::string table_name = SampleGeoTableNames[i];
135  const std::string file_name = SampleGeoFileNames[i];
136 
137  auto file_path = boost::filesystem::path(heavyai::get_root_abs_path()) /
138  "ThirdParty" / "geo_samples" / file_name;
139 
// Fail before issuing the COPY so the error names the missing file explicitly.
140  if (!boost::filesystem::exists(file_path)) {
141  throw std::runtime_error(
142  "Unable to populate geo sample data. File does not exist: " +
143  file_path.string());
144  }
// The Windows build embeds generic_string() instead of string() in the SQL text —
// presumably so the quoted path uses forward slashes; confirm.
145 #ifdef _WIN32
146  std::string sql_string = "COPY " + table_name + " FROM '" +
147  file_path.generic_string() + "' WITH (GEO='true');";
148 #else
149  std::string sql_string =
150  "COPY " + table_name + " FROM '" + file_path.string() + "' WITH (GEO='true');";
151 #endif
152  db_handler->sql_execute(res, session_id, sql_string, true, "", -1, -1);
153  }
154 }
155 
156 int main(int argc, char* argv[]) {
157  std::string base_path;
158  bool force = false;
159  bool skip_geo = false;
160  namespace po = boost::program_options;
161 
162  po::options_description desc("Options");
163  desc.add_options()("help,h", "Print help messages ")(
164  "data",
165  po::value<std::string>(&base_path)->required(),
166  "Directory path to HeavyDB catalogs")("force,f",
167  "Force overwriting of existing HeavyDB "
168  "instance")("skip-geo",
169  "Skip inserting sample geo data");
170 
171  desc.add_options()("enable-thrift-logs",
172  po::value<bool>(&g_enable_thrift_logs)
173  ->default_value(g_enable_thrift_logs)
174  ->implicit_value(true),
175  "Enable writing messages directly from thrift to stdout/stderr.");
176 
177  logger::LogOptions log_options(argv[0]);
178  desc.add(log_options.get_options());
179 
180  po::positional_options_description positionalOptions;
181  positionalOptions.add("data", 1);
182 
183  po::variables_map vm;
184 
185  try {
186  po::store(po::command_line_parser(argc, argv)
187  .options(desc)
188  .positional(positionalOptions)
189  .run(),
190  vm);
191  if (vm.count("help")) {
192  std::cout << desc;
193  return 0;
194  }
195  if (vm.count("force")) {
196  force = true;
197  }
198  if (vm.count("skip-geo")) {
199  skip_geo = true;
200  }
201  po::notify(vm);
202  } catch (boost::program_options::error& e) {
203  std::cerr << "Usage Error: " << e.what() << std::endl;
204  return 1;
205  }
206 
207  if (!g_enable_thrift_logs) {
208  apache::thrift::GlobalOutput.setOutputFunction([](const char* msg) {});
209  }
210 
211  if (!boost::filesystem::exists(base_path)) {
212  std::cerr << "Catalog basepath " + base_path + " does not exist.\n";
213  return 1;
214  }
215  std::string catalogs_path = base_path + "/" + shared::kCatalogDirectoryName;
216  if (boost::filesystem::exists(catalogs_path)) {
217  if (force) {
218  boost::filesystem::remove_all(catalogs_path);
219  } else {
220  std::cerr << "HeavyDB catalogs directory already exists at " + catalogs_path +
221  ". Use -f to force reinitialization.\n";
222  return 1;
223  }
224  }
225  std::string data_path = base_path + "/" + shared::kDataDirectoryName;
226  if (boost::filesystem::exists(data_path)) {
227  if (force) {
228  boost::filesystem::remove_all(data_path);
229  } else {
230  std::cerr << "HeavyDB data directory already exists at " + data_path +
231  ". Use -f to force reinitialization.\n";
232  return 1;
233  }
234  }
235  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
236  if (boost::filesystem::exists(lockfiles_path)) {
237  if (force) {
238  boost::filesystem::remove_all(lockfiles_path);
239  } else {
240  std::cerr << "HeavyDB lockfiles directory already exists at " + lockfiles_path +
241  ". Use -f to force reinitialization.\n";
242  return 1;
243  }
244  }
245  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
246  if (boost::filesystem::exists(lockfiles_path2)) {
247  if (force) {
248  boost::filesystem::remove_all(lockfiles_path2);
249  } else {
250  std::cerr << "HeavyDB lockfiles catalogs directory already exists at " +
251  lockfiles_path2 + ". Use -f to force reinitialization.\n";
252  return 1;
253  }
254  }
255  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
256  if (boost::filesystem::exists(lockfiles_path3)) {
257  if (force) {
258  boost::filesystem::remove_all(lockfiles_path3);
259  } else {
260  std::cerr << "HeavyDB lockfiles data directory already exists at " +
261  lockfiles_path3 + ". Use -f to force reinitialization.\n";
262  return 1;
263  }
264  }
265  std::string export_path = base_path + "/" + shared::kDefaultExportDirName;
266  if (boost::filesystem::exists(export_path)) {
267  if (force) {
268  boost::filesystem::remove_all(export_path);
269  } else {
270  std::cerr << "HeavyDB export directory already exists at " + export_path +
271  ". Use -f to force reinitialization.\n";
272  return 1;
273  }
274  }
275  std::string disk_cache_path = base_path + "/" + shared::kDefaultDiskCacheDirName;
276  if (boost::filesystem::exists(disk_cache_path)) {
277  if (force) {
278  boost::filesystem::remove_all(disk_cache_path);
279  } else {
280  std::cerr << "HeavyDB disk cache already exists at " + disk_cache_path +
281  ". Use -f to force reinitialization.\n";
282  return 1;
283  }
284  }
285 
286  if (!boost::filesystem::create_directory(catalogs_path)) {
287  std::cerr << "Cannot create " + shared::kCatalogDirectoryName + " subdirectory under "
288  << base_path << std::endl;
289  }
290  if (!boost::filesystem::create_directory(lockfiles_path)) {
291  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
292  " subdirectory under "
293  << base_path << std::endl;
294  }
295  if (!boost::filesystem::create_directory(lockfiles_path2)) {
296  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
297  shared::kCatalogDirectoryName + " subdirectory under "
298  << base_path << std::endl;
299  }
300  if (!boost::filesystem::create_directory(lockfiles_path3)) {
301  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
302  shared::kDataDirectoryName + " subdirectory under "
303  << base_path << std::endl;
304  }
305  if (!boost::filesystem::create_directory(export_path)) {
306  std::cerr << "Cannot create " + shared::kDefaultExportDirName + " subdirectory under "
307  << base_path << std::endl;
308  }
309 
310  log_options.set_base_path(base_path);
311  logger::init(log_options);
312 
313  try {
314  SystemParameters sys_parms;
315  auto dummy = std::make_shared<Data_Namespace::DataMgr>(
316  data_path, sys_parms, nullptr, false, 0);
317  auto calcite =
318  std::make_shared<Calcite>(-1, CALCITEPORT, base_path, 1024, 5000, true, "");
319  g_base_path = base_path;
320  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
321  sys_cat.init(base_path, dummy, {}, calcite, true, false, {});
322 
323  } catch (std::exception& e) {
324  std::cerr << "Exception: " << e.what() << "\n";
325  }
326 
327  if (!skip_geo) {
328  loadGeo(base_path);
329  } else {
331  }
332 
333  return 0;
334 }
const std::string kDataDirectoryName
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:841
std::string get_root_abs_path()
const std::string kDefaultDiskCacheDirName
const std::string kDefaultExportDirName
This file contains the class specification and related data structures for Catalog.
static const std::array< std::string, 3 > SampleGeoFileNames
Definition: initdb.cpp:34
static SysCatalog & instance()
Definition: SysCatalog.h:341
const std::string kDefaultDbName
std::string g_base_path
Definition: SysCatalog.cpp:62
void init(LogOptions const &log_opts)
Definition: Logger.cpp:308
static void loadGeo(std::string base_path)
Definition: initdb.cpp:43
static const std::array< std::string, 3 > SampleGeoTableNames
Definition: initdb.cpp:37
const std::string kRootUsername
#define CALCITEPORT
Definition: initdb.cpp:32
const std::string kCatalogDirectoryName
boost::program_options::options_description const & get_options() const
void set_base_path(std::string const &base_path)
bool g_cluster
const std::string kLockfilesDirectoryName
static std::string getDefaultPath(const std::string &base_path)
static bool run
size_t g_leaf_count
Definition: ParserNode.cpp:76
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:289