OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
/home/jenkins-slave/workspace/core-os-doxygen/initdb.cpp File Reference
#include <thrift/Thrift.h>
#include <array>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include "Catalog/Catalog.h"
#include "Logger/Logger.h"
#include "OSDependent/omnisci_path.h"
#include "ThriftHandler/DBHandler.h"
+ Include dependency graph for initdb.cpp:

Go to the source code of this file.

Macros

#define CALCITEPORT   3279
 

Functions

static void loadGeo (std::string base_path)
 
int main (int argc, char *argv[])
 

Variables

static const std::array< std::string, 3 > SampleGeoFileNames
 
static const std::array< std::string, 3 > SampleGeoTableNames
 
bool g_enable_thrift_logs {false}
 

Macro Definition Documentation

#define CALCITEPORT   3279

Definition at line 31 of file initdb.cpp.

Referenced by main().

Function Documentation

static void loadGeo ( std::string  base_path)
static

Definition at line 42 of file initdb.cpp.

References ddl_utils::FilePathWhitelist::clear(), File_Namespace::fsi, g_cluster, g_leaf_count, omnisci::get_root_abs_path(), File_Namespace::DiskCacheConfig::getDefaultPath(), i, ddl_utils::FilePathWhitelist::initialize(), OMNISCI_DEFAULT_DB, OMNISCI_ROOT_USER, run_benchmark_import::res, SampleGeoFileNames, and SampleGeoTableNames.

Referenced by main().

42  {
43  TSessionId session_id{};
44  SystemParameters system_parameters{};
45  AuthMetadata auth_metadata{};
46  std::string udf_filename{};
47  std::string udf_compiler_path{};
48  std::vector<std::string> udf_compiler_options{};
49 #ifdef ENABLE_GEOS
50  std::string libgeos_so_filename{};
51 #endif
52  std::vector<LeafHostInfo> db_leaves{};
53  std::vector<LeafHostInfo> string_leaves{};
54 
55  // Whitelist root path for tests by default
56  ddl_utils::FilePathWhitelist::clear();
57  ddl_utils::FilePathWhitelist::initialize(base_path, "[\"/\"]", "[\"/\"]");
58 
59  // Based on default values observed from starting up an OmniSci DB server.
60  const bool allow_multifrag{true};
61  const bool jit_debug{false};
62  const bool intel_jit_profile{false};
63  const bool read_only{false};
64  const bool allow_loop_joins{false};
65  const bool enable_rendering{false};
66  const bool renderer_use_vulkan_driver{false};
67  const bool renderer_prefer_igpu{false};
68  const unsigned renderer_vulkan_timeout_ms{300000};
69  const bool enable_auto_clear_render_mem{false};
70  const int render_oom_retry_threshold{0};
71  const size_t render_mem_bytes{500000000};
72  const size_t max_concurrent_render_sessions{500};
73  const bool render_compositor_use_last_gpu{false};
74  const size_t reserved_gpu_mem{134217728};
75  const size_t num_reader_threads{0};
76  const bool legacy_syntax{true};
77  const int idle_session_duration{60};
78  const int max_session_duration{43200};
79  const bool enable_runtime_udf_registration{false};
80  system_parameters.omnisci_server_port = -1;
81  system_parameters.calcite_port = 3280;
82 
83  system_parameters.aggregator = false;
84  g_leaf_count = 0;
85  g_cluster = false;
86 
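87  File_Namespace::DiskCacheLevel cache_level{File_Namespace::DiskCacheLevel::fsi};
88  File_Namespace::DiskCacheConfig disk_cache_config{
89  File_Namespace::DiskCacheConfig::getDefaultPath(base_path),
90  cache_level};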
91 
92  auto db_handler = std::make_unique<DBHandler>(db_leaves,
93  string_leaves,
94  base_path,
95  allow_multifrag,
96  jit_debug,
97  intel_jit_profile,
98  read_only,
99  allow_loop_joins,
100  enable_rendering,
101  renderer_use_vulkan_driver,
102  renderer_prefer_igpu,
103  renderer_vulkan_timeout_ms,
104  enable_auto_clear_render_mem,
105  render_oom_retry_threshold,
106  render_mem_bytes,
107  max_concurrent_render_sessions,
108  reserved_gpu_mem,
109  render_compositor_use_last_gpu,
110  num_reader_threads,
111  auth_metadata,
112  system_parameters,
113  legacy_syntax,
114  idle_session_duration,
115  max_session_duration,
116  enable_runtime_udf_registration,
117  udf_filename,
118  udf_compiler_path,
119  udf_compiler_options,
120 #ifdef ENABLE_GEOS
121  libgeos_so_filename,
122 #endif
123  disk_cache_config,
124  false);
125  db_handler->internal_connect(session_id, OMNISCI_ROOT_USER, OMNISCI_DEFAULT_DB);
126 
127  // Execute on CPU by default
128  db_handler->set_execution_mode(session_id, TExecuteMode::CPU);
129  TQueryResult res;
130 
131  const size_t num_samples = SampleGeoFileNames.size();
132  for (size_t i = 0; i < num_samples; i++) {
133  const std::string table_name = SampleGeoTableNames[i];
134  const std::string file_name = SampleGeoFileNames[i];
135 
136  const auto file_path = boost::filesystem::path(
137  omnisci::get_root_abs_path() + "/ThirdParty/geo_samples/" + file_name);
138  if (!boost::filesystem::exists(file_path)) {
139  throw std::runtime_error(
140  "Unable to populate geo sample data. File does not exist: " +
141  file_path.string());
142  }
143  std::string sql_string =
144  "COPY " + table_name + " FROM '" + file_path.string() + "' WITH (GEO='true');";
145  db_handler->sql_execute(res, session_id, sql_string, true, "", -1, -1);
146  }
147 }
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:826
static const std::array< std::string, 3 > SampleGeoFileNames
Definition: initdb.cpp:33
const std::string OMNISCI_DEFAULT_DB
Definition: SysCatalog.h:59
static const std::array< std::string, 3 > SampleGeoTableNames
Definition: initdb.cpp:36
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:60
std::string get_root_abs_path()
bool g_cluster
static std::string getDefaultPath(const std::string &base_path)
size_t g_leaf_count
Definition: ParserNode.cpp:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int main ( int argc, char * argv[] )

Definition at line 149 of file initdb.cpp.

References CALCITEPORT, test_exceptions::data_path, Catalog_Namespace::SysCatalog::destroy(), g_base_path, g_enable_thrift_logs, logger::LogOptions::get_options(), logger::init(), Catalog_Namespace::SysCatalog::instance(), loadGeo(), run, and logger::LogOptions::set_base_path().

149  {
150  std::string base_path;
151  bool force = false;
152  bool skip_geo = false;
153  namespace po = boost::program_options;
154 
155  po::options_description desc("Options");
156  desc.add_options()("help,h", "Print help messages ")(
157  "data",
158  po::value<std::string>(&base_path)->required(),
159  "Directory path to OmniSci catalogs")("force,f",
160  "Force overwriting of existing OmniSci "
161  "instance")("skip-geo",
162  "Skip inserting sample geo data");
163 
164  desc.add_options()("enable-thrift-logs",
165  po::value<bool>(&g_enable_thrift_logs)
166  ->default_value(g_enable_thrift_logs)
167  ->implicit_value(true),
168  "Enable writing messages directly from thrift to stdout/stderr.");
169 
170  logger::LogOptions log_options(argv[0]);
171  desc.add(log_options.get_options());
172 
173  po::positional_options_description positionalOptions;
174  positionalOptions.add("data", 1);
175 
176  po::variables_map vm;
177 
178  try {
179  po::store(po::command_line_parser(argc, argv)
180  .options(desc)
181  .positional(positionalOptions)
182  .run(),
183  vm);
184  if (vm.count("help")) {
185  std::cout << desc;
186  return 0;
187  }
188  if (vm.count("force")) {
189  force = true;
190  }
191  if (vm.count("skip-geo")) {
192  skip_geo = true;
193  }
194  po::notify(vm);
195  } catch (boost::program_options::error& e) {
196  std::cerr << "Usage Error: " << e.what() << std::endl;
197  return 1;
198  }
199 
200  if (!g_enable_thrift_logs) {
201  apache::thrift::GlobalOutput.setOutputFunction([](const char* msg) {});
202  }
203 
204  if (!boost::filesystem::exists(base_path)) {
205  std::cerr << "Catalog basepath " + base_path + " does not exist.\n";
206  return 1;
207  }
208  std::string catalogs_path = base_path + "/mapd_catalogs";
209  if (boost::filesystem::exists(catalogs_path)) {
210  if (force) {
211  boost::filesystem::remove_all(catalogs_path);
212  } else {
213  std::cerr << "OmniSci catalogs already initialized at " + base_path +
214  ". Use -f to force reinitialization.\n";
215  return 1;
216  }
217  }
218  std::string data_path = base_path + "/mapd_data";
219  if (boost::filesystem::exists(data_path)) {
220  if (force) {
221  boost::filesystem::remove_all(data_path);
222  } else {
223  std::cerr << "OmniSci data directory already exists at " + base_path +
224  ". Use -f to force reinitialization.\n";
225  return 1;
226  }
227  }
228  std::string export_path = base_path + "/mapd_export";
229  if (boost::filesystem::exists(export_path)) {
230  if (force) {
231  boost::filesystem::remove_all(export_path);
232  } else {
233  std::cerr << "OmniSci export directory already exists at " + base_path +
234  ". Use -f to force reinitialization.\n";
235  return 1;
236  }
237  }
238  std::string disk_cache_path = base_path + "/omnisci_disk_cache";
239  if (boost::filesystem::exists(disk_cache_path)) {
240  if (force) {
241  boost::filesystem::remove_all(disk_cache_path);
242  } else {
243  std::cerr << "OmniSci disk cache already exists at " + disk_cache_path +
244  ". Use -f to force reinitialization.\n";
245  return 1;
246  }
247  }
248  if (!boost::filesystem::create_directory(catalogs_path)) {
249  std::cerr << "Cannot create mapd_catalogs subdirectory under " << base_path
250  << std::endl;
251  }
252  if (!boost::filesystem::create_directory(export_path)) {
253  std::cerr << "Cannot create mapd_export subdirectory under " << base_path
254  << std::endl;
255  }
256 
257  log_options.set_base_path(base_path);
258  logger::init(log_options);
259 
260  try {
261  SystemParameters sys_parms;
262  auto dummy = std::make_shared<Data_Namespace::DataMgr>(
263  data_path, sys_parms, nullptr, false, 0);
264  auto calcite =
265  std::make_shared<Calcite>(-1, CALCITEPORT, base_path, 1024, 5000, true, "");
266  g_base_path = base_path;
267  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
268  sys_cat.init(base_path, dummy, {}, calcite, true, false, {});
269 
270  if (!skip_geo) {
271  loadGeo(base_path);
272  }
273 
274  } catch (std::exception& e) {
275  std::cerr << "Exception: " << e.what() << "\n";
276  }
277 
278  Catalog_Namespace::SysCatalog::destroy();
279  return 0;
280 }
static SysCatalog & instance()
Definition: SysCatalog.h:326
std::string g_base_path
Definition: SysCatalog.cpp:63
void init(LogOptions const &log_opts)
Definition: Logger.cpp:305
static void loadGeo(std::string base_path)
Definition: initdb.cpp:42
#define CALCITEPORT
Definition: initdb.cpp:31
static bool run
bool g_enable_thrift_logs
Definition: initdb.cpp:40

+ Here is the call graph for this function:

Variable Documentation

bool g_enable_thrift_logs {false}

Definition at line 40 of file initdb.cpp.

const std::array<std::string, 3> SampleGeoFileNames
static
Initial value:
{"us-states.json",
"us-counties.json",
"countries.json"}

Definition at line 33 of file initdb.cpp.

Referenced by loadGeo().

const std::array<std::string, 3> SampleGeoTableNames
static
Initial value:
{"omnisci_states",
"omnisci_counties",
"omnisci_countries"}

Definition at line 36 of file initdb.cpp.

Referenced by loadGeo().