OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
/home/jenkins-slave/workspace/core-os-doxygen/initdb.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <thrift/Thrift.h>
18 #include <array>
19 #include <boost/filesystem.hpp>
20 #include <boost/program_options.hpp>
21 #include <exception>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 
26 #include "Catalog/Catalog.h"
27 #include "ImportExport/Importer.h"
29 #include "Shared/Logger.h"
30 #include "Shared/mapdpath.h"
31 
32 #define CALCITEPORT 3279
33 
/// File names of the bundled sample geo data sets. main() resolves each one
/// under <root>/ThirdParty/geo_samples/ before importing it.
static const std::array<std::string, 3> SampleGeoFileNames{"us-states.json",
                                                           "us-counties.json",
                                                           "countries.json"};

/// Destination table names for the sample geo data sets. Index-aligned with
/// SampleGeoFileNames: entry i of this array receives file i of that array.
static const std::array<std::string, 3> SampleGeoTableNames{"omnisci_states",
                                                            "omnisci_counties",
                                                            "omnisci_countries"};

/// Bound to the --enable-thrift-logs option in main(). While false, main()
/// replaces thrift's global output function with a no-op.
bool g_enable_thrift_logs{false};

43 int main(int argc, char* argv[]) {
44  std::string base_path;
45  bool force = false;
46  bool skip_geo = false;
47  namespace po = boost::program_options;
48 
49  po::options_description desc("Options");
50  desc.add_options()("help,h", "Print help messages ")(
51  "data",
52  po::value<std::string>(&base_path)->required(),
53  "Directory path to OmniSci catalogs")(
54  "force,f", "Force overwriting of existing OmniSci instance")(
55  "skip-geo", "Skip inserting sample geo data");
56 
57  desc.add_options()("enable-thrift-logs",
58  po::value<bool>(&g_enable_thrift_logs)
59  ->default_value(g_enable_thrift_logs)
60  ->implicit_value(true),
61  "Enable writing messages directly from thrift to stdout/stderr.");
62 
63  logger::LogOptions log_options(argv[0]);
64  desc.add(log_options.get_options());
65 
66  po::positional_options_description positionalOptions;
67  positionalOptions.add("data", 1);
68 
69  po::variables_map vm;
70 
71  try {
72  po::store(po::command_line_parser(argc, argv)
73  .options(desc)
74  .positional(positionalOptions)
75  .run(),
76  vm);
77  if (vm.count("help")) {
78  std::cout << desc;
79  return 0;
80  }
81  if (vm.count("force")) {
82  force = true;
83  }
84  if (vm.count("skip-geo")) {
85  skip_geo = true;
86  }
87  po::notify(vm);
88  } catch (boost::program_options::error& e) {
89  std::cerr << "Usage Error: " << e.what() << std::endl;
90  return 1;
91  }
92 
93  if (!g_enable_thrift_logs) {
94  apache::thrift::GlobalOutput.setOutputFunction([](const char* msg) {});
95  }
96 
97  if (!boost::filesystem::exists(base_path)) {
98  std::cerr << "Catalog basepath " + base_path + " does not exist.\n";
99  return 1;
100  }
101  std::string catalogs_path = base_path + "/mapd_catalogs";
102  if (boost::filesystem::exists(catalogs_path)) {
103  if (force) {
104  boost::filesystem::remove_all(catalogs_path);
105  } else {
106  std::cerr << "OmniSci catalogs already initialized at " + base_path +
107  ". Use -f to force reinitialization.\n";
108  return 1;
109  }
110  }
111  std::string data_path = base_path + "/mapd_data";
112  if (boost::filesystem::exists(data_path)) {
113  if (force) {
114  boost::filesystem::remove_all(data_path);
115  } else {
116  std::cerr << "OmniSci data directory already exists at " + base_path +
117  ". Use -f to force reinitialization.\n";
118  return 1;
119  }
120  }
121  std::string export_path = base_path + "/mapd_export";
122  if (boost::filesystem::exists(export_path)) {
123  if (force) {
124  boost::filesystem::remove_all(export_path);
125  } else {
126  std::cerr << "OmniSci export directory already exists at " + base_path +
127  ". Use -f to force reinitialization.\n";
128  return 1;
129  }
130  }
131  if (!boost::filesystem::create_directory(catalogs_path)) {
132  std::cerr << "Cannot create mapd_catalogs subdirectory under " << base_path
133  << std::endl;
134  }
135  if (!boost::filesystem::create_directory(export_path)) {
136  std::cerr << "Cannot create mapd_export subdirectory under " << base_path
137  << std::endl;
138  }
139 
140  log_options.set_base_path(base_path);
141  logger::init(log_options);
142 
143  try {
144  SystemParameters sys_parms;
145  auto dummy =
146  std::make_shared<Data_Namespace::DataMgr>(data_path, sys_parms, false, 0);
147  auto calcite =
148  std::make_shared<Calcite>(-1, CALCITEPORT, base_path, 1024, 5000, true, "");
149  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
150  sys_cat.init(base_path, dummy, {}, calcite, true, false, {});
151 
152  if (!skip_geo) {
153  // Add geo samples to the system database using the root user
155  const std::string db_name(OMNISCI_DEFAULT_DB);
156  CHECK(sys_cat.getMetadataForDB(db_name, cur_db));
158  base_path, cur_db, dummy, std::vector<LeafHostInfo>(), calcite, false);
160  CHECK(sys_cat.getMetadataForUser(OMNISCI_ROOT_USER, user));
161 
162  QueryRunner::ImportDriver import_driver(cat, user);
163 
164  const size_t num_samples = SampleGeoFileNames.size();
165  for (size_t i = 0; i < num_samples; i++) {
166  const std::string table_name = SampleGeoTableNames[i];
167  const std::string file_name = SampleGeoFileNames[i];
168 
169  const auto file_path = boost::filesystem::path(
170  mapd_root_abs_path() + "/ThirdParty/geo_samples/" + file_name);
171  if (!boost::filesystem::exists(file_path)) {
172  throw std::runtime_error(
173  "Unable to populate geo sample data. File does not exist: " +
174  file_path.string());
175  }
176 
177  import_driver.importGeoTable(file_path.string(), table_name, true, true, false);
178  }
179  }
180 
181  } catch (std::exception& e) {
182  std::cerr << "Exception: " << e.what() << "\n";
183  }
184  return 0;
185 }
std::string cat(Ts &&...args)
static std::shared_ptr< Catalog > get(const std::string &dbName)
Definition: Catalog.cpp:3658
std::string mapd_root_abs_path()
Definition: mapdpath.h:30
boost::program_options::options_description const & get_options() const
Definition: Logger.cpp:79
This file contains the class specification and related data structures for Catalog.
static const std::array< std::string, 3 > SampleGeoFileNames
Definition: initdb.cpp:34
static SysCatalog & instance()
Definition: SysCatalog.h:288
CHECK(cgen_state)
void init(LogOptions const &log_opts)
Definition: Logger.cpp:276
const std::string OMNISCI_DEFAULT_DB
Definition: SysCatalog.h:58
static const std::array< std::string, 3 > SampleGeoTableNames
Definition: initdb.cpp:37
bool g_enable_thrift_logs
Definition: initdb.cpp:41
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:59
#define CALCITEPORT
Definition: initdb.cpp:32
void set_base_path(std::string const &base_path)
Definition: Logger.cpp:93
static bool run