OmniSciDB  8a228a1076
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <iostream>
22 
23 #include "CommandLineOptions.h"
24 #include "LeafHostInfo.h"
25 #include "MapDRelease.h"
27 #include "Shared/Compressor.h"
29 #include "Utils/DdlUtils.h"
30 
31 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
32 
33 extern std::string cluster_command_line_arg;
34 
36 
37 extern bool g_use_table_device_offset;
39 
40 extern int64_t g_large_ndv_threshold;
41 extern size_t g_large_ndv_multiplier;
42 
// File-scope connection settings. They are defined (not merely declared) here
// and are not `static`, so other translation units can reference them.
// NOTE(review): the timeout values look like milliseconds (20 s / 300 s) —
// confirm against the code that consumes them.
43 unsigned connect_timeout{20000};
44 unsigned recv_timeout{300000};
45 unsigned send_timeout{300000};
// Keepalive is off unless something outside this view enables it.
46 bool with_keepalive{false};
47 
51  }
55 }
56 
58  help_desc.add_options()("help,h", "Show available options.");
59  help_desc.add_options()(
60  "allow-cpu-retry",
61  po::value<bool>(&g_allow_cpu_retry)
62  ->default_value(g_allow_cpu_retry)
63  ->implicit_value(true),
64  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
65  help_desc.add_options()("allow-loop-joins",
66  po::value<bool>(&allow_loop_joins)
67  ->default_value(allow_loop_joins)
68  ->implicit_value(true),
69  "Enable loop joins.");
70  help_desc.add_options()("bigint-count",
71  po::value<bool>(&g_bigint_count)
72  ->default_value(g_bigint_count)
73  ->implicit_value(true),
74  "Use 64-bit count.");
75  help_desc.add_options()("calcite-max-mem",
76  po::value<size_t>(&system_parameters.calcite_max_mem)
77  ->default_value(system_parameters.calcite_max_mem),
78  "Max memory available to calcite JVM.");
79  if (!dist_v5_) {
80  help_desc.add_options()("calcite-port",
81  po::value<int>(&system_parameters.calcite_port)
82  ->default_value(system_parameters.calcite_port),
83  "Calcite port number.");
84  }
85  help_desc.add_options()("config",
86  po::value<std::string>(&system_parameters.config_file),
87  "Path to server configuration file.");
88  help_desc.add_options()("cpu-buffer-mem-bytes",
89  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
90  ->default_value(system_parameters.cpu_buffer_mem_bytes),
91  "Size of memory reserved for CPU buffers, in bytes.");
92 
93  help_desc.add_options()(
94  "cpu-only",
95  po::value<bool>(&cpu_only)->default_value(cpu_only)->implicit_value(true),
96  "Run on CPU only, even if GPUs are available.");
97  help_desc.add_options()("cuda-block-size",
98  po::value<size_t>(&system_parameters.cuda_block_size)
99  ->default_value(system_parameters.cuda_block_size),
100  "Size of block to use on GPU.");
101  help_desc.add_options()("cuda-grid-size",
102  po::value<size_t>(&system_parameters.cuda_grid_size)
103  ->default_value(system_parameters.cuda_grid_size),
104  "Size of grid to use on GPU.");
105  if (!dist_v5_) {
106  help_desc.add_options()(
107  "data",
108  po::value<std::string>(&base_path)->required()->default_value("data"),
109  "Directory path to OmniSci data storage (catalogs, raw data, log files, etc).");
110  positional_options.add("data", 1);
111  }
112  help_desc.add_options()("db-query-list",
113  po::value<std::string>(&db_query_file),
114  "Path to file containing OmniSci warmup queries.");
115  help_desc.add_options()(
116  "exit-after-warmup",
117  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
118  "Exit after OmniSci warmup queries.");
119  help_desc.add_options()("dynamic-watchdog-time-limit",
120  po::value<unsigned>(&dynamic_watchdog_time_limit)
121  ->default_value(dynamic_watchdog_time_limit)
122  ->implicit_value(10000),
123  "Dynamic watchdog time limit, in milliseconds.");
124  help_desc.add_options()("enable-debug-timer",
125  po::value<bool>(&g_enable_debug_timer)
126  ->default_value(g_enable_debug_timer)
127  ->implicit_value(true),
128  "Enable debug timer logging.");
129  help_desc.add_options()("enable-dynamic-watchdog",
130  po::value<bool>(&enable_dynamic_watchdog)
131  ->default_value(enable_dynamic_watchdog)
132  ->implicit_value(true),
133  "Enable dynamic watchdog.");
134  help_desc.add_options()("enable-filter-push-down",
135  po::value<bool>(&g_enable_filter_push_down)
136  ->default_value(g_enable_filter_push_down)
137  ->implicit_value(true),
138  "Enable filter push down through joins.");
139  help_desc.add_options()("enable-overlaps-hashjoin",
140  po::value<bool>(&g_enable_overlaps_hashjoin)
141  ->default_value(g_enable_overlaps_hashjoin)
142  ->implicit_value(true),
143  "Enable the overlaps hash join framework allowing for range "
144  "join (e.g. spatial overlaps) computation using a hash table.");
145  help_desc.add_options()("enable-hashjoin-many-to-many",
146  po::value<bool>(&g_enable_hashjoin_many_to_many)
147  ->default_value(g_enable_hashjoin_many_to_many)
148  ->implicit_value(true),
149  "Enable the overlaps hash join framework allowing for range "
150  "join (e.g. spatial overlaps) computation using a hash table.");
151  help_desc.add_options()("enable-runtime-query-interrupt",
152  po::value<bool>(&enable_runtime_query_interrupt)
153  ->default_value(enable_runtime_query_interrupt)
154  ->implicit_value(true),
155  "Enable runtime query interrupt.");
156  help_desc.add_options()("runtime-query-interrupt-frequency",
157  po::value<unsigned>(&runtime_query_interrupt_frequency)
158  ->default_value(runtime_query_interrupt_frequency)
159  ->implicit_value(1000),
160  "A frequency of checking the request of runtime query "
161  "interrupt from user (in millisecond).");
162  help_desc.add_options()("use-estimator-result-cache",
163  po::value<bool>(&use_estimator_result_cache)
164  ->default_value(use_estimator_result_cache)
165  ->implicit_value(true),
166  "Use estimator result cache.");
167  if (!dist_v5_) {
168  help_desc.add_options()(
169  "enable-string-dict-hash-cache",
170  po::value<bool>(&g_cache_string_hash)
171  ->default_value(g_cache_string_hash)
172  ->implicit_value(true),
173  "Cache string hash values in the string dictionary server during import.");
174  }
175  help_desc.add_options()(
176  "enable-thrift-logs",
177  po::value<bool>(&g_enable_thrift_logs)
178  ->default_value(g_enable_thrift_logs)
179  ->implicit_value(true),
180  "Enable writing messages directly from thrift to stdout/stderr.");
181  help_desc.add_options()("enable-watchdog",
182  po::value<bool>(&enable_watchdog)
183  ->default_value(enable_watchdog)
184  ->implicit_value(true),
185  "Enable watchdog.");
186  help_desc.add_options()(
187  "filter-push-down-low-frac",
188  po::value<float>(&g_filter_push_down_low_frac)
189  ->default_value(g_filter_push_down_low_frac)
190  ->implicit_value(g_filter_push_down_low_frac),
191  "Lower threshold for selectivity of filters that are pushed down.");
192  help_desc.add_options()(
193  "filter-push-down-high-frac",
194  po::value<float>(&g_filter_push_down_high_frac)
195  ->default_value(g_filter_push_down_high_frac)
196  ->implicit_value(g_filter_push_down_high_frac),
197  "Higher threshold for selectivity of filters that are pushed down.");
198  help_desc.add_options()("filter-push-down-passing-row-ubound",
199  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
201  ->implicit_value(g_filter_push_down_passing_row_ubound),
202  "Upperbound on the number of rows that should pass the filter "
203  "if the selectivity is less than "
204  "the high fraction threshold.");
205  help_desc.add_options()("from-table-reordering",
206  po::value<bool>(&g_from_table_reordering)
207  ->default_value(g_from_table_reordering)
208  ->implicit_value(true),
209  "Enable automatic table reordering in FROM clause.");
210  help_desc.add_options()("gpu-buffer-mem-bytes",
211  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
212  ->default_value(system_parameters.gpu_buffer_mem_bytes),
213  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
214  help_desc.add_options()("gpu-input-mem-limit",
215  po::value<double>(&system_parameters.gpu_input_mem_limit)
216  ->default_value(system_parameters.gpu_input_mem_limit),
217  "Force query to CPU when input data memory usage exceeds this "
218  "percentage of available GPU memory.");
219  help_desc.add_options()(
220  "hll-precision-bits",
221  po::value<int>(&g_hll_precision_bits)
222  ->default_value(g_hll_precision_bits)
223  ->implicit_value(g_hll_precision_bits),
224  "Number of bits used from the hash value used to specify the bucket number.");
225  if (!dist_v5_) {
226  help_desc.add_options()("http-port",
227  po::value<int>(&http_port)->default_value(http_port),
228  "HTTP port number.");
229  }
230  help_desc.add_options()(
231  "idle-session-duration",
232  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
233  "Maximum duration of idle session.");
234  help_desc.add_options()("inner-join-fragment-skipping",
235  po::value<bool>(&g_inner_join_fragment_skipping)
236  ->default_value(g_inner_join_fragment_skipping)
237  ->implicit_value(true),
238  "Enable/disable inner join fragment skipping. This feature is "
239  "considered stable and is enabled by default. This "
240  "parameter will be removed in a future release.");
241  help_desc.add_options()(
242  "max-session-duration",
243  po::value<int>(&max_session_duration)->default_value(max_session_duration),
244  "Maximum duration of active session.");
245  help_desc.add_options()(
246  "null-div-by-zero",
247  po::value<bool>(&g_null_div_by_zero)
248  ->default_value(g_null_div_by_zero)
249  ->implicit_value(true),
250  "Return null on division by zero instead of throwing an exception.");
251  help_desc.add_options()(
252  "num-reader-threads",
253  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
254  "Number of reader threads to use.");
255  help_desc.add_options()(
256  "overlaps-max-table-size-bytes",
257  po::value<size_t>(&g_overlaps_max_table_size_bytes)
258  ->default_value(g_overlaps_max_table_size_bytes),
259  "The maximum size in bytes of the hash table for an overlaps hash join.");
260  if (!dist_v5_) {
261  help_desc.add_options()("port,p",
262  po::value<int>(&system_parameters.omnisci_server_port)
263  ->default_value(system_parameters.omnisci_server_port),
264  "TCP Port number.");
265  }
266  help_desc.add_options()("num-gpus",
267  po::value<int>(&num_gpus)->default_value(num_gpus),
268  "Number of gpus to use.");
269  help_desc.add_options()(
270  "read-only",
271  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
272  "Enable read-only mode.");
273 
274  help_desc.add_options()(
275  "res-gpu-mem",
276  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
277  "Reduces GPU memory available to the OmniSci allocator by this amount. Used for "
278  "compiled code cache and ancillary GPU functions and other processes that may also "
279  "be using the GPU concurrent with OmniSciDB.");
280 
281  help_desc.add_options()("start-gpu",
282  po::value<int>(&start_gpu)->default_value(start_gpu),
283  "First gpu to use.");
284  help_desc.add_options()("trivial-loop-join-threshold",
285  po::value<unsigned>(&g_trivial_loop_join_threshold)
286  ->default_value(g_trivial_loop_join_threshold)
287  ->implicit_value(1000),
288  "The maximum number of rows in the inner table of a loop join "
289  "considered to be trivially small.");
290  help_desc.add_options()("verbose",
291  po::value<bool>(&verbose_logging)
292  ->default_value(verbose_logging)
293  ->implicit_value(true),
294  "Write additional debug log messages to server logs.");
295  help_desc.add_options()(
296  "enable-runtime-udf",
297  po::value<bool>(&enable_runtime_udf)
298  ->default_value(enable_runtime_udf)
299  ->implicit_value(true),
300  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
301  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
302  "Compiler server, packaged separately.");
303  help_desc.add_options()("version,v", "Print Version Number.");
304  help_desc.add_options()("enable-experimental-string-functions",
307  ->implicit_value(true),
308  "Enable experimental string functions.");
309 #ifdef ENABLE_FSI
310  help_desc.add_options()(
311  "enable-fsi",
312  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
313  "Enable foreign storage interface.");
314  help_desc.add_options()("encryption-key-store",
315  po::value<std::string>(&encryption_key_store_path),
316  "Path to directory where encryption related keys will reside.");
317  help_desc.add_options()("disk-cache-path",
318  po::value<std::string>(&disk_cache_config.path),
319  "Specify the path for the disk cache.");
320  help_desc.add_options()("enable-disk-cache",
321  po::value<bool>(&(disk_cache_config.is_enabled))
322  ->default_value(true)
323  ->implicit_value(true),
324  "Enable caching of table data on disk.");
325  help_desc.add_options()(
326  "disk-cache-entry-limit",
327  po::value<std::size_t>(&(disk_cache_config.entry_limit))->default_value(1024),
328  "Specify the size of the the disk cache.");
329 #endif // ENABLE_FSI
330  help_desc.add_options()(
331  "enable-interoperability",
332  po::value<bool>(&g_enable_interop)
333  ->default_value(g_enable_interop)
334  ->implicit_value(true),
335  "Enable offloading of query portions to an external execution engine.");
336  help_desc.add_options()("enable-union",
337  po::value<bool>(&g_enable_union)
338  ->default_value(g_enable_union)
339  ->implicit_value(true),
340  "Enable UNION ALL SQL clause.");
341  help_desc.add_options()(
342  "calcite-service-timeout",
343  po::value<size_t>(&system_parameters.calcite_timeout)
344  ->default_value(system_parameters.calcite_timeout),
345  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
346  "schema changes or when running large numbers of parallel queries.");
347  help_desc.add_options()("calcite-service-keepalive",
348  po::value<size_t>(&system_parameters.calcite_keepalive)
349  ->default_value(system_parameters.calcite_keepalive)
350  ->implicit_value(true),
351  "Enable keepalive on Calcite connections.");
352  help_desc.add_options()(
353  "stringdict-parallelizm",
354  po::value<bool>(&g_enable_stringdict_parallel)
355  ->default_value(g_enable_stringdict_parallel)
356  ->implicit_value(true),
357  "Allow StringDictionary to parallelize loads using multiple threads");
358  help_desc.add_options()("log-user-origin",
359  po::value<bool>(&log_user_origin)
360  ->default_value(log_user_origin)
361  ->implicit_value(true),
362  "Lookup the origin of inbound connections by IP address/DNS "
363  "name, and print this information as part of stdlog.");
365 }
366 
368  developer_desc.add_options()("dev-options", "Print internal developer options.");
369  developer_desc.add_options()(
370  "enable-calcite-view-optimize",
373  ->implicit_value(true),
374  "Enable additional calcite (query plan) optimizations when a view is part of the "
375  "query.");
376  developer_desc.add_options()(
377  "enable-columnar-output",
378  po::value<bool>(&g_enable_columnar_output)
379  ->default_value(g_enable_columnar_output)
380  ->implicit_value(true),
381  "Enable columnar output for intermediate/final query steps.");
382  developer_desc.add_options()("enable-legacy-syntax",
383  po::value<bool>(&enable_legacy_syntax)
384  ->default_value(enable_legacy_syntax)
385  ->implicit_value(true),
386  "Enable legacy syntax.");
387  developer_desc.add_options()(
388  "enable-multifrag",
389  po::value<bool>(&allow_multifrag)
390  ->default_value(allow_multifrag)
391  ->implicit_value(true),
392  "Enable execution over multiple fragments in a single round-trip to GPU.");
393  developer_desc.add_options()(
394  "enable-shared-mem-group-by",
395  po::value<bool>(&g_enable_smem_group_by)
396  ->default_value(g_enable_smem_group_by)
397  ->implicit_value(true),
398  "Enable using GPU shared memory for some GROUP BY queries.");
399  developer_desc.add_options()("num-executors",
400  po::value<int>(&system_parameters.num_executors)
401  ->default_value(system_parameters.num_executors),
402  "Number of executors to run in parallel.");
403  developer_desc.add_options()(
404  "gpu-shared-mem-threshold",
405  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
406  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
407  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
408  developer_desc.add_options()(
409  "enable-shared-mem-grouped-non-count-agg",
410  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
411  ->default_value(g_enable_smem_grouped_non_count_agg)
412  ->implicit_value(true),
413  "Enable using GPU shared memory for grouped non-count aggregate queries.");
414  developer_desc.add_options()(
415  "enable-shared-mem-non-grouped-agg",
416  po::value<bool>(&g_enable_smem_non_grouped_agg)
417  ->default_value(g_enable_smem_non_grouped_agg)
418  ->implicit_value(true),
419  "Enable using GPU shared memory for non-grouped aggregate queries.");
420  developer_desc.add_options()("enable-direct-columnarization",
421  po::value<bool>(&g_enable_direct_columnarization)
422  ->default_value(g_enable_direct_columnarization)
423  ->implicit_value(true),
424  "Enables/disables a more optimized columnarization method "
425  "for intermediate steps in multi-step queries.");
426  developer_desc.add_options()(
427  "offset-device-by-table-id",
428  po::value<bool>(&g_use_table_device_offset)
429  ->default_value(g_use_table_device_offset)
430  ->implicit_value(true),
431  "Enables/disables offseting the chosen device ID by the table ID for a given "
432  "fragment. This improves balance of fragments across GPUs.");
433  developer_desc.add_options()("enable-window-functions",
434  po::value<bool>(&g_enable_window_functions)
435  ->default_value(g_enable_window_functions)
436  ->implicit_value(true),
437  "Enable experimental window function support.");
438  developer_desc.add_options()("enable-table-functions",
439  po::value<bool>(&g_enable_table_functions)
440  ->default_value(g_enable_table_functions)
441  ->implicit_value(true),
442  "Enable experimental table functions support.");
443  developer_desc.add_options()(
444  "jit-debug-ir",
445  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
446  "Enable runtime debugger support for the JIT. Note that this flag is "
447  "incompatible "
448  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
449  "`/tmp/mapdquery`.");
450  developer_desc.add_options()(
451  "intel-jit-profile",
452  po::value<bool>(&intel_jit_profile)
453  ->default_value(intel_jit_profile)
454  ->implicit_value(true),
455  "Enable runtime support for the JIT code profiling using Intel VTune.");
456  developer_desc.add_options()(
457  "enable-modern-thread-pool",
458  po::value<bool>(&g_use_tbb_pool)
459  ->default_value(g_use_tbb_pool)
460  ->implicit_value(true),
461  "Enable a new thread pool implementation for queuing kernels for execution.");
462  developer_desc.add_options()(
463  "skip-intermediate-count",
464  po::value<bool>(&g_skip_intermediate_count)
465  ->default_value(g_skip_intermediate_count)
466  ->implicit_value(true),
467  "Skip pre-flight counts for intermediate projections with no filters.");
468  developer_desc.add_options()(
469  "strip-join-covered-quals",
470  po::value<bool>(&g_strip_join_covered_quals)
471  ->default_value(g_strip_join_covered_quals)
472  ->implicit_value(true),
473  "Remove quals from the filtered count if they are covered by a "
474  "join condition (currently only ST_Contains).");
475 
476  developer_desc.add_options()(
477  "min-cpu-slab-size",
478  po::value<size_t>(&system_parameters.min_cpu_slab_size)
479  ->default_value(system_parameters.min_cpu_slab_size),
480  "Min slab size (size of memory allocations) for CPU buffer pool.");
481  developer_desc.add_options()(
482  "max-cpu-slab-size",
483  po::value<size_t>(&system_parameters.max_cpu_slab_size)
484  ->default_value(system_parameters.max_cpu_slab_size),
485  "Max CPU buffer pool slab size (size of memory allocations). Note if "
486  "there is not enough free memory to accomodate the target slab size, smaller "
487  "slabs will be allocated, down to the minimum size specified by "
488  "min-cpu-slab-size.");
489  developer_desc.add_options()(
490  "min-gpu-slab-size",
491  po::value<size_t>(&system_parameters.min_gpu_slab_size)
492  ->default_value(system_parameters.min_gpu_slab_size),
493  "Min slab size (size of memory allocations) for GPU buffer pools.");
494  developer_desc.add_options()(
495  "max-gpu-slab-size",
496  po::value<size_t>(&system_parameters.max_gpu_slab_size)
497  ->default_value(system_parameters.max_gpu_slab_size),
498  "Max GPU buffer pool slab size (size of memory allocations). Note if "
499  "there is not enough free memory to accomodate the target slab size, smaller "
500  "slabs will be allocated, down to the minimum size speified by "
501  "min-gpu-slab-size.");
502 
503  developer_desc.add_options()(
504  "max-output-projection-allocation-bytes",
505  po::value<size_t>(&g_max_memory_allocation_size)
506  ->default_value(g_max_memory_allocation_size),
507  "Maximum allocation size for a fixed output buffer allocation for projection "
508  "queries with no pre-flight count. Default is the maximum slab size (sizes "
509  "greater "
510  "than the maximum slab size have no affect). Requires bump allocator.");
511  developer_desc.add_options()(
512  "min-output-projection-allocation-bytes",
513  po::value<size_t>(&g_min_memory_allocation_size)
514  ->default_value(g_min_memory_allocation_size),
515  "Minimum allocation size for a fixed output buffer allocation for projection "
516  "queries with no pre-flight count. If an allocation of this size cannot be "
517  "obtained, the query will be retried with different execution parameters and/or "
518  "on "
519  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
520  developer_desc.add_options()("enable-bump-allocator",
521  po::value<bool>(&g_enable_bump_allocator)
522  ->default_value(g_enable_bump_allocator)
523  ->implicit_value(true),
524  "Enable the bump allocator for projection queries on "
525  "GPU. The bump allocator will "
526  "allocate a fixed size buffer for each query, track the "
527  "number of rows passing the "
528  "kernel during query execution, and copy back only the "
529  "rows that passed the kernel "
530  "to CPU after execution. When disabled, pre-flight "
531  "count queries are used to size "
532  "the output buffer for projection queries.");
533  developer_desc.add_options()(
534  "code-cache-eviction-percent",
535  po::value<float>(&g_fraction_code_cache_to_evict)
536  ->default_value(g_fraction_code_cache_to_evict),
537  "Percentage of the GPU code cache to evict if an out of memory error is "
538  "encountered while attempting to place generated code on the GPU.");
539 
540  developer_desc.add_options()("ssl-cert",
541  po::value<std::string>(&system_parameters.ssl_cert_file)
542  ->default_value(std::string("")),
543  "SSL Validated public certficate.");
544 
545  developer_desc.add_options()("ssl-private-key",
546  po::value<std::string>(&system_parameters.ssl_key_file)
547  ->default_value(std::string("")),
548  "SSL private key file.");
549  // Note ssl_trust_store is passed through to Calcite via system_parameters
550  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
551  developer_desc.add_options()("ssl-trust-store",
552  po::value<std::string>(&system_parameters.ssl_trust_store)
553  ->default_value(std::string("")),
554  "SSL public CA certifcates (java trust store) to validate "
555  "TLS connections (passed through to the Calcite server).");
556 
557  developer_desc.add_options()(
558  "ssl-trust-password",
559  po::value<std::string>(&system_parameters.ssl_trust_password)
560  ->default_value(std::string("")),
561  "SSL password for java trust store provided via --ssl-trust-store parameter.");
562 
563  developer_desc.add_options()(
564  "ssl-trust-ca",
565  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
566  ->default_value(std::string("")),
567  "SSL public CA certificates to validate TLS connection(as a client).");
568 
569  developer_desc.add_options()(
570  "ssl-trust-ca-server",
571  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
572  "SSL public CA certificates to validate TLS connection(as a server).");
573 
574  developer_desc.add_options()("ssl-keystore",
575  po::value<std::string>(&system_parameters.ssl_keystore)
576  ->default_value(std::string("")),
577  "SSL server credentials as a java key store (passed "
578  "through to the Calcite server).");
579 
580  developer_desc.add_options()(
581  "ssl-keystore-password",
582  po::value<std::string>(&system_parameters.ssl_keystore_password)
583  ->default_value(std::string("")),
584  "SSL password for java keystore, provide by via --ssl-keystore.");
585 
586  developer_desc.add_options()(
587  "udf",
588  po::value<std::string>(&udf_file_name),
589  "Load user defined extension functions from this file at startup. The file is "
590  "expected to be a C/C++ file with extension .cpp.");
591 
592  developer_desc.add_options()(
593  "udf-compiler-path",
594  po::value<std::string>(&udf_compiler_path),
595  "Provide absolute path to clang++ used in udf compilation.");
596 
597  developer_desc.add_options()("udf-compiler-options",
598  po::value<std::vector<std::string>>(&udf_compiler_options),
599  "Specify compiler options to tailor udf compilation.");
600 
601 #ifdef ENABLE_GEOS
602  developer_desc.add_options()("libgeos-so-filename",
603  po::value<std::string>(&libgeos_so_filename),
604  "Specify libgeos shared object filename to be used for "
605  "geos-backed geo opertations.");
606 #endif
607  developer_desc.add_options()(
608  "large-ndv-threshold",
609  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
610  developer_desc.add_options()(
611  "large-ndv-multiplier",
612  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
613 }
614 
615 namespace {
616 
/**
 * Copies the server portion of a configuration file into an in-memory stream so
 * that boost::program_options can validate it.
 *
 * Lines are read from `in` and forwarded verbatim, with two exceptions:
 *  - lines whose key starts with "allowed-import-paths" or "allowed-export-paths"
 *    are dropped, because they are config-file-only settings;
 *  - reading stops right after the "[web]" section header (the header itself is
 *    still forwarded), so nothing from the web section reaches the parser.
 *
 * Matching is done on a whitespace-trimmed copy of each line. The config parser
 * trims lines before interpreting them, so an indented key or a CRLF-terminated
 * "[web]" header must be recognised here as well; otherwise the content this
 * function is meant to hide would leak through and be rejected as unknown.
 *
 * @param in  Open input stream positioned at the start of the config file.
 * @return    Stream holding the retained lines, each terminated by '\n'.
 */
std::stringstream sanitize_config_file(std::ifstream& in) {
  constexpr const char* kBlank = " \t\r\n\v\f";
  const auto has_prefix = [](const std::string& text, const std::string& prefix) {
    // compare() clamps the length to text.size(), so a short `text` simply mismatches.
    return text.compare(0, prefix.size(), prefix) == 0;
  };

  std::stringstream ss;
  std::string line;
  while (std::getline(in, line)) {
    // Build the trimmed view used only for matching; the original text is what
    // gets forwarded so the parser sees the file unchanged.
    const auto first = line.find_first_not_of(kBlank);
    const auto last = line.find_last_not_of(kBlank);
    const std::string trimmed =
        first == std::string::npos ? std::string{} : line.substr(first, last - first + 1);

    // Skip config file only options
    if (!has_prefix(trimmed, "allowed-import-paths") &&
        !has_prefix(trimmed, "allowed-export-paths")) {
      ss << line << "\n";
    }
    if (trimmed == "[web]") {
      break;
    }
  }
  return ss;
}
633 
634 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
635  if (!filename.empty()) {
636  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
637  if (!boost::filesystem::exists(filename)) {
638  std::cerr << desc << " " << filename << " does not exist." << std::endl;
639  return false;
640  }
641  }
642  return true;
643 }
644 
646  if (!filename.empty()) {
648  }
649 }
650 
651 } // namespace
652 
654  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
655  if (!boost::filesystem::exists(base_path)) {
656  throw std::runtime_error("OmniSci base directory does not exist at " + base_path);
657  }
658 }
659 
661  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
662  const auto data_path = boost::filesystem::path(base_path) / "mapd_data";
663  if (!boost::filesystem::exists(data_path)) {
664  throw std::runtime_error("OmniSci data directory does not exist at '" + base_path +
665  "'");
666  }
667 
668  {
669  const auto lock_file = boost::filesystem::path(base_path) / "omnisci_server_pid.lck";
670  auto pid = std::to_string(getpid());
671 
672  int pid_fd = open(lock_file.c_str(), O_RDWR | O_CREAT, 0644);
673  if (pid_fd == -1) {
674  auto err = std::string("Failed to open PID file ") + lock_file.c_str() + ". " +
675  strerror(errno) + ".";
676  throw std::runtime_error(err);
677  }
678  if (lockf(pid_fd, F_TLOCK, 0) == -1) {
679  close(pid_fd);
680  auto err = std::string("Another OmniSci Server is using data directory ") +
681  base_path + ".";
682  throw std::runtime_error(err);
683  }
684  if (ftruncate(pid_fd, 0) == -1) {
685  close(pid_fd);
686  auto err = std::string("Failed to truncate PID file ") + lock_file.c_str() + ". " +
687  strerror(errno) + ".";
688  throw std::runtime_error(err);
689  }
690  if (write(pid_fd, pid.c_str(), pid.length()) == -1) {
691  close(pid_fd);
692  auto err = std::string("Failed to write PID file ") + lock_file.c_str() + ". " +
693  strerror(errno) + ".";
694  throw std::runtime_error(err);
695  }
696  }
697  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
698  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
699  throw std::runtime_error("File containing DB queries " + db_query_file +
700  " does not exist.");
701  }
702  const auto db_file =
703  boost::filesystem::path(base_path) / "mapd_catalogs" / OMNISCI_SYSTEM_CATALOG;
704  if (!boost::filesystem::exists(db_file)) {
705  { // check old system catalog existsense
706  const auto db_file = boost::filesystem::path(base_path) / "mapd_catalogs/mapd";
707  if (!boost::filesystem::exists(db_file)) {
708  throw std::runtime_error("OmniSci system catalog " + OMNISCI_SYSTEM_CATALOG +
709  " does not exist.");
710  }
711  }
712  }
713  if (license_path.length() == 0) {
714  license_path = base_path + "/omnisci.license";
715  }
716 
717  // add all parameters to be displayed on startup
718  LOG(INFO) << "OmniSci started with data directory at '" << base_path << "'";
719  if (vm.count("license-path")) {
720  LOG(INFO) << "License key path set to '" << license_path << "'";
721  }
722  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
723  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
725  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
726  }
727  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
729  LOG(INFO) << " A frequency of checking runtime query interrupt request is set to "
730  << runtime_query_interrupt_frequency << " (in ms.)";
731  }
732 
733  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
734 
735  LOG(INFO) << " Maximum Idle session duration " << idle_session_duration;
736 
737  LOG(INFO) << " Maximum active session duration " << max_session_duration;
738 
740 
744 
745  if (g_enable_fsi) {
746  if (disk_cache_config.path.empty()) {
747  disk_cache_config.path = base_path + "/omnisci_disk_cache";
748  }
750  }
751 
754 
755  // If passed in, blacklist all security config files
764 }
765 
// CommandLineOptions::parse_command_line: parses the command line (and, when a
// "config" option is present, the config file) into `vm`, then trims, validates
// and logs a selection of the resulting settings.
//
// Parameters:
//   argc, argv           - forwarded unchanged to po::command_line_parser.
//   should_init_logging  - when true, init_logging() is called once parsing is done.
//
// Returns: an int on every early exit visible below (0 after help / dev-options /
// version output, 1 for usage or validation failures, 5/6/7 for missing HA
// settings) and boost::none when the whole function runs through.
// NOTE(review): presumably the caller treats a returned int as a process exit
// code and boost::none as "continue startup" - confirm against the call site.
//
// NOTE(review): this listing is missing the signature line (766) and several
// interior lines (e.g. 795, 801, 804, 807, 810, 835-840, 851, 855, 894, 923);
// the notes below mark each gap instead of guessing the missing code.
767  int argc,
768  char const* const* argv,
769  const bool should_init_logging) {
// Accept both the regular and the developer option groups when parsing.
770  po::options_description all_desc("All options");
771  all_desc.add(help_desc).add(developer_desc);
772 
773  try {
// The command line is stored first. Per the Boost.Program_options documentation,
// po::store() does not overwrite values already present in the map, so the
// config file parsed below can only fill in options not given on the command line.
774  po::store(po::command_line_parser(argc, argv)
775  .options(all_desc)
776  .positional(positional_options)
777  .run(),
778  vm);
779  po::notify(vm);
780 
781  if (vm.count("config")) {
// NOTE(review): no check that the stream opened successfully is visible here;
// verify whether sanitize_config_file copes with a failed open.
782  std::ifstream settings_file(system_parameters.config_file);
783 
784  auto sanitized_settings = sanitize_config_file(settings_file);
785 
// Third argument `false` is parse_config_file's allow_unregistered flag, so an
// option not registered in all_desc is reported as an error (caught below).
786  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
787  po::notify(vm);
788  settings_file.close();
789  }
790 
791  if (should_init_logging) {
792  init_logging();
793  }
794 
// NOTE(review): the `if` lines guarding most of the following `return 1;`
// statements (795, 801, 804, 807, 810) are missing from this listing. The one
// visible guard calls trim_and_check_file_exists() on a file-name setting; the
// others presumably follow the same pattern - confirm against the real file.
796  return 1;
797  }
798  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
799  return 1;
800  }
802  return 1;
803  }
805  return 1;
806  }
808  return 1;
809  }
811  return 1;
812  }
813 
// Informational options: print the requested text and return 0.
// Note that the "help" usage line goes to std::cerr while the "dev-options"
// usage line goes to std::cout.
814  if (vm.count("help")) {
815  std::cerr << "Usage: omnisci_server <data directory path> [-p <port number>] "
816  "[--http-port <http port number>] [--flush-log] [--version|-v]"
817  << std::endl
818  << std::endl;
819  std::cout << help_desc << std::endl;
820  return 0;
821  }
822  if (vm.count("dev-options")) {
823  std::cout << "Usage: omnisci_server <data directory path> [-p <port number>] "
824  "[--http-port <http port number>] [--flush-log] [--version|-v]"
825  << std::endl
826  << std::endl;
827  std::cout << developer_desc << std::endl;
828  return 0;
829  }
830  if (vm.count("version")) {
831  std::cout << "OmniSci Version: " << MAPD_RELEASE << std::endl;
832  return 0;
833  }
834 
// NOTE(review): lines 835-840 are missing from this listing.
// Any Boost.Program_options parsing error raised inside the try block is
// reported as a usage error and mapped to return value 1.
841  } catch (po::error& e) {
842  std::cerr << "Usage Error: " << e.what() << std::endl;
843  return 1;
844  }
845 
// Reject HyperLogLog precision values outside the inclusive range [1, 16].
846  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
847  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
848  return 1;
849  }
850 
// NOTE(review): the conditions on lines 851 and 855 are missing from this
// listing; only the log statements they guard are visible.
852  LOG(INFO) << " From clause table reordering is disabled";
853  }
854 
856  LOG(INFO) << " Filter push down for JOIN is enabled";
857  }
858 
// When a UDF file was supplied, strip surrounding quote characters from its
// name and fail (return 1) if the file does not exist.
859  if (vm.count("udf")) {
860  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
861 
862  if (!boost::filesystem::exists(udf_file_name)) {
863  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
864  return 1;
865  }
866 
867  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
868  }
869 
870  if (vm.count("udf-compiler-path")) {
871  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
872  }
873 
// Helper that strips leading/trailing single and double quotes in place.
874  auto trim_string = [](std::string& s) {
875  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
876  };
877 
878  if (vm.count("udf-compiler-options")) {
879  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
880  }
881 
882  if (enable_runtime_udf) {
883  LOG(INFO) << " Runtime user defined extension functions enabled globally.";
884  }
885 
// Strip surrounding quote characters from the HA settings before validating them.
886  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
887  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
888  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
889  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
890  boost::is_any_of("\"'"));
891 
// A non-empty HA group id makes the remaining HA settings mandatory; each
// missing one is logged and yields its own return value (5, 6 or 7).
892  if (!system_parameters.ha_group_id.empty()) {
893  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
// NOTE(review): the condition on line 894 is missing from this listing; judging
// by the sibling checks below it presumably tests ha_unique_server_id.empty().
895  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
896  return 5;
897  } else {
898  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
899  }
900  if (system_parameters.ha_brokers.empty()) {
901  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
902  return 6;
903  } else {
904  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
905  }
906  if (system_parameters.ha_shared_data.empty()) {
907  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
908  return 7;
909  } else {
910  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
911  }
912  }
// Log the effective system parameters.
913  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
914  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
915  LOG(INFO) << " Min CPU buffer pool slab size " << system_parameters.min_cpu_slab_size;
916  LOG(INFO) << " Max CPU buffer pool slab size " << system_parameters.max_cpu_slab_size;
917  LOG(INFO) << " Min GPU buffer pool slab size " << system_parameters.min_gpu_slab_size;
918  LOG(INFO) << " Max GPU buffer pool slab size " << system_parameters.max_gpu_slab_size;
919  LOG(INFO) << " calcite JVM max memory " << system_parameters.calcite_max_mem;
920  LOG(INFO) << " OmniSci Server Port " << system_parameters.omnisci_server_port;
921  LOG(INFO) << " OmniSci Calcite Port " << system_parameters.calcite_port;
// NOTE(review): the continuation of the next statement (line 923) is missing
// from this listing.
922  LOG(INFO) << " Enable Calcite view optimize "
924 
925  LOG(INFO) << " Allow Local Auth Fallback: "
926  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
927 
// Strip surrounding quote characters from the authentication settings.
928  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
929  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
930  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
931  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
932  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
933  boost::algorithm::trim_if(authMetadata.restToken, boost::is_any_of("\"'"));
934  boost::algorithm::trim_if(authMetadata.restUrl, boost::is_any_of("\"'"));
935 
// No early exit was taken: return an empty optional.
936  return boost::none;
937 }
std::string distinguishedName
Definition: AuthMetadata.h:25
unsigned connect_timeout
std::string filename(char const *path)
Definition: Logger.cpp:62
float g_filter_push_down_low_frac
Definition: Execute.cpp:87
std::string ldapQueryUrl
Definition: AuthMetadata.h:26
bool g_enable_smem_group_by
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
logger::LogOptions log_options_
bool g_strip_join_covered_quals
Definition: Execute.cpp:95
unsigned runtime_query_interrupt_frequency
int64_t g_large_ndv_threshold
bool g_enable_direct_columnarization
Definition: Execute.cpp:106
std::string ha_shared_data
std::string udf_compiler_path
bool g_skip_intermediate_count
po::options_description help_desc
#define LOG(tag)
Definition: Logger.h:188
bool enable_calcite_view_optimize
bool g_enable_union
DiskCacheConfig disk_cache_config
bool g_enable_debug_timer
Definition: Logger.cpp:17
std::string ldapRoleRegex
Definition: AuthMetadata.h:27
static void initializeFromConfigFile(const std::string &server_config_path)
Definition: DdlUtils.cpp:664
const std::string OMNISCI_SYSTEM_CATALOG
Definition: SysCatalog.h:57
float g_fraction_code_cache_to_evict
boost::program_options::options_description const & get_options() const
Definition: Logger.cpp:79
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:89
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:75
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:80
int g_hll_precision_bits
std::string config_file
std::string to_string(char const *&&v)
std::string encryption_key_store_path
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:91
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:82
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:119
bool g_null_div_by_zero
Definition: Execute.cpp:79
std::string restToken
Definition: AuthMetadata.h:31
bool g_enable_interop
std::string ha_brokers
std::string ssl_trust_ca_file
size_t g_large_ndv_multiplier
bool g_enable_columnar_output
Definition: Execute.cpp:90
std::string ssl_trust_store
bool g_from_table_reordering
Definition: Execute.cpp:81
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C library...
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:92
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
std::string restUrl
Definition: AuthMetadata.h:30
float g_filter_push_down_high_frac
Definition: Execute.cpp:88
std::string uri
Definition: AuthMetadata.h:24
bool g_bigint_count
size_t g_max_memory_allocation_size
Definition: Execute.cpp:100
std::string ha_unique_server_id
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:94
bool g_enable_thrift_logs
std::string ca_file_name
Definition: AuthMetadata.h:33
std::string ssl_key_file
AuthMetadata authMetadata
bool g_enable_window_functions
Definition: Execute.cpp:98
size_t g_min_memory_allocation_size
Definition: Execute.cpp:101
bool with_keepalive
unsigned g_runtime_query_interrupt_frequency
Definition: Execute.cpp:110
std::string ldapSuperUserRole
Definition: AuthMetadata.h:28
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:116
bool g_enable_experimental_string_functions
std::vector< std::string > udf_compiler_options
static const std::string nodeIds_token
bool g_cache_string_hash
Definition: Execute.cpp:93
Severity severity_
Definition: Logger.h:118
std::string ssl_keystore_password
std::string ssl_trust_password
bool g_enable_filter_push_down
Definition: Execute.cpp:86
bool g_use_estimator_result_cache
Definition: Execute.cpp:109
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:87
bool g_enable_bump_allocator
Definition: Execute.cpp:104
bool allowLocalAuthFallback
Definition: AuthMetadata.h:34
bool g_use_table_device_offset
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:125
po::positional_options_description positional_options
void set_base_path(std::string const &base_path)
Definition: Logger.cpp:93
std::string ssl_keystore
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:106
bool g_allow_cpu_retry
Definition: Execute.cpp:78
bool g_enable_watchdog
Definition: Execute.cpp:74
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:727
po::options_description developer_desc
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:43
static bool run
po::variables_map vm
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:77
bool g_enable_fsi
Definition: Catalog.cpp:90
bool g_use_tbb_pool
Definition: Execute.cpp:76
std::string ha_group_id
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:108
bool g_enable_table_functions
Definition: Execute.cpp:99
std::string cluster_command_line_arg
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
size_t g_gpu_smem_threshold
Definition: Execute.cpp:111
std::string ssl_cert_file