OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <iostream>
22 
23 #include "CommandLineOptions.h"
24 #include "LeafHostInfo.h"
25 #include "MapDRelease.h"
27 #include "Shared/Compressor.h"
29 #include "Utils/DdlUtils.h"
30 
31 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
32 
33 extern std::string cluster_command_line_arg;
34 
36 
37 extern bool g_use_table_device_offset;
39 extern bool g_cache_string_hash;
40 
41 extern int64_t g_large_ndv_threshold;
42 extern size_t g_large_ndv_multiplier;
43 extern int64_t g_bitmap_memory_limit;
44 extern bool g_enable_calcite_ddl_parser;
45 
46 unsigned connect_timeout{20000};
47 unsigned recv_timeout{300000};
48 unsigned send_timeout{300000};
49 bool with_keepalive{false};
50 
54  }
58 }
59 
61  help_desc.add_options()("help,h", "Show available options.");
62  help_desc.add_options()(
63  "allow-cpu-retry",
64  po::value<bool>(&g_allow_cpu_retry)
65  ->default_value(g_allow_cpu_retry)
66  ->implicit_value(true),
67  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
68  help_desc.add_options()("allow-loop-joins",
69  po::value<bool>(&allow_loop_joins)
70  ->default_value(allow_loop_joins)
71  ->implicit_value(true),
72  "Enable loop joins.");
73  help_desc.add_options()("bigint-count",
74  po::value<bool>(&g_bigint_count)
75  ->default_value(g_bigint_count)
76  ->implicit_value(true),
77  "Use 64-bit count.");
78  help_desc.add_options()("calcite-max-mem",
79  po::value<size_t>(&system_parameters.calcite_max_mem)
80  ->default_value(system_parameters.calcite_max_mem),
81  "Max memory available to calcite JVM.");
82  if (!dist_v5_) {
83  help_desc.add_options()("calcite-port",
84  po::value<int>(&system_parameters.calcite_port)
85  ->default_value(system_parameters.calcite_port),
86  "Calcite port number.");
87  }
88  help_desc.add_options()("config",
89  po::value<std::string>(&system_parameters.config_file),
90  "Path to server configuration file.");
91  help_desc.add_options()("cpu-buffer-mem-bytes",
92  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
93  ->default_value(system_parameters.cpu_buffer_mem_bytes),
94  "Size of memory reserved for CPU buffers, in bytes.");
95 
96  help_desc.add_options()(
97  "cpu-only",
98  po::value<bool>(&cpu_only)->default_value(cpu_only)->implicit_value(true),
99  "Run on CPU only, even if GPUs are available.");
100  help_desc.add_options()("cuda-block-size",
101  po::value<size_t>(&system_parameters.cuda_block_size)
102  ->default_value(system_parameters.cuda_block_size),
103  "Size of block to use on GPU.");
104  help_desc.add_options()("cuda-grid-size",
105  po::value<size_t>(&system_parameters.cuda_grid_size)
106  ->default_value(system_parameters.cuda_grid_size),
107  "Size of grid to use on GPU.");
108  if (!dist_v5_) {
109  help_desc.add_options()(
110  "data",
111  po::value<std::string>(&base_path)->required()->default_value("data"),
112  "Directory path to OmniSci data storage (catalogs, raw data, log files, etc).");
113  positional_options.add("data", 1);
114  }
115  help_desc.add_options()("db-query-list",
116  po::value<std::string>(&db_query_file),
117  "Path to file containing OmniSci warmup queries.");
118  help_desc.add_options()(
119  "exit-after-warmup",
120  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
121  "Exit after OmniSci warmup queries.");
122  help_desc.add_options()("dynamic-watchdog-time-limit",
123  po::value<unsigned>(&dynamic_watchdog_time_limit)
124  ->default_value(dynamic_watchdog_time_limit)
125  ->implicit_value(10000),
126  "Dynamic watchdog time limit, in milliseconds.");
127  help_desc.add_options()("enable-debug-timer",
128  po::value<bool>(&g_enable_debug_timer)
129  ->default_value(g_enable_debug_timer)
130  ->implicit_value(true),
131  "Enable debug timer logging.");
132  help_desc.add_options()("enable-dynamic-watchdog",
133  po::value<bool>(&enable_dynamic_watchdog)
134  ->default_value(enable_dynamic_watchdog)
135  ->implicit_value(true),
136  "Enable dynamic watchdog.");
137  help_desc.add_options()("enable-filter-push-down",
138  po::value<bool>(&g_enable_filter_push_down)
139  ->default_value(g_enable_filter_push_down)
140  ->implicit_value(true),
141  "Enable filter push down through joins.");
142  help_desc.add_options()("enable-overlaps-hashjoin",
143  po::value<bool>(&g_enable_overlaps_hashjoin)
144  ->default_value(g_enable_overlaps_hashjoin)
145  ->implicit_value(true),
146  "Enable the overlaps hash join framework allowing for range "
147  "join (e.g. spatial overlaps) computation using a hash table.");
148  help_desc.add_options()("enable-hashjoin-many-to-many",
149  po::value<bool>(&g_enable_hashjoin_many_to_many)
150  ->default_value(g_enable_hashjoin_many_to_many)
151  ->implicit_value(true),
152  "Enable the overlaps hash join framework allowing for range "
153  "join (e.g. spatial overlaps) computation using a hash table.");
154  help_desc.add_options()("enable-runtime-query-interrupt",
155  po::value<bool>(&enable_runtime_query_interrupt)
156  ->default_value(enable_runtime_query_interrupt)
157  ->implicit_value(true),
158  "Enable runtime query interrupt.");
159  help_desc.add_options()("pending-query-interrupt-freq",
160  po::value<unsigned>(&pending_query_interrupt_freq)
161  ->default_value(pending_query_interrupt_freq)
162  ->implicit_value(1000),
163  "A frequency of checking the request of pending query "
164  "interrupt from user (in millisecond).");
165  help_desc.add_options()(
166  "running-query-interrupt-freq",
167  po::value<double>(&running_query_interrupt_freq)
168  ->default_value(running_query_interrupt_freq)
169  ->implicit_value(0.5),
170  "A frequency of checking the request of running query "
171  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
172  help_desc.add_options()("use-estimator-result-cache",
173  po::value<bool>(&use_estimator_result_cache)
174  ->default_value(use_estimator_result_cache)
175  ->implicit_value(true),
176  "Use estimator result cache.");
177  if (!dist_v5_) {
178  help_desc.add_options()(
179  "enable-string-dict-hash-cache",
180  po::value<bool>(&g_cache_string_hash)
181  ->default_value(g_cache_string_hash)
182  ->implicit_value(true),
183  "Cache string hash values in the string dictionary server during import.");
184  }
185  help_desc.add_options()(
186  "enable-thrift-logs",
187  po::value<bool>(&g_enable_thrift_logs)
188  ->default_value(g_enable_thrift_logs)
189  ->implicit_value(true),
190  "Enable writing messages directly from thrift to stdout/stderr.");
191  help_desc.add_options()("enable-watchdog",
192  po::value<bool>(&enable_watchdog)
193  ->default_value(enable_watchdog)
194  ->implicit_value(true),
195  "Enable watchdog.");
196  help_desc.add_options()(
197  "filter-push-down-low-frac",
198  po::value<float>(&g_filter_push_down_low_frac)
199  ->default_value(g_filter_push_down_low_frac)
200  ->implicit_value(g_filter_push_down_low_frac),
201  "Lower threshold for selectivity of filters that are pushed down.");
202  help_desc.add_options()(
203  "filter-push-down-high-frac",
204  po::value<float>(&g_filter_push_down_high_frac)
205  ->default_value(g_filter_push_down_high_frac)
206  ->implicit_value(g_filter_push_down_high_frac),
207  "Higher threshold for selectivity of filters that are pushed down.");
208  help_desc.add_options()("filter-push-down-passing-row-ubound",
209  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
211  ->implicit_value(g_filter_push_down_passing_row_ubound),
212  "Upperbound on the number of rows that should pass the filter "
213  "if the selectivity is less than "
214  "the high fraction threshold.");
215  help_desc.add_options()("from-table-reordering",
216  po::value<bool>(&g_from_table_reordering)
217  ->default_value(g_from_table_reordering)
218  ->implicit_value(true),
219  "Enable automatic table reordering in FROM clause.");
220  help_desc.add_options()("gpu-buffer-mem-bytes",
221  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
222  ->default_value(system_parameters.gpu_buffer_mem_bytes),
223  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
224  help_desc.add_options()("gpu-input-mem-limit",
225  po::value<double>(&system_parameters.gpu_input_mem_limit)
226  ->default_value(system_parameters.gpu_input_mem_limit),
227  "Force query to CPU when input data memory usage exceeds this "
228  "percentage of available GPU memory.");
229  help_desc.add_options()(
230  "hll-precision-bits",
231  po::value<int>(&g_hll_precision_bits)
232  ->default_value(g_hll_precision_bits)
233  ->implicit_value(g_hll_precision_bits),
234  "Number of bits used from the hash value used to specify the bucket number.");
235  if (!dist_v5_) {
236  help_desc.add_options()("http-port",
237  po::value<int>(&http_port)->default_value(http_port),
238  "HTTP port number.");
239  }
240  help_desc.add_options()(
241  "idle-session-duration",
242  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
243  "Maximum duration of idle session.");
244  help_desc.add_options()("inner-join-fragment-skipping",
245  po::value<bool>(&g_inner_join_fragment_skipping)
246  ->default_value(g_inner_join_fragment_skipping)
247  ->implicit_value(true),
248  "Enable/disable inner join fragment skipping. This feature is "
249  "considered stable and is enabled by default. This "
250  "parameter will be removed in a future release.");
251  help_desc.add_options()(
252  "max-session-duration",
253  po::value<int>(&max_session_duration)->default_value(max_session_duration),
254  "Maximum duration of active session.");
255  help_desc.add_options()(
256  "null-div-by-zero",
257  po::value<bool>(&g_null_div_by_zero)
258  ->default_value(g_null_div_by_zero)
259  ->implicit_value(true),
260  "Return null on division by zero instead of throwing an exception.");
261  help_desc.add_options()(
262  "num-reader-threads",
263  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
264  "Number of reader threads to use.");
265  help_desc.add_options()(
266  "overlaps-max-table-size-bytes",
267  po::value<size_t>(&g_overlaps_max_table_size_bytes)
268  ->default_value(g_overlaps_max_table_size_bytes),
269  "The maximum size in bytes of the hash table for an overlaps hash join.");
270  if (!dist_v5_) {
271  help_desc.add_options()("port,p",
272  po::value<int>(&system_parameters.omnisci_server_port)
273  ->default_value(system_parameters.omnisci_server_port),
274  "TCP Port number.");
275  }
276  help_desc.add_options()("num-gpus",
277  po::value<int>(&num_gpus)->default_value(num_gpus),
278  "Number of gpus to use.");
279  help_desc.add_options()(
280  "read-only",
281  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
282  "Enable read-only mode.");
283 
284  help_desc.add_options()(
285  "res-gpu-mem",
286  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
287  "Reduces GPU memory available to the OmniSci allocator by this amount. Used for "
288  "compiled code cache and ancillary GPU functions and other processes that may also "
289  "be using the GPU concurrent with OmniSciDB.");
290 
291  help_desc.add_options()("start-gpu",
292  po::value<int>(&start_gpu)->default_value(start_gpu),
293  "First gpu to use.");
294  help_desc.add_options()("trivial-loop-join-threshold",
295  po::value<unsigned>(&g_trivial_loop_join_threshold)
296  ->default_value(g_trivial_loop_join_threshold)
297  ->implicit_value(1000),
298  "The maximum number of rows in the inner table of a loop join "
299  "considered to be trivially small.");
300  help_desc.add_options()("verbose",
301  po::value<bool>(&verbose_logging)
302  ->default_value(verbose_logging)
303  ->implicit_value(true),
304  "Write additional debug log messages to server logs.");
305  help_desc.add_options()(
306  "enable-runtime-udf",
307  po::value<bool>(&enable_runtime_udf)
308  ->default_value(enable_runtime_udf)
309  ->implicit_value(true),
310  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
311  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
312  "Compiler server, packaged separately.");
313  help_desc.add_options()("version,v", "Print Version Number.");
314  help_desc.add_options()("enable-experimental-string-functions",
317  ->implicit_value(true),
318  "Enable experimental string functions.");
319 #ifdef ENABLE_FSI
320  help_desc.add_options()(
321  "enable-fsi",
322  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
323  "Enable foreign storage interface.");
324  help_desc.add_options()("encryption-key-store",
325  po::value<std::string>(&encryption_key_store_path),
326  "Path to directory where encryption related keys will reside.");
327  help_desc.add_options()("disk-cache-level",
328  po::value<std::string>(&(disk_cache_level))
329  ->default_value("fsi")
330  ->implicit_value("fsi"),
331  "Specify level of disk cache. Valid options are 'fsi', "
332  "'non_fsi, 'none', and 'all'.");
333  help_desc.add_options()("disk-cache-path",
334  po::value<std::string>(&disk_cache_config.path),
335  "Specify the path for the disk cache.");
336  help_desc.add_options()(
337  "disk-cache-size-limit",
338  po::value<std::size_t>(&(disk_cache_config.size_limit)),
339  "Specify the maximum size of the the disk cache per table in bytes.");
340 #endif // ENABLE_FSI
341  help_desc.add_options()(
342  "enable-interoperability",
343  po::value<bool>(&g_enable_interop)
344  ->default_value(g_enable_interop)
345  ->implicit_value(true),
346  "Enable offloading of query portions to an external execution engine.");
347  help_desc.add_options()("enable-union",
348  po::value<bool>(&g_enable_union)
349  ->default_value(g_enable_union)
350  ->implicit_value(true),
351  "Enable UNION ALL SQL clause.");
352  help_desc.add_options()(
353  "calcite-service-timeout",
354  po::value<size_t>(&system_parameters.calcite_timeout)
355  ->default_value(system_parameters.calcite_timeout),
356  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
357  "schema changes or when running large numbers of parallel queries.");
358  help_desc.add_options()("calcite-service-keepalive",
359  po::value<size_t>(&system_parameters.calcite_keepalive)
360  ->default_value(system_parameters.calcite_keepalive)
361  ->implicit_value(true),
362  "Enable keepalive on Calcite connections.");
363  help_desc.add_options()(
364  "stringdict-parallelizm",
365  po::value<bool>(&g_enable_stringdict_parallel)
366  ->default_value(g_enable_stringdict_parallel)
367  ->implicit_value(true),
368  "Allow StringDictionary to parallelize loads using multiple threads");
369  help_desc.add_options()("log-user-origin",
370  po::value<bool>(&log_user_origin)
371  ->default_value(log_user_origin)
372  ->implicit_value(true),
373  "Lookup the origin of inbound connections by IP address/DNS "
374  "name, and print this information as part of stdlog.");
376 }
377 
379  developer_desc.add_options()("dev-options", "Print internal developer options.");
380  developer_desc.add_options()(
381  "enable-calcite-view-optimize",
384  ->implicit_value(true),
385  "Enable additional calcite (query plan) optimizations when a view is part of the "
386  "query.");
387  developer_desc.add_options()(
388  "enable-columnar-output",
389  po::value<bool>(&g_enable_columnar_output)
390  ->default_value(g_enable_columnar_output)
391  ->implicit_value(true),
392  "Enable columnar output for intermediate/final query steps.");
393  developer_desc.add_options()("enable-legacy-syntax",
394  po::value<bool>(&enable_legacy_syntax)
395  ->default_value(enable_legacy_syntax)
396  ->implicit_value(true),
397  "Enable legacy syntax.");
398  developer_desc.add_options()(
399  "enable-multifrag",
400  po::value<bool>(&allow_multifrag)
401  ->default_value(allow_multifrag)
402  ->implicit_value(true),
403  "Enable execution over multiple fragments in a single round-trip to GPU.");
404  developer_desc.add_options()(
405  "enable-shared-mem-group-by",
406  po::value<bool>(&g_enable_smem_group_by)
407  ->default_value(g_enable_smem_group_by)
408  ->implicit_value(true),
409  "Enable using GPU shared memory for some GROUP BY queries.");
410  developer_desc.add_options()("num-executors",
411  po::value<int>(&system_parameters.num_executors)
412  ->default_value(system_parameters.num_executors),
413  "Number of executors to run in parallel.");
414  developer_desc.add_options()(
415  "gpu-shared-mem-threshold",
416  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
417  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
418  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
419  developer_desc.add_options()(
420  "enable-shared-mem-grouped-non-count-agg",
421  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
422  ->default_value(g_enable_smem_grouped_non_count_agg)
423  ->implicit_value(true),
424  "Enable using GPU shared memory for grouped non-count aggregate queries.");
425  developer_desc.add_options()(
426  "enable-shared-mem-non-grouped-agg",
427  po::value<bool>(&g_enable_smem_non_grouped_agg)
428  ->default_value(g_enable_smem_non_grouped_agg)
429  ->implicit_value(true),
430  "Enable using GPU shared memory for non-grouped aggregate queries.");
431  developer_desc.add_options()("enable-direct-columnarization",
432  po::value<bool>(&g_enable_direct_columnarization)
433  ->default_value(g_enable_direct_columnarization)
434  ->implicit_value(true),
435  "Enables/disables a more optimized columnarization method "
436  "for intermediate steps in multi-step queries.");
437  developer_desc.add_options()(
438  "offset-device-by-table-id",
439  po::value<bool>(&g_use_table_device_offset)
440  ->default_value(g_use_table_device_offset)
441  ->implicit_value(true),
442  "Enables/disables offseting the chosen device ID by the table ID for a given "
443  "fragment. This improves balance of fragments across GPUs.");
444  developer_desc.add_options()("enable-window-functions",
445  po::value<bool>(&g_enable_window_functions)
446  ->default_value(g_enable_window_functions)
447  ->implicit_value(true),
448  "Enable experimental window function support.");
449  developer_desc.add_options()("enable-table-functions",
450  po::value<bool>(&g_enable_table_functions)
451  ->default_value(g_enable_table_functions)
452  ->implicit_value(true),
453  "Enable experimental table functions support.");
454  developer_desc.add_options()(
455  "jit-debug-ir",
456  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
457  "Enable runtime debugger support for the JIT. Note that this flag is "
458  "incompatible "
459  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
460  "`/tmp/mapdquery`.");
461  developer_desc.add_options()(
462  "intel-jit-profile",
463  po::value<bool>(&intel_jit_profile)
464  ->default_value(intel_jit_profile)
465  ->implicit_value(true),
466  "Enable runtime support for the JIT code profiling using Intel VTune.");
467  developer_desc.add_options()(
468  "enable-modern-thread-pool",
469  po::value<bool>(&g_use_tbb_pool)
470  ->default_value(g_use_tbb_pool)
471  ->implicit_value(true),
472  "Enable a new thread pool implementation for queuing kernels for execution.");
473  developer_desc.add_options()(
474  "skip-intermediate-count",
475  po::value<bool>(&g_skip_intermediate_count)
476  ->default_value(g_skip_intermediate_count)
477  ->implicit_value(true),
478  "Skip pre-flight counts for intermediate projections with no filters.");
479  developer_desc.add_options()(
480  "strip-join-covered-quals",
481  po::value<bool>(&g_strip_join_covered_quals)
482  ->default_value(g_strip_join_covered_quals)
483  ->implicit_value(true),
484  "Remove quals from the filtered count if they are covered by a "
485  "join condition (currently only ST_Contains).");
486 
487  developer_desc.add_options()(
488  "min-cpu-slab-size",
489  po::value<size_t>(&system_parameters.min_cpu_slab_size)
490  ->default_value(system_parameters.min_cpu_slab_size),
491  "Min slab size (size of memory allocations) for CPU buffer pool.");
492  developer_desc.add_options()(
493  "max-cpu-slab-size",
494  po::value<size_t>(&system_parameters.max_cpu_slab_size)
495  ->default_value(system_parameters.max_cpu_slab_size),
496  "Max CPU buffer pool slab size (size of memory allocations). Note if "
497  "there is not enough free memory to accomodate the target slab size, smaller "
498  "slabs will be allocated, down to the minimum size specified by "
499  "min-cpu-slab-size.");
500  developer_desc.add_options()(
501  "min-gpu-slab-size",
502  po::value<size_t>(&system_parameters.min_gpu_slab_size)
503  ->default_value(system_parameters.min_gpu_slab_size),
504  "Min slab size (size of memory allocations) for GPU buffer pools.");
505  developer_desc.add_options()(
506  "max-gpu-slab-size",
507  po::value<size_t>(&system_parameters.max_gpu_slab_size)
508  ->default_value(system_parameters.max_gpu_slab_size),
509  "Max GPU buffer pool slab size (size of memory allocations). Note if "
510  "there is not enough free memory to accomodate the target slab size, smaller "
511  "slabs will be allocated, down to the minimum size speified by "
512  "min-gpu-slab-size.");
513 
514  developer_desc.add_options()(
515  "max-output-projection-allocation-bytes",
516  po::value<size_t>(&g_max_memory_allocation_size)
517  ->default_value(g_max_memory_allocation_size),
518  "Maximum allocation size for a fixed output buffer allocation for projection "
519  "queries with no pre-flight count. Default is the maximum slab size (sizes "
520  "greater "
521  "than the maximum slab size have no affect). Requires bump allocator.");
522  developer_desc.add_options()(
523  "min-output-projection-allocation-bytes",
524  po::value<size_t>(&g_min_memory_allocation_size)
525  ->default_value(g_min_memory_allocation_size),
526  "Minimum allocation size for a fixed output buffer allocation for projection "
527  "queries with no pre-flight count. If an allocation of this size cannot be "
528  "obtained, the query will be retried with different execution parameters and/or "
529  "on "
530  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
531  developer_desc.add_options()("enable-bump-allocator",
532  po::value<bool>(&g_enable_bump_allocator)
533  ->default_value(g_enable_bump_allocator)
534  ->implicit_value(true),
535  "Enable the bump allocator for projection queries on "
536  "GPU. The bump allocator will "
537  "allocate a fixed size buffer for each query, track the "
538  "number of rows passing the "
539  "kernel during query execution, and copy back only the "
540  "rows that passed the kernel "
541  "to CPU after execution. When disabled, pre-flight "
542  "count queries are used to size "
543  "the output buffer for projection queries.");
544  developer_desc.add_options()(
545  "code-cache-eviction-percent",
546  po::value<float>(&g_fraction_code_cache_to_evict)
547  ->default_value(g_fraction_code_cache_to_evict),
548  "Percentage of the GPU code cache to evict if an out of memory error is "
549  "encountered while attempting to place generated code on the GPU.");
550 
551  developer_desc.add_options()("ssl-cert",
552  po::value<std::string>(&system_parameters.ssl_cert_file)
553  ->default_value(std::string("")),
554  "SSL Validated public certficate.");
555 
556  developer_desc.add_options()("ssl-private-key",
557  po::value<std::string>(&system_parameters.ssl_key_file)
558  ->default_value(std::string("")),
559  "SSL private key file.");
560  // Note ssl_trust_store is passed through to Calcite via system_parameters
561  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
562  developer_desc.add_options()("ssl-trust-store",
563  po::value<std::string>(&system_parameters.ssl_trust_store)
564  ->default_value(std::string("")),
565  "SSL public CA certifcates (java trust store) to validate "
566  "TLS connections (passed through to the Calcite server).");
567 
568  developer_desc.add_options()(
569  "ssl-trust-password",
570  po::value<std::string>(&system_parameters.ssl_trust_password)
571  ->default_value(std::string("")),
572  "SSL password for java trust store provided via --ssl-trust-store parameter.");
573 
574  developer_desc.add_options()(
575  "ssl-trust-ca",
576  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
577  ->default_value(std::string("")),
578  "SSL public CA certificates to validate TLS connection(as a client).");
579 
580  developer_desc.add_options()(
581  "ssl-trust-ca-server",
582  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
583  "SSL public CA certificates to validate TLS connection(as a server).");
584 
585  developer_desc.add_options()("ssl-keystore",
586  po::value<std::string>(&system_parameters.ssl_keystore)
587  ->default_value(std::string("")),
588  "SSL server credentials as a java key store (passed "
589  "through to the Calcite server).");
590 
591  developer_desc.add_options()(
592  "ssl-keystore-password",
593  po::value<std::string>(&system_parameters.ssl_keystore_password)
594  ->default_value(std::string("")),
595  "SSL password for java keystore, provide by via --ssl-keystore.");
596 
597  developer_desc.add_options()(
598  "udf",
599  po::value<std::string>(&udf_file_name),
600  "Load user defined extension functions from this file at startup. The file is "
601  "expected to be a C/C++ file with extension .cpp.");
602 
603  developer_desc.add_options()(
604  "udf-compiler-path",
605  po::value<std::string>(&udf_compiler_path),
606  "Provide absolute path to clang++ used in udf compilation.");
607 
608  developer_desc.add_options()("udf-compiler-options",
609  po::value<std::vector<std::string>>(&udf_compiler_options),
610  "Specify compiler options to tailor udf compilation.");
611 
612 #ifdef ENABLE_GEOS
613  developer_desc.add_options()("libgeos-so-filename",
614  po::value<std::string>(&libgeos_so_filename),
615  "Specify libgeos shared object filename to be used for "
616  "geos-backed geo opertations.");
617 #endif
618  developer_desc.add_options()(
619  "large-ndv-threshold",
620  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
621  developer_desc.add_options()(
622  "large-ndv-multiplier",
623  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
624  developer_desc.add_options()(
625  "bitmap-memory-limit",
626  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
627  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
628  "size of the group by buffer (entry count in Query Memory Descriptor) and "
629  "multiplying it by the number of count distinct expression and the size of bitmap "
630  "required for each. For approx_count_distinct this is typically 8192 bytes.");
631  developer_desc.add_options()(
632  "enable-filter-function",
633  po::value<bool>(&g_enable_filter_function)
634  ->default_value(g_enable_filter_function)
635  ->implicit_value(true),
636  "Enable the filter function protection feature for the SQL JIT compiler. "
637  "Normally should be on but techs might want to disable for troubleshooting.");
638  developer_desc.add_options()(
639  "enable-calcite-ddl",
640  po::value<bool>(&g_enable_calcite_ddl_parser)
641  ->default_value(g_enable_calcite_ddl_parser)
642  ->implicit_value(true),
643  "Enable using Calcite for supported DDL parsing when available.");
644 }
645 
646 namespace {
647 
// Builds an in-memory copy of the server configuration file that boost can
// validate as program options.
//
// Lines are read from `in` one at a time and appended to the result, each
// followed by "\n". Lines that begin with "allowed-import-paths" or
// "allowed-export-paths" are config-file-only settings and are left out.
// Reading stops right after a line equal to "[web]" has been processed, so the
// "[web]" header itself is kept but nothing that follows it is forwarded.
std::stringstream sanitize_config_file(std::ifstream& in) {
  // True when `text` begins with `prefix`.
  const auto begins_with = [](const std::string& text, const std::string& prefix) {
    return text.compare(0, prefix.size(), prefix) == 0;
  };

  std::stringstream sanitized;
  for (std::string entry; std::getline(in, entry);) {
    const bool config_file_only = begins_with(entry, "allowed-import-paths") ||
                                  begins_with(entry, "allowed-export-paths");
    if (!config_file_only) {
      sanitized << entry << "\n";
    }
    // Everything past the web section header is ignored.
    if (entry == "[web]") {
      break;
    }
  }
  return sanitized;
}
664 
665 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
666  if (!filename.empty()) {
667  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
668  if (!boost::filesystem::exists(filename)) {
669  std::cerr << desc << " " << filename << " does not exist." << std::endl;
670  return false;
671  }
672  }
673  return true;
674 }
675 
677  if (!filename.empty()) {
679  }
680 }
681 
682 } // namespace
683 
685  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
686  if (!boost::filesystem::exists(base_path)) {
687  throw std::runtime_error("OmniSci base directory does not exist at " + base_path);
688  }
689 }
690 
692  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
693  const auto data_path = boost::filesystem::path(base_path) / "mapd_data";
694  if (!boost::filesystem::exists(data_path)) {
695  throw std::runtime_error("OmniSci data directory does not exist at '" + base_path +
696  "'");
697  }
698 
699  {
700  const auto lock_file = boost::filesystem::path(base_path) / "omnisci_server_pid.lck";
701  auto pid = std::to_string(getpid());
702 
703  int pid_fd = open(lock_file.c_str(), O_RDWR | O_CREAT, 0644);
704  if (pid_fd == -1) {
705  auto err = std::string("Failed to open PID file ") + lock_file.c_str() + ". " +
706  strerror(errno) + ".";
707  throw std::runtime_error(err);
708  }
709  if (lockf(pid_fd, F_TLOCK, 0) == -1) {
710  close(pid_fd);
711  auto err = std::string("Another OmniSci Server is using data directory ") +
712  base_path + ".";
713  throw std::runtime_error(err);
714  }
715  if (ftruncate(pid_fd, 0) == -1) {
716  close(pid_fd);
717  auto err = std::string("Failed to truncate PID file ") + lock_file.c_str() + ". " +
718  strerror(errno) + ".";
719  throw std::runtime_error(err);
720  }
721  if (write(pid_fd, pid.c_str(), pid.length()) == -1) {
722  close(pid_fd);
723  auto err = std::string("Failed to write PID file ") + lock_file.c_str() + ". " +
724  strerror(errno) + ".";
725  throw std::runtime_error(err);
726  }
727  }
728  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
729  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
730  throw std::runtime_error("File containing DB queries " + db_query_file +
731  " does not exist.");
732  }
733  const auto db_file =
734  boost::filesystem::path(base_path) / "mapd_catalogs" / OMNISCI_SYSTEM_CATALOG;
735  if (!boost::filesystem::exists(db_file)) {
736  { // check old system catalog existsense
737  const auto db_file = boost::filesystem::path(base_path) / "mapd_catalogs/mapd";
738  if (!boost::filesystem::exists(db_file)) {
739  throw std::runtime_error("OmniSci system catalog " + OMNISCI_SYSTEM_CATALOG +
740  " does not exist.");
741  }
742  }
743  }
744  if (license_path.length() == 0) {
745  license_path = base_path + "/omnisci.license";
746  }
747 
748  // add all parameters to be displayed on startup
749  LOG(INFO) << "OmniSci started with data directory at '" << base_path << "'";
750  if (vm.count("license-path")) {
751  LOG(INFO) << "License key path set to '" << license_path << "'";
752  }
753  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
754  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
756  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
757  }
758  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
760  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
761  << pending_query_interrupt_freq << " (in ms.)";
762  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
763  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
764  }
765 
766  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
767 
768  LOG(INFO) << " Maximum Idle session duration " << idle_session_duration;
769 
770  LOG(INFO) << " Maximum active session duration " << max_session_duration;
771 
773 
777 
778  if (disk_cache_level == "fsi") {
779  if (g_enable_fsi) {
781  LOG(INFO) << "Disk cache enabled for foreign tables only";
782  } else {
783  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
784  "disk cache disabled";
785  }
786  } else if (disk_cache_level == "all") {
788  LOG(INFO) << "Disk cache enabled for all tables";
789  } else if (disk_cache_level == "non_fsi") {
791  LOG(INFO) << "Disk cache enabled for non-FSI tables";
792  } else if (disk_cache_level == "none") {
794  LOG(INFO) << "Disk cache disabled";
795  } else {
797  LOG(INFO) << "Non-recognized value for disk-cache-level {" << disk_cache_level
798  << "}. Defaulted to disk cache disabled";
799  }
800 
801  if (disk_cache_config.path.empty()) {
802  disk_cache_config.path = base_path + "/omnisci_disk_cache";
803  }
805 
808 
809  // If passed in, blacklist all security config files
818 }
819 
821  int argc,
822  char const* const* argv,
823  const bool should_init_logging) {
824  po::options_description all_desc("All options");
825  all_desc.add(help_desc).add(developer_desc);
826 
827  try {
828  po::store(po::command_line_parser(argc, argv)
829  .options(all_desc)
830  .positional(positional_options)
831  .run(),
832  vm);
833  po::notify(vm);
834 
835  if (vm.count("help")) {
836  std::cerr << "Usage: omnisci_server <data directory path> [-p <port number>] "
837  "[--http-port <http port number>] [--flush-log] [--version|-v]"
838  << std::endl
839  << std::endl;
840  std::cout << help_desc << std::endl;
841  return 0;
842  }
843  if (vm.count("dev-options")) {
844  std::cout << "Usage: omnisci_server <data directory path> [-p <port number>] "
845  "[--http-port <http port number>] [--flush-log] [--version|-v]"
846  << std::endl
847  << std::endl;
848  std::cout << developer_desc << std::endl;
849  return 0;
850  }
851  if (vm.count("version")) {
852  std::cout << "OmniSci Version: " << MAPD_RELEASE << std::endl;
853  return 0;
854  }
855 
856  if (vm.count("config")) {
857  std::ifstream settings_file(system_parameters.config_file);
858 
859  auto sanitized_settings = sanitize_config_file(settings_file);
860 
861  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
862  po::notify(vm);
863  settings_file.close();
864  }
865 
866  if (should_init_logging) {
867  init_logging();
868  }
869 
871  return 1;
872  }
873  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
874  return 1;
875  }
877  return 1;
878  }
880  return 1;
881  }
883  return 1;
884  }
886  return 1;
887  }
888 
896  } catch (po::error& e) {
897  std::cerr << "Usage Error: " << e.what() << std::endl;
898  return 1;
899  }
900 
901  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
902  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
903  return 1;
904  }
905 
907  LOG(INFO) << " From clause table reordering is disabled";
908  }
909 
911  LOG(INFO) << " Filter push down for JOIN is enabled";
912  }
913 
914  if (vm.count("udf")) {
915  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
916 
917  if (!boost::filesystem::exists(udf_file_name)) {
918  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
919  return 1;
920  }
921 
922  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
923  }
924 
925  if (vm.count("udf-compiler-path")) {
926  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
927  }
928 
929  auto trim_string = [](std::string& s) {
930  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
931  };
932 
933  if (vm.count("udf-compiler-options")) {
934  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
935  }
936 
937  if (enable_runtime_udf) {
938  LOG(INFO) << " Runtime user defined extension functions enabled globally.";
939  }
940 
941  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
942  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
943  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
944  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
945  boost::is_any_of("\"'"));
946 
947  if (!system_parameters.ha_group_id.empty()) {
948  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
950  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
951  return 5;
952  } else {
953  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
954  }
955  if (system_parameters.ha_brokers.empty()) {
956  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
957  return 6;
958  } else {
959  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
960  }
961  if (system_parameters.ha_shared_data.empty()) {
962  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
963  return 7;
964  } else {
965  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
966  }
967  }
968  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
969  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
970  LOG(INFO) << " Min CPU buffer pool slab size " << system_parameters.min_cpu_slab_size;
971  LOG(INFO) << " Max CPU buffer pool slab size " << system_parameters.max_cpu_slab_size;
972  LOG(INFO) << " Min GPU buffer pool slab size " << system_parameters.min_gpu_slab_size;
973  LOG(INFO) << " Max GPU buffer pool slab size " << system_parameters.max_gpu_slab_size;
974  LOG(INFO) << " calcite JVM max memory " << system_parameters.calcite_max_mem;
975  LOG(INFO) << " OmniSci Server Port " << system_parameters.omnisci_server_port;
976  LOG(INFO) << " OmniSci Calcite Port " << system_parameters.calcite_port;
977  LOG(INFO) << " Enable Calcite view optimize "
979 
980  LOG(INFO) << " Allow Local Auth Fallback: "
981  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
982 
983  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
984  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
985  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
986  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
987  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
988 
989  return boost::none;
990 }
std::string distinguishedName
Definition: AuthMetadata.h:25
DiskCacheLevel enabled_level
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
unsigned connect_timeout
std::string filename(char const *path)
Definition: Logger.cpp:62
double g_running_query_interrupt_freq
Definition: Execute.cpp:110
bool g_enable_smem_group_by
int open(const char *path, int flags, int mode)
Definition: omnisci_fs.cpp:64
float g_filter_push_down_low_frac
Definition: Execute.cpp:87
std::string ldapQueryUrl
Definition: AuthMetadata.h:26
bool g_enable_watchdog
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
logger::LogOptions log_options_
bool g_strip_join_covered_quals
Definition: Execute.cpp:94
tuple line
Definition: parse_ast.py:10
bool g_enable_direct_columnarization
Definition: Execute.cpp:105
std::string ha_shared_data
std::string udf_compiler_path
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:109
po::options_description help_desc
#define LOG(tag)
Definition: Logger.h:188
bool enable_calcite_view_optimize
bool g_enable_union
DiskCacheConfig disk_cache_config
bool g_enable_debug_timer
Definition: Logger.cpp:17
std::string ldapRoleRegex
Definition: AuthMetadata.h:27
static void initializeFromConfigFile(const std::string &server_config_path)
Definition: DdlUtils.cpp:663
const std::string OMNISCI_SYSTEM_CATALOG
Definition: SysCatalog.h:55
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:89
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:74
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:80
int g_hll_precision_bits
std::string config_file
std::string to_string(char const *&&v)
std::string encryption_key_store_path
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:91
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:82
boost::program_options::options_description const & get_options() const
Definition: Logger.cpp:79
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:119
bool g_null_div_by_zero
Definition: Execute.cpp:79
bool g_enable_interop
std::string ha_brokers
std::string ssl_trust_ca_file
bool g_enable_columnar_output
Definition: Execute.cpp:90
void close(const int fd)
Definition: omnisci_fs.cpp:68
std::string ssl_trust_store
int64_t g_bitmap_memory_limit
bool g_from_table_reordering
Definition: Execute.cpp:81
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:92
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
float g_filter_push_down_high_frac
Definition: Execute.cpp:88
bool g_enable_thrift_logs
Definition: initdb.cpp:42
std::string uri
Definition: AuthMetadata.h:24
bool g_bigint_count
size_t g_max_memory_allocation_size
Definition: Execute.cpp:99
std::string ha_unique_server_id
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:93
std::string ca_file_name
Definition: AuthMetadata.h:31
std::string ssl_key_file
AuthMetadata authMetadata
bool g_enable_calcite_ddl_parser
Definition: ParserNode.cpp:72
bool g_enable_window_functions
Definition: Execute.cpp:97
size_t g_min_memory_allocation_size
Definition: Execute.cpp:100
bool with_keepalive
std::string ldapSuperUserRole
Definition: AuthMetadata.h:28
unsigned pending_query_interrupt_freq
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:116
bool g_enable_experimental_string_functions
std::vector< std::string > udf_compiler_options
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:76
bool g_cache_string_hash
float g_fraction_code_cache_to_evict
Severity severity_
Definition: Logger.h:118
std::string ssl_keystore_password
std::string ssl_trust_password
bool g_enable_filter_push_down
Definition: Execute.cpp:86
bool g_use_estimator_result_cache
Definition: Execute.cpp:108
bool g_enable_bump_allocator
Definition: Execute.cpp:103
bool allowLocalAuthFallback
Definition: AuthMetadata.h:32
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:126
po::positional_options_description positional_options
void set_base_path(std::string const &base_path)
Definition: Logger.cpp:93
std::string ssl_keystore
bool g_allow_cpu_retry
Definition: Execute.cpp:78
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:726
po::options_description developer_desc
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:43
std::string disk_cache_level
static bool run
po::variables_map vm
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:77
bool g_enable_fsi
Definition: Catalog.cpp:91
bool g_use_tbb_pool
Definition: Execute.cpp:75
std::string ha_group_id
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:107
size_t g_large_ndv_multiplier
bool g_enable_table_functions
Definition: Execute.cpp:98
std::string cluster_command_line_arg
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
size_t g_gpu_smem_threshold
Definition: Execute.cpp:111
std::string ssl_cert_file