OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <filesystem>
22 #include <iostream>
23 
24 #include "CommandLineOptions.h"
25 #include "LeafHostInfo.h"
26 #include "MapDRelease.h"
28 #include "Shared/Compressor.h"
30 #include "Utils/DdlUtils.h"
31 
32 #ifdef _WIN32
33 #include <io.h>
34 #endif
35 
36 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
37 
38 extern std::string cluster_command_line_arg;
39 
41 
42 extern bool g_use_table_device_offset;  // bound to --offset-device-by-table-id
44 extern bool g_cache_string_hash;  // bound to --enable-string-dict-hash-cache
47 extern int64_t g_large_ndv_threshold;  // bound to --large-ndv-threshold
48 extern size_t g_large_ndv_multiplier;  // bound to --large-ndv-multiplier
49 extern int64_t g_bitmap_memory_limit;  // bound to --bitmap-memory-limit
50 extern bool g_enable_calcite_ddl_parser;
51 extern bool g_enable_seconds_refresh;
52 extern size_t g_approx_quantile_buffer;  // bound to --approx_quantile_buffer
53 extern size_t g_approx_quantile_centroids;  // bound to --approx_quantile_centroids
54 extern size_t g_parallel_top_min;
55 extern size_t g_parallel_top_max;
57 extern bool g_enable_system_tables;  // bound to --enable-system-tables
58 #ifdef ENABLE_MEMKIND
59 extern std::string g_pmem_path;  // bound to --pmem-path (ENABLE_MEMKIND builds only)
60 #endif
61 
62 namespace Catalog_Namespace {
63 extern bool g_log_user_id;  // bound to --log-user-id
64 }  // namespace Catalog_Namespace
65 
66 unsigned connect_timeout{20000};  // NOTE(review): unit not shown here; presumably milliseconds — confirm
67 unsigned recv_timeout{300000};  // NOTE(review): presumably milliseconds — confirm
68 unsigned send_timeout{300000};  // NOTE(review): presumably milliseconds — confirm
69 bool with_keepalive{false};  // keepalive is off unless changed elsewhere
70 
74  }
78 }
79 
81  help_desc.add_options()("help,h", "Show available options.");
82  help_desc.add_options()(
83  "allow-cpu-retry",
84  po::value<bool>(&g_allow_cpu_retry)
85  ->default_value(g_allow_cpu_retry)
86  ->implicit_value(true),
87  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
88  help_desc.add_options()("allow-loop-joins",
89  po::value<bool>(&allow_loop_joins)
90  ->default_value(allow_loop_joins)
91  ->implicit_value(true),
92  "Enable loop joins.");
93  help_desc.add_options()("bigint-count",
94  po::value<bool>(&g_bigint_count)
95  ->default_value(g_bigint_count)
96  ->implicit_value(true),
97  "Use 64-bit count.");
98  help_desc.add_options()("calcite-max-mem",
99  po::value<size_t>(&system_parameters.calcite_max_mem)
100  ->default_value(system_parameters.calcite_max_mem),
101  "Max memory available to calcite JVM.");
102  if (!dist_v5_) {
103  help_desc.add_options()("calcite-port",
104  po::value<int>(&system_parameters.calcite_port)
105  ->default_value(system_parameters.calcite_port),
106  "Calcite port number.");
107  }
108  help_desc.add_options()("config",
109  po::value<std::string>(&system_parameters.config_file),
110  "Path to server configuration file.");
111  help_desc.add_options()("cpu-buffer-mem-bytes",
112  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
113  ->default_value(system_parameters.cpu_buffer_mem_bytes),
114  "Size of memory reserved for CPU buffers, in bytes.");
115 
116  help_desc.add_options()("cpu-only",
117  po::value<bool>(&system_parameters.cpu_only)
118  ->default_value(system_parameters.cpu_only)
119  ->implicit_value(true),
120  "Run on CPU only, even if GPUs are available.");
121  help_desc.add_options()("cuda-block-size",
122  po::value<size_t>(&system_parameters.cuda_block_size)
123  ->default_value(system_parameters.cuda_block_size),
124  "Size of block to use on GPU.");
125  help_desc.add_options()("cuda-grid-size",
126  po::value<size_t>(&system_parameters.cuda_grid_size)
127  ->default_value(system_parameters.cuda_grid_size),
128  "Size of grid to use on GPU.");
129  if (!dist_v5_) {
130  help_desc.add_options()(
131  "data",
132  po::value<std::string>(&base_path)->required()->default_value("data"),
133  "Directory path to OmniSci data storage (catalogs, raw data, log files, etc).");
134  positional_options.add("data", 1);
135  }
136  help_desc.add_options()("db-query-list",
137  po::value<std::string>(&db_query_file),
138  "Path to file containing OmniSci warmup queries.");
139  help_desc.add_options()(
140  "exit-after-warmup",
141  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
142  "Exit after OmniSci warmup queries.");
143  help_desc.add_options()("dynamic-watchdog-time-limit",
144  po::value<unsigned>(&dynamic_watchdog_time_limit)
145  ->default_value(dynamic_watchdog_time_limit)
146  ->implicit_value(10000),
147  "Dynamic watchdog time limit, in milliseconds.");
148  help_desc.add_options()("enable-data-recycler",
149  po::value<bool>(&enable_data_recycler)
150  ->default_value(enable_data_recycler)
151  ->implicit_value(true),
152  "Use data recycler.");
153  help_desc.add_options()("use-hashtable-cache",
154  po::value<bool>(&use_hashtable_cache)
155  ->default_value(use_hashtable_cache)
156  ->implicit_value(true),
157  "Use hashtable cache.");
158  help_desc.add_options()(
159  "hashtable-cache-total-bytes",
160  po::value<size_t>(&hashtable_cache_total_bytes)
161  ->default_value(hashtable_cache_total_bytes)
162  ->implicit_value(4294967296),
163  "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
164  help_desc.add_options()("max-cacheable-hashtable-size-bytes",
165  po::value<size_t>(&max_cacheable_hashtable_size_bytes)
166  ->default_value(max_cacheable_hashtable_size_bytes)
167  ->implicit_value(2147483648),
168  "The maximum size of hashtable that is available to cache, in "
169  "bytes (default: 2GB).");
170  help_desc.add_options()("enable-debug-timer",
171  po::value<bool>(&g_enable_debug_timer)
172  ->default_value(g_enable_debug_timer)
173  ->implicit_value(true),
174  "Enable debug timer logging.");
175  help_desc.add_options()("enable-dynamic-watchdog",
176  po::value<bool>(&enable_dynamic_watchdog)
177  ->default_value(enable_dynamic_watchdog)
178  ->implicit_value(true),
179  "Enable dynamic watchdog.");
180  help_desc.add_options()("enable-filter-push-down",
181  po::value<bool>(&g_enable_filter_push_down)
182  ->default_value(g_enable_filter_push_down)
183  ->implicit_value(true),
184  "Enable filter push down through joins.");
185  help_desc.add_options()("enable-overlaps-hashjoin",
186  po::value<bool>(&g_enable_overlaps_hashjoin)
187  ->default_value(g_enable_overlaps_hashjoin)
188  ->implicit_value(true),
189  "Enable the overlaps hash join framework allowing for range "
190  "join (e.g. spatial overlaps) computation using a hash table.");
// Boolean switch for g_enable_hashjoin_many_to_many; passing the flag without a
// value sets it to true (implicit_value).
191  help_desc.add_options()("enable-hashjoin-many-to-many",
192  po::value<bool>(&g_enable_hashjoin_many_to_many)
193  ->default_value(g_enable_hashjoin_many_to_many)
194  ->implicit_value(true),
195  "Enable many-to-many hash joins.");
197  help_desc.add_options()("enable-distance-rangejoin",
198  po::value<bool>(&g_enable_distance_rangejoin)
199  ->default_value(g_enable_distance_rangejoin)
200  ->implicit_value(true),
201  "Enable accelerating point distance joins with a hash table. "
202  "This rewrites ST_Distance when using an upperbound (<= X).");
203  help_desc.add_options()("enable-runtime-query-interrupt",
204  po::value<bool>(&enable_runtime_query_interrupt)
205  ->default_value(enable_runtime_query_interrupt)
206  ->implicit_value(true),
207  "Enable runtime query interrupt.");
208  help_desc.add_options()("enable-non-kernel-time-query-interrupt",
211  ->implicit_value(true),
212  "Enable non-kernel time query interrupt.");
213  help_desc.add_options()("pending-query-interrupt-freq",
214  po::value<unsigned>(&pending_query_interrupt_freq)
215  ->default_value(pending_query_interrupt_freq)
216  ->implicit_value(1000),
217  "A frequency of checking the request of pending query "
218  "interrupt from user (in millisecond).");
219  help_desc.add_options()(
220  "running-query-interrupt-freq",
221  po::value<double>(&running_query_interrupt_freq)
222  ->default_value(running_query_interrupt_freq)
223  ->implicit_value(0.5),
224  "A frequency of checking the request of running query "
225  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
226  help_desc.add_options()("use-estimator-result-cache",
227  po::value<bool>(&use_estimator_result_cache)
228  ->default_value(use_estimator_result_cache)
229  ->implicit_value(true),
230  "Use estimator result cache.");
231  if (!dist_v5_) {
232  help_desc.add_options()(
233  "enable-string-dict-hash-cache",
234  po::value<bool>(&g_cache_string_hash)
235  ->default_value(g_cache_string_hash)
236  ->implicit_value(true),
237  "Cache string hash values in the string dictionary server during import.");
238  }
239  help_desc.add_options()(
240  "enable-thrift-logs",
241  po::value<bool>(&g_enable_thrift_logs)
242  ->default_value(g_enable_thrift_logs)
243  ->implicit_value(true),
244  "Enable writing messages directly from thrift to stdout/stderr.");
245  help_desc.add_options()("enable-watchdog",
246  po::value<bool>(&enable_watchdog)
247  ->default_value(enable_watchdog)
248  ->implicit_value(true),
249  "Enable watchdog.");
250  help_desc.add_options()(
251  "filter-push-down-low-frac",
252  po::value<float>(&g_filter_push_down_low_frac)
253  ->default_value(g_filter_push_down_low_frac)
254  ->implicit_value(g_filter_push_down_low_frac),
255  "Lower threshold for selectivity of filters that are pushed down.");
256  help_desc.add_options()(
257  "filter-push-down-high-frac",
258  po::value<float>(&g_filter_push_down_high_frac)
259  ->default_value(g_filter_push_down_high_frac)
260  ->implicit_value(g_filter_push_down_high_frac),
261  "Higher threshold for selectivity of filters that are pushed down.");
262  help_desc.add_options()("filter-push-down-passing-row-ubound",
263  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
265  ->implicit_value(g_filter_push_down_passing_row_ubound),
266  "Upperbound on the number of rows that should pass the filter "
267  "if the selectivity is less than "
268  "the high fraction threshold.");
269  help_desc.add_options()("from-table-reordering",
270  po::value<bool>(&g_from_table_reordering)
271  ->default_value(g_from_table_reordering)
272  ->implicit_value(true),
273  "Enable automatic table reordering in FROM clause.");
274  help_desc.add_options()("gpu-buffer-mem-bytes",
275  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
276  ->default_value(system_parameters.gpu_buffer_mem_bytes),
277  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
278  help_desc.add_options()("gpu-input-mem-limit",
279  po::value<double>(&system_parameters.gpu_input_mem_limit)
280  ->default_value(system_parameters.gpu_input_mem_limit),
281  "Force query to CPU when input data memory usage exceeds this "
282  "percentage of available GPU memory.");
283  help_desc.add_options()(
284  "hll-precision-bits",
285  po::value<int>(&g_hll_precision_bits)
286  ->default_value(g_hll_precision_bits)
287  ->implicit_value(g_hll_precision_bits),
288  "Number of bits used from the hash value used to specify the bucket number.");
289  if (!dist_v5_) {
290  help_desc.add_options()("http-port",
291  po::value<int>(&http_port)->default_value(http_port),
292  "HTTP port number.");
293  }
294  help_desc.add_options()(
295  "idle-session-duration",
296  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
297  "Maximum duration of idle session.");
298  help_desc.add_options()("inner-join-fragment-skipping",
299  po::value<bool>(&g_inner_join_fragment_skipping)
300  ->default_value(g_inner_join_fragment_skipping)
301  ->implicit_value(true),
302  "Enable/disable inner join fragment skipping. This feature is "
303  "considered stable and is enabled by default. This "
304  "parameter will be removed in a future release.");
305  help_desc.add_options()(
306  "max-session-duration",
307  po::value<int>(&max_session_duration)->default_value(max_session_duration),
308  "Maximum duration of active session.");
309  help_desc.add_options()("num-sessions",
310  po::value<int>(&system_parameters.num_sessions)
311  ->default_value(system_parameters.num_sessions),
312  "Maximum number of active session.");
313  help_desc.add_options()(
314  "null-div-by-zero",
315  po::value<bool>(&g_null_div_by_zero)
316  ->default_value(g_null_div_by_zero)
317  ->implicit_value(true),
318  "Return null on division by zero instead of throwing an exception.");
319  help_desc.add_options()(
320  "num-reader-threads",
321  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
322  "Number of reader threads to use.");
// Default cap on import threads, stored in g_max_import_threads.
323  help_desc.add_options()(
324  "max-import-threads",
325  po::value<size_t>(&g_max_import_threads)->default_value(g_max_import_threads),
326  "Max number of default import threads to use (num hardware threads will be used "
327  "instead if lower). Can be overridden with copy statement threads option.");
328  help_desc.add_options()(
329  "overlaps-max-table-size-bytes",
330  po::value<size_t>(&g_overlaps_max_table_size_bytes)
331  ->default_value(g_overlaps_max_table_size_bytes),
332  "The maximum size in bytes of the hash table for an overlaps hash join.");
333  help_desc.add_options()("overlaps-target-entries-per-bin",
334  po::value<double>(&g_overlaps_target_entries_per_bin)
335  ->default_value(g_overlaps_target_entries_per_bin),
336  "The target number of hash entries per bin for overlaps join");
337  if (!dist_v5_) {
338  help_desc.add_options()("port,p",
339  po::value<int>(&system_parameters.omnisci_server_port)
340  ->default_value(system_parameters.omnisci_server_port),
341  "TCP Port number.");
342  }
343  help_desc.add_options()("num-gpus",
344  po::value<int>(&system_parameters.num_gpus)
345  ->default_value(system_parameters.num_gpus),
346  "Number of gpus to use.");
347  help_desc.add_options()(
348  "read-only",
349  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
350  "Enable read-only mode.");
351 
352  help_desc.add_options()(
353  "res-gpu-mem",
354  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
355  "Reduces GPU memory available to the OmniSci allocator by this amount. Used for "
356  "compiled code cache and ancillary GPU functions and other processes that may also "
357  "be using the GPU concurrent with OmniSciDB.");
358 
359  help_desc.add_options()("start-gpu",
360  po::value<int>(&system_parameters.start_gpu)
361  ->default_value(system_parameters.start_gpu),
362  "First gpu to use.");
363  help_desc.add_options()("trivial-loop-join-threshold",
364  po::value<unsigned>(&g_trivial_loop_join_threshold)
365  ->default_value(g_trivial_loop_join_threshold)
366  ->implicit_value(1000),
367  "The maximum number of rows in the inner table of a loop join "
368  "considered to be trivially small.");
369  help_desc.add_options()("verbose",
370  po::value<bool>(&verbose_logging)
371  ->default_value(verbose_logging)
372  ->implicit_value(true),
373  "Write additional debug log messages to server logs.");
374  help_desc.add_options()(
375  "enable-runtime-udf",
376  po::value<bool>(&enable_runtime_udf)
377  ->default_value(enable_runtime_udf)
378  ->implicit_value(true),
379  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
380  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
381  "Compiler server, packaged separately.");
382  help_desc.add_options()("version,v", "Print Version Number.");
383  help_desc.add_options()("enable-experimental-string-functions",
386  ->implicit_value(true),
387  "Enable experimental string functions.");
388  help_desc.add_options()(
389  "enable-fsi",
390  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
391  "Enable foreign storage interface.");
392 
393 #ifdef ENABLE_IMPORT_PARQUET
394  help_desc.add_options()("enable-parquet-import-fsi",
395  po::value<bool>(&g_enable_parquet_import_fsi)
396  ->default_value(g_enable_parquet_import_fsi)
397  ->implicit_value(true),
398  "Enable foreign storage interface based parquet import.");
399 #endif
400 
401  help_desc.add_options()("disk-cache-path",
402  po::value<std::string>(&disk_cache_config.path),
403  "Specify the path for the disk cache.");
404 
405  help_desc.add_options()(
406  "disk-cache-level",
407  po::value<std::string>(&(disk_cache_level))->default_value("foreign_tables"),
408  "Specify level of disk cache. Valid options are 'foreign_tables', "
409  "'local_tables', 'none', and 'all'.");
410 
411  help_desc.add_options()("disk-cache-size",
412  po::value<size_t>(&(disk_cache_config.size_limit)),
413  "Specify a maximum size for the disk cache in bytes.");
414 
415 #ifdef HAVE_AWS_S3
416  help_desc.add_options()(
417  "allow-s3-server-privileges",
418  po::value<bool>(&g_allow_s3_server_privileges)
419  ->default_value(g_allow_s3_server_privileges)
420  ->implicit_value(true),
421  "Allow S3 server privileges, if IAM user credentials are not provided. Credentials "
422  "may be specified with "
423  "environment variables (such as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, etc), "
424  "an AWS credentials file, or when running on an EC2 instance, with an IAM role "
425  "that is attached to the instance.");
426 #endif // defined(HAVE_AWS_S3)
427  help_desc.add_options()(
428  "enable-interoperability",
429  po::value<bool>(&g_enable_interop)
430  ->default_value(g_enable_interop)
431  ->implicit_value(true),
432  "Enable offloading of query portions to an external execution engine.");
433  help_desc.add_options()("enable-union",
434  po::value<bool>(&g_enable_union)
435  ->default_value(g_enable_union)
436  ->implicit_value(true),
437  "Enable UNION ALL SQL clause.");
438  help_desc.add_options()(
439  "calcite-service-timeout",
440  po::value<size_t>(&system_parameters.calcite_timeout)
441  ->default_value(system_parameters.calcite_timeout),
442  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
443  "schema changes or when running large numbers of parallel queries.");
444  help_desc.add_options()("calcite-service-keepalive",
445  po::value<size_t>(&system_parameters.calcite_keepalive)
446  ->default_value(system_parameters.calcite_keepalive)
447  ->implicit_value(true),
448  "Enable keepalive on Calcite connections.");
449  help_desc.add_options()(
450  "stringdict-parallelizm",
451  po::value<bool>(&g_enable_stringdict_parallel)
452  ->default_value(g_enable_stringdict_parallel)
453  ->implicit_value(true),
454  "Allow StringDictionary to parallelize loads using multiple threads");
455  help_desc.add_options()(
456  "log-user-id",
457  po::value<bool>(&Catalog_Namespace::g_log_user_id)
458  ->default_value(Catalog_Namespace::g_log_user_id)
459  ->implicit_value(true),
460  "Log userId integer in place of the userName (when available).");
461  help_desc.add_options()("log-user-origin",
462  po::value<bool>(&log_user_origin)
463  ->default_value(log_user_origin)
464  ->implicit_value(true),
465  "Lookup the origin of inbound connections by IP address/DNS "
466  "name, and print this information as part of stdlog.");
467  help_desc.add_options()(
468  "allowed-import-paths",
469  po::value<std::string>(&allowed_import_paths),
470  "List of allowed root paths that can be used in import operations.");
471  help_desc.add_options()(
472  "allowed-export-paths",
473  po::value<std::string>(&allowed_export_paths),
474  "List of allowed root paths that can be used in export operations.");
475  help_desc.add_options()("enable-system-tables",
476  po::value<bool>(&g_enable_system_tables)
477  ->default_value(g_enable_system_tables)
478  ->implicit_value(true),
479  "Enable use of system tables.");
480 #ifdef ENABLE_MEMKIND
481  help_desc.add_options()("enable-tiered-cpu-mem",
482  po::value<bool>(&g_enable_tiered_cpu_mem)
483  ->default_value(g_enable_tiered_cpu_mem)
484  ->implicit_value(true),
485  "Enable additional tiers of CPU memory (PMEM, etc...)");
486  help_desc.add_options()("pmem-size", po::value<size_t>(&g_pmem_size)->default_value(0));
487  help_desc.add_options()("pmem-path", po::value<std::string>(&g_pmem_path));
488 #endif
489 
491 }
492 
494  developer_desc.add_options()("dev-options", "Print internal developer options.");
495  developer_desc.add_options()(
496  "enable-calcite-view-optimize",
499  ->implicit_value(true),
500  "Enable additional calcite (query plan) optimizations when a view is part of the "
501  "query.");
502  developer_desc.add_options()(
503  "enable-columnar-output",
504  po::value<bool>(&g_enable_columnar_output)
505  ->default_value(g_enable_columnar_output)
506  ->implicit_value(true),
507  "Enable columnar output for intermediate/final query steps.");
508  developer_desc.add_options()(
509  "enable-left-join-filter-hoisting",
510  po::value<bool>(&g_enable_left_join_filter_hoisting)
511  ->default_value(g_enable_left_join_filter_hoisting)
512  ->implicit_value(true),
513  "Enable hoisting left hand side filters through left joins.");
514  developer_desc.add_options()("optimize-row-init",
515  po::value<bool>(&g_optimize_row_initialization)
516  ->default_value(g_optimize_row_initialization)
517  ->implicit_value(true),
518  "Optimize row initialization.");
519  developer_desc.add_options()("enable-legacy-syntax",
520  po::value<bool>(&enable_legacy_syntax)
521  ->default_value(enable_legacy_syntax)
522  ->implicit_value(true),
523  "Enable legacy syntax.");
524  developer_desc.add_options()(
525  "enable-multifrag",
526  po::value<bool>(&allow_multifrag)
527  ->default_value(allow_multifrag)
528  ->implicit_value(true),
529  "Enable execution over multiple fragments in a single round-trip to GPU.");
530  developer_desc.add_options()("enable-lazy-fetch",
531  po::value<bool>(&g_enable_lazy_fetch)
532  ->default_value(g_enable_lazy_fetch)
533  ->implicit_value(true),
534  "Enable lazy fetch columns in query results.");
535  developer_desc.add_options()(
536  "enable-shared-mem-group-by",
537  po::value<bool>(&g_enable_smem_group_by)
538  ->default_value(g_enable_smem_group_by)
539  ->implicit_value(true),
540  "Enable using GPU shared memory for some GROUP BY queries.");
541  developer_desc.add_options()("num-executors",
542  po::value<int>(&system_parameters.num_executors)
543  ->default_value(system_parameters.num_executors),
544  "Number of executors to run in parallel.");
545  developer_desc.add_options()(
546  "gpu-shared-mem-threshold",
547  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
548  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
549  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
550  developer_desc.add_options()(
551  "enable-shared-mem-grouped-non-count-agg",
552  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
553  ->default_value(g_enable_smem_grouped_non_count_agg)
554  ->implicit_value(true),
555  "Enable using GPU shared memory for grouped non-count aggregate queries.");
556  developer_desc.add_options()(
557  "enable-shared-mem-non-grouped-agg",
558  po::value<bool>(&g_enable_smem_non_grouped_agg)
559  ->default_value(g_enable_smem_non_grouped_agg)
560  ->implicit_value(true),
561  "Enable using GPU shared memory for non-grouped aggregate queries.");
562  developer_desc.add_options()("enable-direct-columnarization",
563  po::value<bool>(&g_enable_direct_columnarization)
564  ->default_value(g_enable_direct_columnarization)
565  ->implicit_value(true),
566  "Enables/disables a more optimized columnarization method "
567  "for intermediate steps in multi-step queries.");
568  developer_desc.add_options()(
569  "offset-device-by-table-id",
570  po::value<bool>(&g_use_table_device_offset)
571  ->default_value(g_use_table_device_offset)
572  ->implicit_value(true),
573  "Enables/disables offseting the chosen device ID by the table ID for a given "
574  "fragment. This improves balance of fragments across GPUs.");
575  developer_desc.add_options()("enable-window-functions",
576  po::value<bool>(&g_enable_window_functions)
577  ->default_value(g_enable_window_functions)
578  ->implicit_value(true),
579  "Enable experimental window function support.");
580  developer_desc.add_options()("enable-table-functions",
581  po::value<bool>(&g_enable_table_functions)
582  ->default_value(g_enable_table_functions)
583  ->implicit_value(true),
584  "Enable experimental table functions support.");
585  developer_desc.add_options()(
586  "jit-debug-ir",
587  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
588  "Enable runtime debugger support for the JIT. Note that this flag is "
589  "incompatible "
590  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
591  "`/tmp/mapdquery`.");
592  developer_desc.add_options()(
593  "intel-jit-profile",
594  po::value<bool>(&intel_jit_profile)
595  ->default_value(intel_jit_profile)
596  ->implicit_value(true),
597  "Enable runtime support for the JIT code profiling using Intel VTune.");
598  developer_desc.add_options()(
599  "enable-cpu-sub-tasks",
600  po::value<bool>(&g_enable_cpu_sub_tasks)
601  ->default_value(g_enable_cpu_sub_tasks)
602  ->implicit_value(true),
603  "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
604  "load balance and decrease reduction overhead.");
605  developer_desc.add_options()(
606  "cpu-sub-task-size",
607  po::value<size_t>(&g_cpu_sub_task_size)->default_value(g_cpu_sub_task_size),
608  "Set CPU sub-task size in rows.");
609  developer_desc.add_options()(
610  "skip-intermediate-count",
611  po::value<bool>(&g_skip_intermediate_count)
612  ->default_value(g_skip_intermediate_count)
613  ->implicit_value(true),
614  "Skip pre-flight counts for intermediate projections with no filters.");
615  developer_desc.add_options()(
616  "strip-join-covered-quals",
617  po::value<bool>(&g_strip_join_covered_quals)
618  ->default_value(g_strip_join_covered_quals)
619  ->implicit_value(true),
620  "Remove quals from the filtered count if they are covered by a "
621  "join condition (currently only ST_Contains).");
622 
623  developer_desc.add_options()(
624  "min-cpu-slab-size",
625  po::value<size_t>(&system_parameters.min_cpu_slab_size)
626  ->default_value(system_parameters.min_cpu_slab_size),
627  "Min slab size (size of memory allocations) for CPU buffer pool.");
// Upper bound for CPU buffer pool slab allocations (system_parameters.max_cpu_slab_size).
628  developer_desc.add_options()(
629  "max-cpu-slab-size",
630  po::value<size_t>(&system_parameters.max_cpu_slab_size)
631  ->default_value(system_parameters.max_cpu_slab_size),
632  "Max CPU buffer pool slab size (size of memory allocations). Note if "
633  "there is not enough free memory to accommodate the target slab size, smaller "
634  "slabs will be allocated, down to the minimum size specified by "
635  "min-cpu-slab-size.");
636  developer_desc.add_options()(
637  "min-gpu-slab-size",
638  po::value<size_t>(&system_parameters.min_gpu_slab_size)
639  ->default_value(system_parameters.min_gpu_slab_size),
640  "Min slab size (size of memory allocations) for GPU buffer pools.");
// Upper bound for GPU buffer pool slab allocations (system_parameters.max_gpu_slab_size).
641  developer_desc.add_options()(
642  "max-gpu-slab-size",
643  po::value<size_t>(&system_parameters.max_gpu_slab_size)
644  ->default_value(system_parameters.max_gpu_slab_size),
645  "Max GPU buffer pool slab size (size of memory allocations). Note if "
646  "there is not enough free memory to accommodate the target slab size, smaller "
647  "slabs will be allocated, down to the minimum size specified by "
648  "min-gpu-slab-size.");
649 
// Upper bound for the fixed projection output buffer (g_max_memory_allocation_size).
650  developer_desc.add_options()(
651  "max-output-projection-allocation-bytes",
652  po::value<size_t>(&g_max_memory_allocation_size)
653  ->default_value(g_max_memory_allocation_size),
654  "Maximum allocation size for a fixed output buffer allocation for projection "
655  "queries with no pre-flight count. Default is the maximum slab size (sizes "
656  "greater "
657  "than the maximum slab size have no effect). Requires bump allocator.");
658  developer_desc.add_options()(
659  "min-output-projection-allocation-bytes",
660  po::value<size_t>(&g_min_memory_allocation_size)
661  ->default_value(g_min_memory_allocation_size),
662  "Minimum allocation size for a fixed output buffer allocation for projection "
663  "queries with no pre-flight count. If an allocation of this size cannot be "
664  "obtained, the query will be retried with different execution parameters and/or "
665  "on "
666  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
667  developer_desc.add_options()("enable-bump-allocator",
668  po::value<bool>(&g_enable_bump_allocator)
669  ->default_value(g_enable_bump_allocator)
670  ->implicit_value(true),
671  "Enable the bump allocator for projection queries on "
672  "GPU. The bump allocator will "
673  "allocate a fixed size buffer for each query, track the "
674  "number of rows passing the "
675  "kernel during query execution, and copy back only the "
676  "rows that passed the kernel "
677  "to CPU after execution. When disabled, pre-flight "
678  "count queries are used to size "
679  "the output buffer for projection queries.");
680  developer_desc.add_options()(
681  "code-cache-eviction-percent",
682  po::value<float>(&g_fraction_code_cache_to_evict)
683  ->default_value(g_fraction_code_cache_to_evict),
684  "Percentage of the GPU code cache to evict if an out of memory error is "
685  "encountered while attempting to place generated code on the GPU.");
686 
// Stores the --ssl-cert value in system_parameters.ssl_cert_file; defaults to an empty string.
687  developer_desc.add_options()("ssl-cert",
688  po::value<std::string>(&system_parameters.ssl_cert_file)
689  ->default_value(std::string("")),
690  "SSL Validated public certificate.");
691 
692  developer_desc.add_options()("ssl-private-key",
693  po::value<std::string>(&system_parameters.ssl_key_file)
694  ->default_value(std::string("")),
695  "SSL private key file.");
696  // Note: ssl_trust_store is passed through to Calcite via system_parameters.
697  // todo(jack): ensure ssl-trust-store exists if cert and private key are in use
698  developer_desc.add_options()("ssl-trust-store",
699  po::value<std::string>(&system_parameters.ssl_trust_store)
700  ->default_value(std::string("")),
701  "SSL public CA certificates (java trust store) to validate "
702  "TLS connections (passed through to the Calcite server).");
703 
704  developer_desc.add_options()(
705  "ssl-trust-password",
706  po::value<std::string>(&system_parameters.ssl_trust_password)
707  ->default_value(std::string("")),
708  "SSL password for java trust store provided via --ssl-trust-store parameter.");
709 
710  developer_desc.add_options()(
711  "ssl-trust-ca",
712  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
713  ->default_value(std::string("")),
714  "SSL public CA certificates to validate TLS connection(as a client).");
715 
716  developer_desc.add_options()(
717  "ssl-trust-ca-server",
718  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
719  "SSL public CA certificates to validate TLS connection(as a server).");
720 
721  developer_desc.add_options()("ssl-keystore",
722  po::value<std::string>(&system_parameters.ssl_keystore)
723  ->default_value(std::string("")),
724  "SSL server credentials as a java key store (passed "
725  "through to the Calcite server).");
726 
// Stores the --ssl-keystore-password value in system_parameters.ssl_keystore_password;
// defaults to an empty string.
727  developer_desc.add_options()(
728  "ssl-keystore-password",
729  po::value<std::string>(&system_parameters.ssl_keystore_password)
730  ->default_value(std::string("")),
731  "SSL password for java keystore, provided via --ssl-keystore.");
732 
733  developer_desc.add_options()(
734  "udf",
735  po::value<std::string>(&udf_file_name),
736  "Load user defined extension functions from this file at startup. The file is "
737  "expected to be a C/C++ file with extension .cpp.");
738 
739  developer_desc.add_options()(
740  "udf-compiler-path",
741  po::value<std::string>(&udf_compiler_path),
742  "Provide absolute path to clang++ used in udf compilation.");
743 
744  developer_desc.add_options()("udf-compiler-options",
745  po::value<std::vector<std::string>>(&udf_compiler_options),
746  "Specify compiler options to tailor udf compilation.");
747 
748 #ifdef ENABLE_GEOS
749  developer_desc.add_options()("libgeos-so-filename",
750  po::value<std::string>(&libgeos_so_filename),
751  "Specify libgeos shared object filename to be used for "
752  "geos-backed geo opertations.");
753 #endif
754  developer_desc.add_options()(
755  "large-ndv-threshold",
756  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
757  developer_desc.add_options()(
758  "large-ndv-multiplier",
759  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
760  developer_desc.add_options()("approx_quantile_buffer",
761  po::value<size_t>(&g_approx_quantile_buffer)
762  ->default_value(g_approx_quantile_buffer));
763  developer_desc.add_options()("approx_quantile_centroids",
764  po::value<size_t>(&g_approx_quantile_centroids)
765  ->default_value(g_approx_quantile_centroids));
766  developer_desc.add_options()(
767  "bitmap-memory-limit",
768  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
769  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
770  "size of the group by buffer (entry count in Query Memory Descriptor) and "
771  "multiplying it by the number of count distinct expression and the size of bitmap "
772  "required for each. For approx_count_distinct this is typically 8192 bytes.");
773  developer_desc.add_options()(
774  "enable-filter-function",
775  po::value<bool>(&g_enable_filter_function)
776  ->default_value(g_enable_filter_function)
777  ->implicit_value(true),
778  "Enable the filter function protection feature for the SQL JIT compiler. "
779  "Normally should be on but techs might want to disable for troubleshooting.");
780  developer_desc.add_options()(
781  "enable-idp-temporary-users",
782  po::value<bool>(&g_enable_idp_temporary_users)
783  ->default_value(g_enable_idp_temporary_users)
784  ->implicit_value(true),
785  "Enable temporary users for SAML and LDAP logins on read-only servers. "
786  "Normally should be on but techs might want to disable for troubleshooting.");
787  developer_desc.add_options()(
788  "enable-calcite-ddl",
789  po::value<bool>(&g_enable_calcite_ddl_parser)
790  ->default_value(g_enable_calcite_ddl_parser)
791  ->implicit_value(true),
792  "Enable using Calcite for supported DDL parsing when available.");
793  developer_desc.add_options()(
794  "enable-seconds-refresh-interval",
795  po::value<bool>(&g_enable_seconds_refresh)
796  ->default_value(g_enable_seconds_refresh)
797  ->implicit_value(true),
798  "Enable foreign table seconds refresh interval for testing purposes.");
799  developer_desc.add_options()("enable-auto-metadata-update",
800  po::value<bool>(&g_enable_auto_metadata_update)
801  ->default_value(g_enable_auto_metadata_update)
802  ->implicit_value(true),
803  "Enable automatic metadata update.");
804  developer_desc.add_options()(
805  "parallel-top-min",
806  po::value<size_t>(&g_parallel_top_min)->default_value(g_parallel_top_min),
807  "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
808  "parallelTop() to sort.");
809  developer_desc.add_options()(
810  "parallel-top-max",
811  po::value<size_t>(&g_parallel_top_max)->default_value(g_parallel_top_max),
812  "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
813  "watchdog.");
814  developer_desc.add_options()("vacuum-min-selectivity",
815  po::value<float>(&g_vacuum_min_selectivity)
816  ->default_value(g_vacuum_min_selectivity),
817  "Minimum selectivity for automatic vacuuming. "
818  "This specifies the percentage (with a value of 0 "
819  "implying 0% and a value of 1 implying 100%) of "
820  "deleted rows in a fragment at which to perform "
821  "automatic vacuuming. A number greater than 1 can "
822  "be used to disable automatic vacuuming.");
823  developer_desc.add_options()("enable-automatic-ir-metadata",
824  po::value<bool>(&g_enable_automatic_ir_metadata)
825  ->default_value(g_enable_automatic_ir_metadata)
826  ->implicit_value(true),
827  "Enable automatic IR metadata (debug builds only).");
828  developer_desc.add_options()(
829  "estimator-failure-max-groupby-size",
830  po::value<size_t>(&g_estimator_failure_max_groupby_size)
831  ->default_value(g_estimator_failure_max_groupby_size),
832  "Maximum size of the groupby buffer if the estimator fails. By default we use the "
833  "number of tuples in the table up to this value.");
834  help_desc.add_options()(
835  "allow-query-step-cpu-retry",
836  po::value<bool>(&g_allow_query_step_cpu_retry)
837  ->default_value(g_allow_query_step_cpu_retry)
838  ->implicit_value(true),
839  R"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
840 }
841 
842 namespace {
843 
/**
 * Copies the leading part of a config file into an in-memory stream so that the
 * program-options parser never sees the entries of the web section.
 *
 * Lines are copied verbatim, each followed by '\n', up to and including the
 * "[web]" section header; nothing after that header is copied. If no such
 * header is present the whole file is copied.
 *
 * The header is recognised after ignoring leading/trailing spaces, tabs and
 * carriage returns, so files saved with CRLF line endings (where std::getline
 * leaves a trailing '\r' on each line) are handled the same as LF files.
 *
 * @param in  Open input stream positioned at the start of the config file.
 * @return    Stream containing the copied lines.
 */
std::stringstream sanitize_config_file(std::ifstream& in) {
  // Strip the web section out of the config file so boost can validate program options
  constexpr char kWhitespace[] = " \t\r\n";
  constexpr char kWebSection[] = "[web]";

  std::stringstream ss;
  std::string line;
  while (std::getline(in, line)) {
    // The line itself is forwarded untouched; only the comparison is trimmed.
    ss << line << "\n";

    const auto first = line.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
      continue;  // blank line, cannot be the section header
    }
    const auto last = line.find_last_not_of(kWhitespace);
    if (line.compare(first, last - first + 1, kWebSection) == 0) {
      break;
    }
  }
  return ss;
}
856 
857 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
858  if (!filename.empty()) {
859  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
860  if (!boost::filesystem::exists(filename)) {
861  std::cerr << desc << " " << filename << " does not exist." << std::endl;
862  return false;
863  }
864  }
865  return true;
866 }
867 
869  if (!filename.empty()) {
871  }
872 }
873 
874 } // namespace
875 
877  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
878  if (!boost::filesystem::exists(base_path)) {
879  throw std::runtime_error("OmniSci base directory does not exist at " + base_path);
880  }
881 }
882 
884  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
885  const auto data_path = boost::filesystem::path(base_path) / "mapd_data";
886  if (!boost::filesystem::exists(data_path)) {
887  throw std::runtime_error("OmniSci data directory does not exist at '" + base_path +
888  "'");
889  }
890 
891  {
892  const auto lock_file = boost::filesystem::path(base_path) / "omnisci_server_pid.lck";
893  auto pid = std::to_string(getpid());
894 
895  int pid_fd = omnisci::open(lock_file.string().c_str(), O_RDWR | O_CREAT, 0644);
896  if (pid_fd == -1) {
897  auto err = std::string("Failed to open PID file ") + lock_file.string().c_str() +
898  ". " + strerror(errno) + ".";
899  throw std::runtime_error(err);
900  }
901 // TODO: support lock on Windows
902 #ifndef _WIN32
903  if (lockf(pid_fd, F_TLOCK, 0) == -1) {
904  omnisci::close(pid_fd);
905  auto err = std::string("Another OmniSci Server is using data directory ") +
906  base_path + ".";
907  throw std::runtime_error(err);
908  }
909 #endif
910  if (omnisci::ftruncate(pid_fd, 0) == -1) {
911  omnisci::close(pid_fd);
912  auto err = std::string("Failed to truncate PID file ") +
913  lock_file.string().c_str() + ". " + strerror(errno) + ".";
914  throw std::runtime_error(err);
915  }
916  if (write(pid_fd, pid.c_str(), pid.length()) == -1) {
917  omnisci::close(pid_fd);
918  auto err = std::string("Failed to write PID file ") + lock_file.string().c_str() +
919  ". " + strerror(errno) + ".";
920  throw std::runtime_error(err);
921  }
922  }
923  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
924  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
925  throw std::runtime_error("File containing DB queries " + db_query_file +
926  " does not exist.");
927  }
928  const auto db_file =
929  boost::filesystem::path(base_path) / "mapd_catalogs" / OMNISCI_SYSTEM_CATALOG;
930  if (!boost::filesystem::exists(db_file)) {
931  { // check old system catalog existsense
932  const auto db_file = boost::filesystem::path(base_path) / "mapd_catalogs/mapd";
933  if (!boost::filesystem::exists(db_file)) {
934  throw std::runtime_error("OmniSci system catalog " + OMNISCI_SYSTEM_CATALOG +
935  " does not exist.");
936  }
937  }
938  }
939  if (license_path.length() == 0) {
940  license_path = base_path + "/omnisci.license";
941  }
942 
943  // add all parameters to be displayed on startup
944  LOG(INFO) << "OmniSci started with data directory at '" << base_path << "'";
945  if (vm.count("license-path")) {
946  LOG(INFO) << "License key path set to '" << license_path << "'";
947  }
949  LOG(INFO) << " Server read-only mode is " << read_only;
950 #if DISABLE_CONCURRENCY
951  LOG(INFO) << " Threading layer: serial";
952 #elif ENABLE_TBB
953  LOG(INFO) << " Threading layer: TBB";
954 #else
955  LOG(INFO) << " Threading layer: std";
956 #endif
957  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
958  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
960  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
961  }
962  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
964  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
965  << pending_query_interrupt_freq << " (in ms.)";
966  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
967  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
968  }
969  LOG(INFO) << " Non-kernel time query interrupt is set to "
971 
972  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
973  LOG(INFO) << " LogUserId is set to " << Catalog_Namespace::g_log_user_id;
974  LOG(INFO) << " Maximum idle session duration " << idle_session_duration;
975  LOG(INFO) << " Maximum active session duration " << max_session_duration;
976  LOG(INFO) << " Maximum number of sessions " << system_parameters.num_sessions;
977 
978  LOG(INFO) << "Allowed import paths is set to " << allowed_import_paths;
979  LOG(INFO) << "Allowed export paths is set to " << allowed_export_paths;
982 
984  ddl_utils::FilePathBlacklist::addToBlacklist(base_path + "/temporary/mapd_catalogs");
987  g_enable_s3_fsi = false;
988 
989 #ifdef ENABLE_IMPORT_PARQUET
990  if (g_enable_parquet_import_fsi) {
991  g_enable_fsi = true; // a requirement for FSI parquet import is for FSI to be enabled
992  }
993 #endif
994 
995  if (disk_cache_level == "foreign_tables") {
996  if (g_enable_fsi) {
998  LOG(INFO) << "Disk cache enabled for foreign tables only";
999  } else {
1000  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1001  "disk cache disabled";
1002  }
1003  } else if (disk_cache_level == "all") {
1005  LOG(INFO) << "Disk cache enabled for all tables";
1006  } else if (disk_cache_level == "local_tables") {
1008  LOG(INFO) << "Disk cache enabled for non-FSI tables";
1009  } else if (disk_cache_level == "none") {
1011  LOG(INFO) << "Disk cache disabled";
1012  } else {
1013  throw std::runtime_error{
1014  "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1015  ". Valid options are 'foreign_tables', "
1016  "'local_tables', 'none', and 'all'."};
1017  }
1018 
1020  throw std::runtime_error{"disk-cache-size must be at least " +
1022  }
1023 
1024  if (disk_cache_config.path.empty()) {
1025  disk_cache_config.path = base_path + "/omnisci_disk_cache";
1026  }
1028 
1031 
1032  // If passed in, blacklist all security config files
1041 
1042  if (g_vacuum_min_selectivity < 0) {
1043  throw std::runtime_error{"vacuum-min-selectivity cannot be less than 0."};
1044  }
1045  LOG(INFO) << "Vacuum Min Selectivity: " << g_vacuum_min_selectivity;
1046 
1047  LOG(INFO) << "Enable system tables is set to " << g_enable_system_tables;
1048  if (g_enable_system_tables) {
1049  // System tables currently reuse FSI infrastructure and therefore, require FSI to be
1050  // enabled
1051  if (!g_enable_fsi) {
1052  g_enable_fsi = true;
1053  LOG(INFO) << "FSI has been enabled as a side effect of enabling system tables";
1054  }
1055  }
1056 
1057 #ifdef ENABLE_MEMKIND
1058  if (g_enable_tiered_cpu_mem) {
1059  if (g_pmem_path == "") {
1060  throw std::runtime_error{"pmem-path must be set to use tiered cpu memory"};
1061  }
1062  if (g_pmem_size == 0) {
1063  throw std::runtime_error{"pmem-size must be set to use tiered cpu memory"};
1064  }
1065  if (!std::filesystem::exists(g_pmem_path.c_str())) {
1066  throw std::runtime_error{"path to PMem directory (" + g_pmem_path +
1067  ") does not exist."};
1068  }
1069  }
1070 #endif
1071 }
1072 
1074  int argc,
1075  char const* const* argv,
1076  const bool should_init_logging) {
1077  po::options_description all_desc("All options");
1078  all_desc.add(help_desc).add(developer_desc);
1079 
1080  try {
1081  po::store(po::command_line_parser(argc, argv)
1082  .options(all_desc)
1083  .positional(positional_options)
1084  .run(),
1085  vm);
1086  po::notify(vm);
1087 
1088  if (vm.count("help")) {
1089  std::cerr << "Usage: omnisci_server <data directory path> [-p <port number>] "
1090  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1091  << std::endl
1092  << std::endl;
1093  std::cout << help_desc << std::endl;
1094  return 0;
1095  }
1096  if (vm.count("dev-options")) {
1097  std::cout << "Usage: omnisci_server <data directory path> [-p <port number>] "
1098  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1099  << std::endl
1100  << std::endl;
1101  std::cout << developer_desc << std::endl;
1102  return 0;
1103  }
1104  if (vm.count("version")) {
1105  std::cout << "OmniSci Version: " << MAPD_RELEASE << std::endl;
1106  return 0;
1107  }
1108 
1109  if (vm.count("config")) {
1110  std::ifstream settings_file(system_parameters.config_file);
1111 
1112  auto sanitized_settings = sanitize_config_file(settings_file);
1113 
1114  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
1115  po::notify(vm);
1116  settings_file.close();
1117  }
1118 
1119  if (should_init_logging) {
1120  init_logging();
1121  }
1122 
1124  return 1;
1125  }
1126  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
1127  return 1;
1128  }
1130  "ssl trust store")) {
1131  return 1;
1132  }
1134  return 1;
1135  }
1137  return 1;
1138  }
1140  return 1;
1141  }
1142 
1155 
1156  } catch (po::error& e) {
1157  std::cerr << "Usage Error: " << e.what() << std::endl;
1158  return 1;
1159  }
1160 
1161  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1162  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
1163  return 1;
1164  }
1165 
1166  if (!g_from_table_reordering) {
1167  LOG(INFO) << " From clause table reordering is disabled";
1168  }
1169 
1171  LOG(INFO) << " Filter push down for JOIN is enabled";
1172  }
1173 
1174  if (vm.count("udf")) {
1175  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
1176 
1177  if (!boost::filesystem::exists(udf_file_name)) {
1178  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
1179  return 1;
1180  }
1181 
1182  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
1183  }
1184 
1185  if (vm.count("udf-compiler-path")) {
1186  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
1187  }
1188 
1189  auto trim_string = [](std::string& s) {
1190  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
1191  };
1192 
1193  if (vm.count("udf-compiler-options")) {
1194  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1195  }
1196 
1197  if (enable_runtime_udf) {
1198  LOG(INFO) << " Runtime user defined extension functions enabled globally.";
1199  }
1200 
1201  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
1202  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
1203  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
1204  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1205  boost::is_any_of("\"'"));
1206 
1207  if (!system_parameters.ha_group_id.empty()) {
1208  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
1209  if (system_parameters.ha_unique_server_id.empty()) {
1210  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
1211  return 5;
1212  } else {
1213  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
1214  }
1215  if (system_parameters.ha_brokers.empty()) {
1216  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
1217  return 6;
1218  } else {
1219  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
1220  }
1221  if (system_parameters.ha_shared_data.empty()) {
1222  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
1223  return 7;
1224  } else {
1225  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
1226  }
1227  }
1228 
1229  boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of("\"'"));
1230  if (!system_parameters.master_address.empty()) {
1231  if (!read_only) {
1232  LOG(ERROR) << "The master-address setting is only allowed in read-only mode";
1233  return 9;
1234  }
1235  LOG(INFO) << " Master Address is " << system_parameters.master_address;
1236  LOG(INFO) << " Master Port is " << system_parameters.master_port;
1237  }
1238 
1239  if (g_max_import_threads < 1) {
1240  std::cerr << "max-import-threads must be >= 1 (was set to " << g_max_import_threads
1241  << ")." << std::endl;
1242  return 8;
1243  } else {
1244  LOG(INFO) << " Max import threads " << g_max_import_threads;
1245  }
1246 
1247  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
1248  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
1249  LOG(INFO) << " Min CPU buffer pool slab size " << system_parameters.min_cpu_slab_size;
1250  LOG(INFO) << " Max CPU buffer pool slab size " << system_parameters.max_cpu_slab_size;
1251  LOG(INFO) << " Min GPU buffer pool slab size " << system_parameters.min_gpu_slab_size;
1252  LOG(INFO) << " Max GPU buffer pool slab size " << system_parameters.max_gpu_slab_size;
1253  LOG(INFO) << " calcite JVM max memory " << system_parameters.calcite_max_mem;
1254  LOG(INFO) << " OmniSci Server Port " << system_parameters.omnisci_server_port;
1255  LOG(INFO) << " OmniSci Calcite Port " << system_parameters.calcite_port;
1256  LOG(INFO) << " Enable Calcite view optimize "
1258  LOG(INFO) << " Allow Local Auth Fallback: "
1259  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
1260  LOG(INFO) << " ParallelTop min threshold: " << g_parallel_top_min;
1261  LOG(INFO) << " ParallelTop watchdog max: " << g_parallel_top_max;
1262 
1263  LOG(INFO) << " Enable Data Recycler: "
1264  << (g_enable_data_recycler ? "enabled" : "disabled");
1265  if (g_enable_data_recycler) {
1266  LOG(INFO) << " \t Use hashtable cache: "
1267  << (g_use_hashtable_cache ? "enabled" : "disabled");
1268  if (g_use_hashtable_cache) {
1269  LOG(INFO) << " \t\t Total amount of bytes that hashtable cache keeps: "
1270  << g_hashtable_cache_total_bytes / (1024 * 1024) << " MB.";
1271  LOG(INFO) << " \t\t Per-hashtable size limit: "
1272  << g_max_cacheable_hashtable_size_bytes / (1024 * 1024) << " MB.";
1273  }
1274  }
1275 
1276  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
1277  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
1278  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
1279  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
1280  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
1281 
1282  return boost::none;
1283 }
std::string distinguishedName
Definition: AuthMetadata.h:25
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
unsigned connect_timeout
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:96
double g_running_query_interrupt_freq
Definition: Execute.cpp:122
bool g_enable_smem_group_by
int open(const char *path, int flags, int mode)
Definition: omnisci_fs.cpp:64
size_t g_pmem_size
size_t g_parallel_top_max
Definition: ResultSet.cpp:45
float g_filter_push_down_low_frac
Definition: Execute.cpp:92
std::string ldapQueryUrl
Definition: AuthMetadata.h:26
bool g_enable_watchdog
size_t g_cpu_sub_task_size
Definition: Execute.cpp:79
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
logger::LogOptions log_options_
bool g_strip_join_covered_quals
Definition: Execute.cpp:103
bool g_enable_direct_columnarization
Definition: Execute.cpp:115
std::string ha_shared_data
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:826
bool g_enable_lazy_fetch
Definition: Execute.cpp:117
std::string udf_compiler_path
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:121
po::options_description help_desc
#define LOG(tag)
Definition: Logger.h:203
bool enable_calcite_view_optimize
bool g_enable_debug_timer
Definition: Logger.cpp:17
std::string ldapRoleRegex
Definition: AuthMetadata.h:27
const std::string OMNISCI_SYSTEM_CATALOG
Definition: SysCatalog.h:58
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:94
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:77
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:141
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:85
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:119
bool enable_non_kernel_time_query_interrupt
int g_hll_precision_bits
bool g_enable_data_recycler
Definition: Execute.cpp:139
std::string config_file
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:98
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:87
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:141
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:142
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:131
bool g_null_div_by_zero
Definition: Execute.cpp:84
bool g_enable_interop
std::string ha_brokers
size_t g_parallel_top_min
Definition: ResultSet.cpp:44
std::string ssl_trust_ca_file
bool g_enable_columnar_output
Definition: Execute.cpp:95
void close(const int fd)
Definition: omnisci_fs.cpp:68
std::string ssl_trust_store
int64_t g_bitmap_memory_limit
bool g_enable_s3_fsi
Definition: Catalog.cpp:94
bool g_enable_idp_temporary_users
Definition: SysCatalog.cpp:64
bool g_from_table_reordering
Definition: Execute.cpp:86
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:100
bool g_enable_system_tables
Definition: SysCatalog.cpp:65
void init(LogOptions const &log_opts)
Definition: Logger.cpp:290
std::string g_pmem_path
float g_filter_push_down_high_frac
Definition: Execute.cpp:93
bool g_enable_distance_rangejoin
Definition: Execute.cpp:99
int32_t ftruncate(const int32_t fd, int64_t length)
Definition: omnisci_fs.cpp:84
bool g_enable_thrift_logs
Definition: initdb.cpp:40
std::string uri
Definition: AuthMetadata.h:24
bool g_bigint_count
size_t g_max_memory_allocation_size
Definition: Execute.cpp:109
double g_overlaps_target_entries_per_bin
Definition: Execute.cpp:102
size_t g_approx_quantile_buffer
Definition: Execute.cpp:144
std::string ha_unique_server_id
std::string allowed_export_paths
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:101
std::string ca_file_name
Definition: AuthMetadata.h:31
std::string ssl_key_file
AuthMetadata authMetadata
bool g_enable_calcite_ddl_parser
Definition: ParserNode.cpp:76
bool g_enable_window_functions
Definition: Execute.cpp:107
size_t g_min_memory_allocation_size
Definition: Execute.cpp:110
bool with_keepalive
std::string ldapSuperUserRole
Definition: AuthMetadata.h:28
bool g_read_only
Definition: File.cpp:38
bool g_enable_seconds_refresh
unsigned pending_query_interrupt_freq
size_t g_estimator_failure_max_groupby_size
tuple line
Definition: parse_ast.py:10
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:128
bool g_enable_experimental_string_functions
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:147
std::vector< std::string > udf_compiler_options
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:80
bool g_cache_string_hash
float g_fraction_code_cache_to_evict
Severity severity_
Definition: Logger.h:125
std::string ssl_keystore_password
std::string ssl_trust_password
bool g_enable_filter_push_down
Definition: Execute.cpp:91
bool g_use_estimator_result_cache
Definition: Execute.cpp:120
bool g_enable_bump_allocator
Definition: Execute.cpp:113
boost::program_options::options_description const & get_options() const
bool allowLocalAuthFallback
Definition: AuthMetadata.h:32
po::positional_options_description positional_options
void set_base_path(std::string const &base_path)
bool g_enable_union
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:78
std::string ssl_keystore
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:83
bool g_allow_cpu_retry
Definition: Execute.cpp:82
File_Namespace::DiskCacheConfig disk_cache_config
size_t g_approx_quantile_centroids
Definition: Execute.cpp:145
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:871
po::options_description developer_desc
std::string allowed_import_paths
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:43
std::string disk_cache_level
bool g_optimize_row_initialization
Definition: Execute.cpp:97
static bool run
po::variables_map vm
size_t max_cacheable_hashtable_size_bytes
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:81
bool g_enable_fsi
Definition: Catalog.cpp:93
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
std::string ha_group_id
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:118
size_t g_max_import_threads
Definition: Importer.cpp:85
bool g_use_hashtable_cache
Definition: Execute.cpp:140
size_t g_large_ndv_multiplier
bool g_enable_table_functions
Definition: Execute.cpp:108
std::string master_address
std::string cluster_command_line_arg
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
size_t g_gpu_smem_threshold
Definition: Execute.cpp:123
std::string ssl_cert_file