OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <filesystem>
22 #include <iostream>
23 #include <string>
24 
25 using namespace std::string_literals;
26 
27 #include "CommandLineOptions.h"
28 #include "LeafHostInfo.h"
30 #include "MapDRelease.h"
33 #include "Shared/Compressor.h"
34 #include "Shared/SysDefinitions.h"
37 #include "Utils/DdlUtils.h"
38 
39 #ifdef _WIN32
40 #include <io.h>
41 #include <process.h>
42 #endif
43 
// Definition of the static member CommandLineOptions::nodeIds_token; its value is
// the literal "node_id".
44 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
45 
// Declared here and defined in another translation unit. Not referenced in the
// visible part of this file.
46 extern std::string cluster_command_line_arg;
47 
49 
// The extern globals below are defined in other translation units. Where the
// visible part of this file binds one to a command-line option, the option name
// is noted; the rest are not referenced in the visible part of this file
// (presumably bound to options further down — TODO confirm).
// Bound to --offset-device-by-table-id.
50 extern bool g_use_table_device_offset;
// Bound to --enable-string-dict-hash-cache (registered only when dist_v5_ is false).
52 extern bool g_cache_string_hash;
55 extern int64_t g_large_ndv_threshold;
56 extern size_t g_large_ndv_multiplier;
57 extern int64_t g_bitmap_memory_limit;
58 extern bool g_enable_seconds_refresh;
59 extern size_t g_approx_quantile_buffer;
60 extern size_t g_approx_quantile_centroids;
61 extern size_t g_parallel_top_min;
62 extern size_t g_parallel_top_max;
63 extern size_t g_streaming_topn_max;
// Bound to --enable-system-tables.
67 extern bool g_enable_system_tables;
// Bound to --enable-logs-system-tables.
69 extern bool g_enable_logs_system_tables;
// Only declared when ENABLE_MEMKIND is defined; bound to --pmem-path.
71 #ifdef ENABLE_MEMKIND
72 extern std::string g_pmem_path;
73 #endif
74 
75 namespace Catalog_Namespace {
// Bound to --log-user-id.
76 extern bool g_log_user_id;
77 }
78 
// File-scope connection settings defined (not merely declared) in this file.
// They are not used in the visible part of the file.
// NOTE(review): units are not stated here — presumably milliseconds; TODO confirm.
79 unsigned connect_timeout{20000};
80 unsigned recv_timeout{300000};
81 unsigned send_timeout{300000};
82 bool with_keepalive{false};
84 
// Body of the logging-initialisation routine.
// NOTE(review): the line carrying the function signature is missing from this
// listing, so the function's name and parameters are not shown here.
//
// When verbose_logging is set and DEBUG1 orders below the configured severity,
// the configured severity is replaced with DEBUG1.
86  if (verbose_logging && logger::Severity::DEBUG1 < log_options_.severity_) {
87  log_options_.severity_ = logger::Severity::DEBUG1;
88  }
// validate_base_path() is defined elsewhere; presumably it rejects an unusable
// base_path — TODO confirm. It runs before base_path is handed to the log options.
89  validate_base_path();
90  log_options_.set_base_path(base_path);
// Start the logger with the (possibly adjusted) options.
91  logger::init(log_options_);
92 }
93 
95  help_desc.add_options()("help,h", "Show available options.");
96  help_desc.add_options()(
97  "allow-cpu-retry",
98  po::value<bool>(&g_allow_cpu_retry)
99  ->default_value(g_allow_cpu_retry)
100  ->implicit_value(true),
101  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
102  help_desc.add_options()("allow-loop-joins",
103  po::value<bool>(&allow_loop_joins)
104  ->default_value(allow_loop_joins)
105  ->implicit_value(true),
106  "Enable loop joins.");
107  help_desc.add_options()("bigint-count",
108  po::value<bool>(&g_bigint_count)
109  ->default_value(g_bigint_count)
110  ->implicit_value(true),
111  "Use 64-bit count.");
112  help_desc.add_options()("calcite-max-mem",
113  po::value<size_t>(&system_parameters.calcite_max_mem)
114  ->default_value(system_parameters.calcite_max_mem),
115  "Max memory available to calcite JVM.");
116  if (!dist_v5_) {
117  help_desc.add_options()("calcite-port",
118  po::value<int>(&system_parameters.calcite_port)
119  ->default_value(system_parameters.calcite_port),
120  "Calcite port number.");
121  }
122  help_desc.add_options()("config",
123  po::value<std::string>(&system_parameters.config_file),
124  "Path to server configuration file.");
125  help_desc.add_options()("cpu-buffer-mem-bytes",
126  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
127  ->default_value(system_parameters.cpu_buffer_mem_bytes),
128  "Size of memory reserved for CPU buffers, in bytes.");
129 
130  help_desc.add_options()("cpu-only",
131  po::value<bool>(&system_parameters.cpu_only)
132  ->default_value(system_parameters.cpu_only)
133  ->implicit_value(true),
134  "Run on CPU only, even if GPUs are available.");
135  help_desc.add_options()("cuda-block-size",
136  po::value<size_t>(&system_parameters.cuda_block_size)
137  ->default_value(system_parameters.cuda_block_size),
138  "Size of block to use on GPU.");
139  help_desc.add_options()("cuda-grid-size",
140  po::value<size_t>(&system_parameters.cuda_grid_size)
141  ->default_value(system_parameters.cuda_grid_size),
142  "Size of grid to use on GPU.");
143  if (!dist_v5_) {
144  help_desc.add_options()(
145  "data",
146  po::value<std::string>(&base_path)->required()->default_value("storage"),
147  "Directory path to HeavyDB data storage (catalogs, raw data, log files, etc).");
148  positional_options.add("data", 1);
149  }
150  help_desc.add_options()("db-query-list",
151  po::value<std::string>(&db_query_file),
152  "Path to file containing HeavyDB warmup queries.");
153  help_desc.add_options()(
154  "exit-after-warmup",
155  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
156  "Exit after HeavyDB warmup queries.");
157  help_desc.add_options()("dynamic-watchdog-time-limit",
158  po::value<unsigned>(&dynamic_watchdog_time_limit)
159  ->default_value(dynamic_watchdog_time_limit)
160  ->implicit_value(10000),
161  "Dynamic watchdog time limit, in milliseconds.");
162  help_desc.add_options()("enable-data-recycler",
163  po::value<bool>(&enable_data_recycler)
164  ->default_value(enable_data_recycler)
165  ->implicit_value(true),
166  "Use data recycler.");
167  help_desc.add_options()("use-hashtable-cache",
168  po::value<bool>(&use_hashtable_cache)
169  ->default_value(use_hashtable_cache)
170  ->implicit_value(true),
171  "Use hashtable cache.");
172  help_desc.add_options()("use-query-resultset-cache",
173  po::value<bool>(&g_use_query_resultset_cache)
174  ->default_value(g_use_query_resultset_cache)
175  ->implicit_value(true),
176  "Use query resultset cache.");
177  help_desc.add_options()("use-chunk-metadata-cache",
178  po::value<bool>(&g_use_chunk_metadata_cache)
179  ->default_value(g_use_chunk_metadata_cache)
180  ->implicit_value(true),
181  "Use chunk metadata cache.");
182  help_desc.add_options()(
183  "hashtable-cache-total-bytes",
184  po::value<size_t>(&hashtable_cache_total_bytes)
185  ->default_value(hashtable_cache_total_bytes)
186  ->implicit_value(4294967296),
187  "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
188  help_desc.add_options()("max-cacheable-hashtable-size-bytes",
189  po::value<size_t>(&max_cacheable_hashtable_size_bytes)
190  ->default_value(max_cacheable_hashtable_size_bytes)
191  ->implicit_value(2147483648),
192  "The maximum size of hashtable that is available to cache, in "
193  "bytes (default: 2GB).");
194  help_desc.add_options()(
195  "query-resultset-cache-total-bytes",
196  po::value<size_t>(&g_query_resultset_cache_total_bytes)
197  ->default_value(g_query_resultset_cache_total_bytes),
198  "Size of total memory space for query resultset cache, in bytes (default: 4GB).");
199  help_desc.add_options()(
200  "max-query-resultset-size-bytes",
203  "The maximum size of query resultset that is available to cache, in "
204  "bytes (default: 2GB).");
205  help_desc.add_options()("allow-auto-query-resultset-caching",
206  po::value<bool>(&g_allow_auto_resultset_caching)
207  ->default_value(g_allow_auto_resultset_caching)
208  ->implicit_value(true),
209  "Allow automatic query resultset caching when the size of "
210  "query resultset is smaller or equal to the threshold defined "
211  "by `auto-resultset-caching-threshold-bytes`, in bytes (to "
212  "enable this, query resultset recycler "
213  "should be enabled, default: 1048576 bytes (or 1MB)).");
214  help_desc.add_options()(
215  "auto-resultset-caching-threshold-bytes",
216  po::value<size_t>(&g_auto_resultset_caching_threshold)
217  ->default_value(g_auto_resultset_caching_threshold),
218  "A threshold that allows caching query resultset automatically if the size of "
219  "resultset is less than it, in bytes (default: 1MB).");
220  help_desc.add_options()("allow-query-step-skipping",
221  po::value<bool>(&g_allow_query_step_skipping)
222  ->default_value(g_allow_query_step_skipping)
223  ->implicit_value(true),
224  "Allow query step skipping when multi-step query has at least "
225  "one cached query resultset.");
226  help_desc.add_options()("enable-debug-timer",
227  po::value<bool>(&g_enable_debug_timer)
228  ->default_value(g_enable_debug_timer)
229  ->implicit_value(true),
230  "Enable debug timer logging.");
231  help_desc.add_options()("enable-dynamic-watchdog",
232  po::value<bool>(&enable_dynamic_watchdog)
233  ->default_value(enable_dynamic_watchdog)
234  ->implicit_value(true),
235  "Enable dynamic watchdog.");
236  help_desc.add_options()("enable-filter-push-down",
237  po::value<bool>(&g_enable_filter_push_down)
238  ->default_value(g_enable_filter_push_down)
239  ->implicit_value(true),
240  "Enable filter push down through joins.");
241  help_desc.add_options()("enable-overlaps-hashjoin",
242  po::value<bool>(&g_enable_overlaps_hashjoin)
243  ->default_value(g_enable_overlaps_hashjoin)
244  ->implicit_value(true),
245  "Enable the overlaps hash join framework allowing for range "
246  "join (e.g. spatial overlaps) computation using a hash table.");
247  help_desc.add_options()("enable-hashjoin-many-to-many",
248  po::value<bool>(&g_enable_hashjoin_many_to_many)
249  ->default_value(g_enable_hashjoin_many_to_many)
250  ->implicit_value(true),
251  "Enable the overlaps hash join framework allowing for range "
252  "join (e.g. spatial overlaps) computation using a hash table.");
253  help_desc.add_options()("enable-distance-rangejoin",
254  po::value<bool>(&g_enable_distance_rangejoin)
255  ->default_value(g_enable_distance_rangejoin)
256  ->implicit_value(true),
257  "Enable accelerating point distance joins with a hash table. "
258  "This rewrites ST_Distance when using an upperbound (<= X).");
259  help_desc.add_options()("enable-runtime-query-interrupt",
260  po::value<bool>(&enable_runtime_query_interrupt)
261  ->default_value(enable_runtime_query_interrupt)
262  ->implicit_value(true),
263  "Enable runtime query interrupt.");
264  help_desc.add_options()("enable-non-kernel-time-query-interrupt",
265  po::value<bool>(&enable_non_kernel_time_query_interrupt)
266  ->default_value(enable_non_kernel_time_query_interrupt)
267  ->implicit_value(true),
268  "Enable non-kernel time query interrupt.");
269  help_desc.add_options()("pending-query-interrupt-freq",
270  po::value<unsigned>(&pending_query_interrupt_freq)
271  ->default_value(pending_query_interrupt_freq)
272  ->implicit_value(1000),
273  "A frequency of checking the request of pending query "
274  "interrupt from user (in millisecond).");
275  help_desc.add_options()(
276  "running-query-interrupt-freq",
277  po::value<double>(&running_query_interrupt_freq)
278  ->default_value(running_query_interrupt_freq)
279  ->implicit_value(0.5),
280  "A frequency of checking the request of running query "
281  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
282  help_desc.add_options()("use-estimator-result-cache",
283  po::value<bool>(&use_estimator_result_cache)
284  ->default_value(use_estimator_result_cache)
285  ->implicit_value(true),
286  "Use estimator result cache.");
287  if (!dist_v5_) {
288  help_desc.add_options()(
289  "enable-string-dict-hash-cache",
290  po::value<bool>(&g_cache_string_hash)
291  ->default_value(g_cache_string_hash)
292  ->implicit_value(true),
293  "Cache string hash values in the string dictionary server during import.");
294  }
295  help_desc.add_options()(
296  "enable-thrift-logs",
297  po::value<bool>(&g_enable_thrift_logs)
298  ->default_value(g_enable_thrift_logs)
299  ->implicit_value(true),
300  "Enable writing messages directly from thrift to stdout/stderr.");
301  help_desc.add_options()("enable-watchdog",
302  po::value<bool>(&enable_watchdog)
303  ->default_value(enable_watchdog)
304  ->implicit_value(true),
305  "Enable watchdog.");
306  help_desc.add_options()(
307  "watchdog-none-encoded-string-translation-limit",
308  po::value<size_t>(&watchdog_none_encoded_string_translation_limit)
309  ->default_value(watchdog_none_encoded_string_translation_limit),
310  "Max number of none-encoded strings allowed to be translated "
311  "to dictionary-encoded with watchdog enabled");
312  help_desc.add_options()(
313  "filter-push-down-low-frac",
314  po::value<float>(&g_filter_push_down_low_frac)
315  ->default_value(g_filter_push_down_low_frac)
316  ->implicit_value(g_filter_push_down_low_frac),
317  "Lower threshold for selectivity of filters that are pushed down.");
318  help_desc.add_options()(
319  "filter-push-down-high-frac",
320  po::value<float>(&g_filter_push_down_high_frac)
321  ->default_value(g_filter_push_down_high_frac)
322  ->implicit_value(g_filter_push_down_high_frac),
323  "Higher threshold for selectivity of filters that are pushed down.");
324  help_desc.add_options()("filter-push-down-passing-row-ubound",
325  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
327  ->implicit_value(g_filter_push_down_passing_row_ubound),
328  "Upperbound on the number of rows that should pass the filter "
329  "if the selectivity is less than "
330  "the high fraction threshold.");
331  help_desc.add_options()("from-table-reordering",
332  po::value<bool>(&g_from_table_reordering)
333  ->default_value(g_from_table_reordering)
334  ->implicit_value(true),
335  "Enable automatic table reordering in FROM clause.");
336  help_desc.add_options()("gpu-buffer-mem-bytes",
337  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
338  ->default_value(system_parameters.gpu_buffer_mem_bytes),
339  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
340  help_desc.add_options()("gpu-input-mem-limit",
341  po::value<double>(&system_parameters.gpu_input_mem_limit)
342  ->default_value(system_parameters.gpu_input_mem_limit),
343  "Force query to CPU when input data memory usage exceeds this "
344  "percentage of available GPU memory.");
345  help_desc.add_options()(
346  "hll-precision-bits",
347  po::value<int>(&g_hll_precision_bits)
348  ->default_value(g_hll_precision_bits)
349  ->implicit_value(g_hll_precision_bits),
350  "Number of bits used from the hash value used to specify the bucket number.");
351  if (!dist_v5_) {
352  help_desc.add_options()("http-port",
353  po::value<int>(&http_port)->default_value(http_port),
354  "HTTP port number.");
355  help_desc.add_options()(
356  "http-binary-port",
357  po::value<int>(&http_binary_port)->default_value(http_binary_port),
358  "HTTP binary port number.");
359  }
360  help_desc.add_options()(
361  "idle-session-duration",
362  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
363  "Maximum duration of idle session.");
364  help_desc.add_options()("inner-join-fragment-skipping",
365  po::value<bool>(&g_inner_join_fragment_skipping)
366  ->default_value(g_inner_join_fragment_skipping)
367  ->implicit_value(true),
368  "Enable/disable inner join fragment skipping. This feature is "
369  "considered stable and is enabled by default. This "
370  "parameter will be removed in a future release.");
371  help_desc.add_options()(
372  "max-session-duration",
373  po::value<int>(&max_session_duration)->default_value(max_session_duration),
374  "Maximum duration of active session.");
375  help_desc.add_options()("num-sessions",
376  po::value<int>(&system_parameters.num_sessions)
377  ->default_value(system_parameters.num_sessions),
378  "Maximum number of active session.");
379  help_desc.add_options()(
380  "null-div-by-zero",
381  po::value<bool>(&g_null_div_by_zero)
382  ->default_value(g_null_div_by_zero)
383  ->implicit_value(true),
384  "Return null on division by zero instead of throwing an exception.");
385  help_desc.add_options()(
386  "num-reader-threads",
387  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
388  "Number of reader threads to use.");
389  help_desc.add_options()(
390  "max-import-threads",
391  po::value<size_t>(&g_max_import_threads)->default_value(g_max_import_threads),
392  "Max number of default import threads to use (num hardware threads will be used "
393  "instead if lower). Can be overriden with copy statement threads option).");
394  help_desc.add_options()(
395  "overlaps-max-table-size-bytes",
396  po::value<size_t>(&g_overlaps_max_table_size_bytes)
397  ->default_value(g_overlaps_max_table_size_bytes),
398  "The maximum size in bytes of the hash table for an overlaps hash join.");
399  help_desc.add_options()("overlaps-target-entries-per-bin",
400  po::value<double>(&g_overlaps_target_entries_per_bin)
401  ->default_value(g_overlaps_target_entries_per_bin),
402  "The target number of hash entries per bin for overlaps join");
403  if (!dist_v5_) {
404  help_desc.add_options()("port,p",
405  po::value<int>(&system_parameters.omnisci_server_port)
406  ->default_value(system_parameters.omnisci_server_port),
407  "TCP Port number.");
408  }
409  help_desc.add_options()("num-gpus",
410  po::value<int>(&system_parameters.num_gpus)
411  ->default_value(system_parameters.num_gpus),
412  "Number of gpus to use.");
413  help_desc.add_options()(
414  "read-only",
415  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
416  "Enable read-only mode.");
417 
418  help_desc.add_options()(
419  "res-gpu-mem",
420  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
421  "Reduces GPU memory available to the HeavyDB allocator by this amount. Used for "
422  "compiled code cache and ancillary GPU functions and other processes that may also "
423  "be using the GPU concurrent with HeavyDB.");
424 
425  help_desc.add_options()("start-gpu",
426  po::value<int>(&system_parameters.start_gpu)
427  ->default_value(system_parameters.start_gpu),
428  "First gpu to use.");
429  help_desc.add_options()("trivial-loop-join-threshold",
430  po::value<unsigned>(&g_trivial_loop_join_threshold)
431  ->default_value(g_trivial_loop_join_threshold)
432  ->implicit_value(1000),
433  "The maximum number of rows in the inner table of a loop join "
434  "considered to be trivially small.");
435  help_desc.add_options()("verbose",
436  po::value<bool>(&verbose_logging)
437  ->default_value(verbose_logging)
438  ->implicit_value(true),
439  "Write additional debug log messages to server logs.");
440  help_desc.add_options()(
441  "enable-runtime-udf",
442  po::value<bool>(&enable_runtime_udf)
443  ->default_value(enable_runtime_udf)
444  ->implicit_value(true),
445  "DEPRECATED. Please use `enable-runtime-udfs` instead as this flag will be removed "
446  "in the near future.");
447  help_desc.add_options()(
448  "enable-runtime-udfs",
449  po::value<bool>(&enable_runtime_udfs)
450  ->default_value(enable_runtime_udfs)
451  ->implicit_value(true),
452  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
453  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
454  "Compiler server, packaged separately.");
455  help_desc.add_options()("enable-udf-registration-for-all-users",
456  po::value<bool>(&enable_udf_registration_for_all_users)
457  ->default_value(enable_udf_registration_for_all_users)
458  ->implicit_value(true),
459  "Allow all users, not just superusers, to register runtime "
460  "UDFs/UDTFs. Option only valid if "
461  "`--enable-runtime-udfs` is set to true.");
462  help_desc.add_options()("version,v", "Print Version Number.");
463  help_desc.add_options()("enable-string-functions",
464  po::value<bool>(&g_enable_string_functions)
465  ->default_value(g_enable_string_functions)
466  ->implicit_value(true),
467  "Enable experimental string functions.");
468  help_desc.add_options()("enable-experimental-string-functions",
469  po::value<bool>(&g_enable_string_functions)
470  ->default_value(g_enable_string_functions)
471  ->implicit_value(true),
472  "DEPRECATED. String functions are now enabled by default, "
473  "but can still be controlled with --enable-string-functions.");
474  help_desc.add_options()(
475  "enable-fsi",
476  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
477  "Enable foreign storage interface.");
478 
479  help_desc.add_options()("enable-legacy-delimited-import",
480  po::value<bool>(&g_enable_legacy_delimited_import)
481  ->default_value(g_enable_legacy_delimited_import)
482  ->implicit_value(true),
483  "Use legacy importer for delimited sources.");
484 #ifdef ENABLE_IMPORT_PARQUET
485  help_desc.add_options()("enable-legacy-parquet-import",
486  po::value<bool>(&g_enable_legacy_parquet_import)
487  ->default_value(g_enable_legacy_parquet_import)
488  ->implicit_value(true),
489  "Use legacy importer for parquet sources.");
490 #endif
491  help_desc.add_options()("enable-fsi-regex-import",
492  po::value<bool>(&g_enable_fsi_regex_import)
493  ->default_value(g_enable_fsi_regex_import)
494  ->implicit_value(true),
495  "Use FSI importer for regex parsed sources.");
496 
497  help_desc.add_options()("enable-add-metadata-columns",
498  po::value<bool>(&g_enable_add_metadata_columns)
499  ->default_value(g_enable_add_metadata_columns)
500  ->implicit_value(true),
501  "Enable add_metadata_columns COPY FROM WITH option (Beta).");
502 
503  help_desc.add_options()("disk-cache-path",
504  po::value<std::string>(&disk_cache_config.path),
505  "Specify the path for the disk cache.");
506 
507  help_desc.add_options()(
508  "disk-cache-level",
509  po::value<std::string>(&(disk_cache_level))->default_value("foreign_tables"),
510  "Specify level of disk cache. Valid options are 'foreign_tables', "
511  "'local_tables', 'none', and 'all'.");
512 
513  help_desc.add_options()("disk-cache-size",
514  po::value<size_t>(&(disk_cache_config.size_limit)),
515  "Specify a maximum size for the disk cache in bytes.");
516 
517 #ifdef HAVE_AWS_S3
518  help_desc.add_options()(
519  "allow-s3-server-privileges",
520  po::value<bool>(&g_allow_s3_server_privileges)
521  ->default_value(g_allow_s3_server_privileges)
522  ->implicit_value(true),
523  "Allow S3 server privileges, if IAM user credentials are not provided. Credentials "
524  "may be specified with "
525  "environment variables (such as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, etc), "
526  "an AWS credentials file, or when running on an EC2 instance, with an IAM role "
527  "that is attached to the instance.");
528 #endif // defined(HAVE_AWS_S3)
529  help_desc.add_options()(
530  "enable-interoperability",
531  po::value<bool>(&g_enable_interop)
532  ->default_value(g_enable_interop)
533  ->implicit_value(true),
534  "Enable offloading of query portions to an external execution engine.");
535  help_desc.add_options()("enable-union",
536  po::value<bool>(&g_enable_union)
537  ->default_value(g_enable_union)
538  ->implicit_value(true),
539  "DEPRECATED. UNION ALL is enabled by default. Please remove "
540  "use of this option, as it may be disabled in the future.");
541  help_desc.add_options()(
542  "calcite-service-timeout",
543  po::value<size_t>(&system_parameters.calcite_timeout)
544  ->default_value(system_parameters.calcite_timeout),
545  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
546  "schema changes or when running large numbers of parallel queries.");
547  help_desc.add_options()("calcite-service-keepalive",
548  po::value<size_t>(&system_parameters.calcite_keepalive)
549  ->default_value(system_parameters.calcite_keepalive)
550  ->implicit_value(true),
551  "Enable keepalive on Calcite connections.");
552  help_desc.add_options()(
553  "stringdict-parallelizm",
554  po::value<bool>(&g_enable_stringdict_parallel)
555  ->default_value(g_enable_stringdict_parallel)
556  ->implicit_value(true),
557  "Allow StringDictionary to parallelize loads using multiple threads");
558  help_desc.add_options()(
559  "log-user-id",
560  po::value<bool>(&Catalog_Namespace::g_log_user_id)
561  ->default_value(Catalog_Namespace::g_log_user_id)
562  ->implicit_value(true),
563  "Log userId integer in place of the userName (when available).");
564  help_desc.add_options()("log-user-origin",
565  po::value<bool>(&log_user_origin)
566  ->default_value(log_user_origin)
567  ->implicit_value(true),
568  "Lookup the origin of inbound connections by IP address/DNS "
569  "name, and print this information as part of stdlog.");
570  help_desc.add_options()(
571  "allowed-import-paths",
572  po::value<std::string>(&allowed_import_paths),
573  "List of allowed root paths that can be used in import operations.");
574  help_desc.add_options()(
575  "allowed-export-paths",
576  po::value<std::string>(&allowed_export_paths),
577  "List of allowed root paths that can be used in export operations.");
578  help_desc.add_options()("enable-system-tables",
579  po::value<bool>(&g_enable_system_tables)
580  ->default_value(g_enable_system_tables)
581  ->implicit_value(true),
582  "Enable use of system tables.");
583  help_desc.add_options()("enable-table-functions",
584  po::value<bool>(&g_enable_table_functions)
585  ->default_value(g_enable_table_functions)
586  ->implicit_value(true),
587  "Enable system table functions support.");
588  help_desc.add_options()("enable-logs-system-tables",
589  po::value<bool>(&g_enable_logs_system_tables)
590  ->default_value(g_enable_logs_system_tables)
591  ->implicit_value(true),
592  "Enable use of logs system tables.");
593  help_desc.add_options()(
594  "logs-system-tables-max-files-count",
595  po::value<size_t>(&g_logs_system_tables_max_files_count)
596  ->default_value(g_logs_system_tables_max_files_count),
597  "Maximum number of log files that will be processed by each logs system table.");
598 #ifdef ENABLE_MEMKIND
599  help_desc.add_options()("enable-tiered-cpu-mem",
600  po::value<bool>(&g_enable_tiered_cpu_mem)
601  ->default_value(g_enable_tiered_cpu_mem)
602  ->implicit_value(true),
603  "Enable additional tiers of CPU memory (PMEM, etc...)");
604  help_desc.add_options()("pmem-size", po::value<size_t>(&g_pmem_size)->default_value(0));
605  help_desc.add_options()("pmem-path", po::value<std::string>(&g_pmem_path));
606 #endif
607 
608  help_desc.add(log_options_.get_options());
609 }
610 
612  developer_desc.add_options()("dev-options", "Print internal developer options.");
613  developer_desc.add_options()(
614  "enable-calcite-view-optimize",
615  po::value<bool>(&system_parameters.enable_calcite_view_optimize)
616  ->default_value(system_parameters.enable_calcite_view_optimize)
617  ->implicit_value(true),
618  "Enable additional calcite (query plan) optimizations when a view is part of the "
619  "query.");
620  developer_desc.add_options()(
621  "enable-columnar-output",
622  po::value<bool>(&g_enable_columnar_output)
623  ->default_value(g_enable_columnar_output)
624  ->implicit_value(true),
625  "Enable columnar output for intermediate/final query steps.");
626  developer_desc.add_options()(
627  "enable-left-join-filter-hoisting",
628  po::value<bool>(&g_enable_left_join_filter_hoisting)
629  ->default_value(g_enable_left_join_filter_hoisting)
630  ->implicit_value(true),
631  "Enable hoisting left hand side filters through left joins.");
632  developer_desc.add_options()("optimize-row-init",
633  po::value<bool>(&g_optimize_row_initialization)
634  ->default_value(g_optimize_row_initialization)
635  ->implicit_value(true),
636  "Optimize row initialization.");
637  developer_desc.add_options()("enable-legacy-syntax",
638  po::value<bool>(&enable_legacy_syntax)
639  ->default_value(enable_legacy_syntax)
640  ->implicit_value(true),
641  "Enable legacy syntax.");
642  developer_desc.add_options()(
643  "enable-multifrag",
644  po::value<bool>(&allow_multifrag)
645  ->default_value(allow_multifrag)
646  ->implicit_value(true),
647  "Enable execution over multiple fragments in a single round-trip to GPU.");
648  developer_desc.add_options()("enable-lazy-fetch",
649  po::value<bool>(&g_enable_lazy_fetch)
650  ->default_value(g_enable_lazy_fetch)
651  ->implicit_value(true),
652  "Enable lazy fetch columns in query results.");
653  developer_desc.add_options()(
654  "enable-shared-mem-group-by",
655  po::value<bool>(&g_enable_smem_group_by)
656  ->default_value(g_enable_smem_group_by)
657  ->implicit_value(true),
658  "Enable using GPU shared memory for some GROUP BY queries.");
659  developer_desc.add_options()("num-executors",
660  po::value<int>(&system_parameters.num_executors)
661  ->default_value(system_parameters.num_executors),
662  "Number of executors to run in parallel.");
663  developer_desc.add_options()(
664  "gpu-shared-mem-threshold",
665  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
666  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
667  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
668  developer_desc.add_options()(
669  "enable-shared-mem-grouped-non-count-agg",
670  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
671  ->default_value(g_enable_smem_grouped_non_count_agg)
672  ->implicit_value(true),
673  "Enable using GPU shared memory for grouped non-count aggregate queries.");
674  developer_desc.add_options()(
675  "enable-shared-mem-non-grouped-agg",
676  po::value<bool>(&g_enable_smem_non_grouped_agg)
677  ->default_value(g_enable_smem_non_grouped_agg)
678  ->implicit_value(true),
679  "Enable using GPU shared memory for non-grouped aggregate queries.");
680  developer_desc.add_options()("enable-direct-columnarization",
681  po::value<bool>(&g_enable_direct_columnarization)
682  ->default_value(g_enable_direct_columnarization)
683  ->implicit_value(true),
684  "Enables/disables a more optimized columnarization method "
685  "for intermediate steps in multi-step queries.");
686  developer_desc.add_options()(
687  "offset-device-by-table-id",
688  po::value<bool>(&g_use_table_device_offset)
689  ->default_value(g_use_table_device_offset)
690  ->implicit_value(true),
691  "Enables/disables offseting the chosen device ID by the table ID for a given "
692  "fragment. This improves balance of fragments across GPUs.");
693  developer_desc.add_options()("enable-window-functions",
694  po::value<bool>(&g_enable_window_functions)
695  ->default_value(g_enable_window_functions)
696  ->implicit_value(true),
697  "Enable window function support.");
698  developer_desc.add_options()(
699  "enable-parallel-window-partition-compute",
702  ->implicit_value(true),
703  "Enable parallel window function partition computation.");
704  developer_desc.add_options()(
705  "enable-parallel-window-partition-sort",
708  ->implicit_value(true),
709  "Enable parallel window function partition sorting.");
710  developer_desc.add_options()(
711  "window-function-frame-aggregation-tree-fanout",
712  po::value<size_t>(&g_window_function_aggregation_tree_fanout)->default_value(8),
713  "A tree fanout for aggregation tree used to compute aggregation over "
714  "window frame");
715  developer_desc.add_options()("enable-dev-table-functions",
716  po::value<bool>(&g_enable_dev_table_functions)
717  ->default_value(g_enable_dev_table_functions)
718  ->implicit_value(true),
719  "Enable dev (test or alpha) table functions. Also "
720  "requires --enable-table-functions to be turned on");
721 
722  developer_desc.add_options()(
723  "enable-geo-ops-on-uncompressed-coords",
726  ->implicit_value(true),
727  "Enable faster geo operations on uncompressed coords");
728  developer_desc.add_options()(
729  "jit-debug-ir",
730  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
731  "Enable runtime debugger support for the JIT. Note that this flag is "
732  "incompatible "
733  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
734  "`/tmp/mapdquery`.");
735  developer_desc.add_options()(
736  "intel-jit-profile",
737  po::value<bool>(&intel_jit_profile)
738  ->default_value(intel_jit_profile)
739  ->implicit_value(true),
740  "Enable runtime support for the JIT code profiling using Intel VTune.");
741  developer_desc.add_options()(
742  "enable-cpu-sub-tasks",
743  po::value<bool>(&g_enable_cpu_sub_tasks)
744  ->default_value(g_enable_cpu_sub_tasks)
745  ->implicit_value(true),
746  "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
747  "load balance and decrease reduction overhead.");
748  developer_desc.add_options()(
749  "cpu-sub-task-size",
750  po::value<size_t>(&g_cpu_sub_task_size)->default_value(g_cpu_sub_task_size),
751  "Set CPU sub-task size in rows.");
752  developer_desc.add_options()(
753  "cpu-threads",
754  po::value<unsigned>(&g_cpu_threads_override)->default_value(g_cpu_threads_override),
755  "Set max CPU concurrent threads. Values <= 0 will use default of 2X the number of "
756  "hardware threads.");
757  developer_desc.add_options()(
758  "skip-intermediate-count",
759  po::value<bool>(&g_skip_intermediate_count)
760  ->default_value(g_skip_intermediate_count)
761  ->implicit_value(true),
762  "Skip pre-flight counts for intermediate projections with no filters.");
763  developer_desc.add_options()(
764  "strip-join-covered-quals",
765  po::value<bool>(&g_strip_join_covered_quals)
766  ->default_value(g_strip_join_covered_quals)
767  ->implicit_value(true),
768  "Remove quals from the filtered count if they are covered by a "
769  "join condition (currently only ST_Contains).");
770 
771  developer_desc.add_options()(
772  "min-cpu-slab-size",
773  po::value<size_t>(&system_parameters.min_cpu_slab_size)
774  ->default_value(system_parameters.min_cpu_slab_size),
775  "Min slab size (size of memory allocations) for CPU buffer pool.");
776  developer_desc.add_options()(
777  "max-cpu-slab-size",
778  po::value<size_t>(&system_parameters.max_cpu_slab_size)
779  ->default_value(system_parameters.max_cpu_slab_size),
780  "Max CPU buffer pool slab size (size of memory allocations). Note if "
781  "there is not enough free memory to accomodate the target slab size, smaller "
782  "slabs will be allocated, down to the minimum size specified by "
783  "min-cpu-slab-size.");
784  developer_desc.add_options()(
785  "min-gpu-slab-size",
786  po::value<size_t>(&system_parameters.min_gpu_slab_size)
787  ->default_value(system_parameters.min_gpu_slab_size),
788  "Min slab size (size of memory allocations) for GPU buffer pools.");
789  developer_desc.add_options()(
790  "max-gpu-slab-size",
791  po::value<size_t>(&system_parameters.max_gpu_slab_size)
792  ->default_value(system_parameters.max_gpu_slab_size),
793  "Max GPU buffer pool slab size (size of memory allocations). Note if "
794  "there is not enough free memory to accomodate the target slab size, smaller "
795  "slabs will be allocated, down to the minimum size speified by "
796  "min-gpu-slab-size.");
797 
798  developer_desc.add_options()(
799  "max-output-projection-allocation-bytes",
800  po::value<size_t>(&g_max_memory_allocation_size)
801  ->default_value(g_max_memory_allocation_size),
802  "Maximum allocation size for a fixed output buffer allocation for projection "
803  "queries with no pre-flight count. Default is the maximum slab size (sizes "
804  "greater "
805  "than the maximum slab size have no affect). Requires bump allocator.");
806  developer_desc.add_options()(
807  "min-output-projection-allocation-bytes",
808  po::value<size_t>(&g_min_memory_allocation_size)
809  ->default_value(g_min_memory_allocation_size),
810  "Minimum allocation size for a fixed output buffer allocation for projection "
811  "queries with no pre-flight count. If an allocation of this size cannot be "
812  "obtained, the query will be retried with different execution parameters and/or "
813  "on "
814  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
815  developer_desc.add_options()("enable-bump-allocator",
816  po::value<bool>(&g_enable_bump_allocator)
817  ->default_value(g_enable_bump_allocator)
818  ->implicit_value(true),
819  "Enable the bump allocator for projection queries on "
820  "GPU. The bump allocator will "
821  "allocate a fixed size buffer for each query, track the "
822  "number of rows passing the "
823  "kernel during query execution, and copy back only the "
824  "rows that passed the kernel "
825  "to CPU after execution. When disabled, pre-flight "
826  "count queries are used to size "
827  "the output buffer for projection queries.");
828  developer_desc.add_options()(
829  "code-cache-eviction-percent",
830  po::value<float>(&g_fraction_code_cache_to_evict)
831  ->default_value(g_fraction_code_cache_to_evict),
832  "Percentage of the GPU code cache to evict if an out of memory error is "
833  "encountered while attempting to place generated code on the GPU.");
834 
835  developer_desc.add_options()("ssl-cert",
836  po::value<std::string>(&system_parameters.ssl_cert_file)
837  ->default_value(std::string("")),
838  "SSL Validated public certficate.");
839 
840  developer_desc.add_options()("ssl-private-key",
841  po::value<std::string>(&system_parameters.ssl_key_file)
842  ->default_value(std::string("")),
843  "SSL private key file.");
844  // Note ssl_trust_store is passed through to Calcite via system_parameters
845  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
846  developer_desc.add_options()("ssl-trust-store",
847  po::value<std::string>(&system_parameters.ssl_trust_store)
848  ->default_value(std::string("")),
849  "SSL public CA certifcates (java trust store) to validate "
850  "TLS connections (passed through to the Calcite server).");
851 
852  developer_desc.add_options()(
853  "ssl-trust-password",
854  po::value<std::string>(&system_parameters.ssl_trust_password)
855  ->default_value(std::string("")),
856  "SSL password for java trust store provided via --ssl-trust-store parameter.");
857 
858  developer_desc.add_options()(
859  "ssl-trust-ca",
860  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
861  ->default_value(std::string("")),
862  "SSL public CA certificates to validate TLS connection(as a client).");
863 
864  developer_desc.add_options()(
865  "ssl-trust-ca-server",
866  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
867  "SSL public CA certificates to validate TLS connection(as a server).");
868 
869  developer_desc.add_options()("ssl-keystore",
870  po::value<std::string>(&system_parameters.ssl_keystore)
871  ->default_value(std::string("")),
872  "SSL server credentials as a java key store (passed "
873  "through to the Calcite server).");
874 
875  developer_desc.add_options()(
876  "ssl-keystore-password",
877  po::value<std::string>(&system_parameters.ssl_keystore_password)
878  ->default_value(std::string("")),
879  "SSL password for java keystore, provide by via --ssl-keystore.");
880 
881  developer_desc.add_options()(
882  "udf",
883  po::value<std::string>(&udf_file_name),
884  "Load user defined extension functions from this file at startup. The file is "
885  "expected to be a C/C++ file with extension .cpp.");
886 
887  developer_desc.add_options()(
888  "udf-compiler-path",
889  po::value<std::string>(&udf_compiler_path),
890  "Provide absolute path to clang++ used in udf compilation.");
891 
892  developer_desc.add_options()("udf-compiler-options",
893  po::value<std::vector<std::string>>(&udf_compiler_options),
894  "Specify compiler options to tailor udf compilation.");
895 
896 #ifdef ENABLE_GEOS
897  developer_desc.add_options()("libgeos-so-filename",
898  po::value<std::string>(&libgeos_so_filename),
899  "Specify libgeos shared object filename to be used for "
900  "geos-backed geo opertations.");
901 #endif
902  developer_desc.add_options()(
903  "large-ndv-threshold",
904  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
905  developer_desc.add_options()(
906  "large-ndv-multiplier",
907  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
908  developer_desc.add_options()("approx_quantile_buffer",
909  po::value<size_t>(&g_approx_quantile_buffer)
910  ->default_value(g_approx_quantile_buffer));
911  developer_desc.add_options()("approx_quantile_centroids",
912  po::value<size_t>(&g_approx_quantile_centroids)
913  ->default_value(g_approx_quantile_centroids));
914  developer_desc.add_options()(
915  "bitmap-memory-limit",
916  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
917  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
918  "size of the group by buffer (entry count in Query Memory Descriptor) and "
919  "multiplying it by the number of count distinct expression and the size of bitmap "
920  "required for each. For approx_count_distinct this is typically 8192 bytes.");
921  developer_desc.add_options()(
922  "enable-filter-function",
923  po::value<bool>(&g_enable_filter_function)
924  ->default_value(g_enable_filter_function)
925  ->implicit_value(true),
926  "Enable the filter function protection feature for the SQL JIT compiler. "
927  "Normally should be on but techs might want to disable for troubleshooting.");
928  developer_desc.add_options()(
929  "enable-idp-temporary-users",
930  po::value<bool>(&g_enable_idp_temporary_users)
931  ->default_value(g_enable_idp_temporary_users)
932  ->implicit_value(true),
933  "Enable temporary users for SAML and LDAP logins on read-only servers. "
934  "Normally should be on but techs might want to disable for troubleshooting.");
935  developer_desc.add_options()(
936  "enable-seconds-refresh-interval",
937  po::value<bool>(&g_enable_seconds_refresh)
938  ->default_value(g_enable_seconds_refresh)
939  ->implicit_value(true),
940  "Enable foreign table seconds refresh interval for testing purposes.");
941  developer_desc.add_options()("enable-auto-metadata-update",
942  po::value<bool>(&g_enable_auto_metadata_update)
943  ->default_value(g_enable_auto_metadata_update)
944  ->implicit_value(true),
945  "Enable automatic metadata update.");
946  developer_desc.add_options()(
947  "parallel-top-min",
948  po::value<size_t>(&g_parallel_top_min)->default_value(g_parallel_top_min),
949  "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
950  "parallelTop() to sort.");
951  developer_desc.add_options()(
952  "parallel-top-max",
953  po::value<size_t>(&g_parallel_top_max)->default_value(g_parallel_top_max),
954  "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
955  "watchdog.");
956  developer_desc.add_options()(
957  "streaming-top-n-max",
958  po::value<size_t>(&g_streaming_topn_max)->default_value(g_streaming_topn_max),
959  "The maximum number of rows allowing streaming top-N sorting.");
960  developer_desc.add_options()("vacuum-min-selectivity",
961  po::value<float>(&g_vacuum_min_selectivity)
962  ->default_value(g_vacuum_min_selectivity),
963  "Minimum selectivity for automatic vacuuming. "
964  "This specifies the percentage (with a value of 0 "
965  "implying 0% and a value of 1 implying 100%) of "
966  "deleted rows in a fragment at which to perform "
967  "automatic vacuuming. A number greater than 1 can "
968  "be used to disable automatic vacuuming.");
969  developer_desc.add_options()("enable-automatic-ir-metadata",
970  po::value<bool>(&g_enable_automatic_ir_metadata)
971  ->default_value(g_enable_automatic_ir_metadata)
972  ->implicit_value(true),
973  "Enable automatic IR metadata (debug builds only).");
974  developer_desc.add_options()(
975  "max-log-length",
976  po::value<size_t>(&g_max_log_length)->default_value(g_max_log_length),
977  "The maximum number of characters that a log message can has. If the log message "
978  "is longer than this, we only record \'g_max_log_message_length\' characters.");
979  developer_desc.add_options()(
980  "estimator-failure-max-groupby-size",
981  po::value<size_t>(&g_estimator_failure_max_groupby_size)
982  ->default_value(g_estimator_failure_max_groupby_size),
983  "Maximum size of the groupby buffer if the estimator fails. By default we use the "
984  "number of tuples in the table up to this value.");
985  developer_desc.add_options()("columnar-large-projections",
986  po::value<bool>(&g_columnar_large_projections)
987  ->default_value(g_columnar_large_projections)
988  ->implicit_value(true),
989  "Prefer columnar output if projection size is >= "
990  "threshold set by --columnar-large-projections-threshold "
991  "(default 1,000,000 rows).");
992  developer_desc.add_options()(
993  "columnar-large-projections-threshold",
994  po::value<size_t>(&g_columnar_large_projections_threshold)
996  "Threshold (in minimum number of rows) to prefer columnar output for projections. "
997  "Requires --columnar-large-projections to be set.");
998 
999  help_desc.add_options()(
1000  "allow-query-step-cpu-retry",
1001  po::value<bool>(&g_allow_query_step_cpu_retry)
1002  ->default_value(g_allow_query_step_cpu_retry)
1003  ->implicit_value(true),
1004  R"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
1005  help_desc.add_options()("enable-http-binary-server",
1006  po::value<bool>(&g_enable_http_binary_server)
1007  ->default_value(g_enable_http_binary_server)
1008  ->implicit_value(true),
1009  "Enable binary over HTTP Thrift server");
1010 
1011  help_desc.add_options()("enable-assign-render-groups",
1012  po::value<bool>(&g_enable_assign_render_groups)
1013  ->default_value(g_enable_assign_render_groups)
1014  ->implicit_value(true),
1015  "Enable Render Group assignment");
1016 
1017  help_desc.add_options()("enable-query-engine-cuda-streams",
1018  po::value<bool>(&g_query_engine_cuda_streams)
1019  ->default_value(g_query_engine_cuda_streams)
1020  ->implicit_value(true),
1021  "Enable Query Engine CUDA streams");
1022 
1023  help_desc.add_options()(
1024  "allow-invalid-literal-buffer-reads",
1025  po::value<bool>(&g_allow_invalid_literal_buffer_reads)
1026  ->default_value(g_allow_invalid_literal_buffer_reads)
1027  ->implicit_value(true),
1028  "For backwards compatibility. Enabling may cause invalid query results.");
1029 }
1030 
1031 namespace {
1032 
/**
 * Copies a config stream up to and including its "[web]" section header and drops
 * everything after it, so boost can validate the remaining program options.
 *
 * The header is recognized after ignoring surrounding spaces, tabs and carriage
 * returns. An exact string comparison would miss "[web]\r" (CRLF line endings) or
 * a padded header, letting the web section leak into the sanitized output.
 *
 * @param in Config input; consumed line by line until the "[web]" header or EOF.
 * @return Stream holding every retained line, each terminated with '\n'.
 */
std::stringstream sanitize_config_file(std::istream& in) {
  constexpr const char* kBlanks = " \t\r";
  const std::string web_header{"[web]"};

  std::stringstream ss;
  std::string line;
  while (std::getline(in, line)) {
    // Lines are forwarded untouched; only the comparison ignores the padding.
    ss << line << "\n";
    const auto first = line.find_first_not_of(kBlanks);
    if (first == std::string::npos) {
      continue;  // blank line, cannot be the section header
    }
    const auto last = line.find_last_not_of(kBlanks);
    if (line.compare(first, last - first + 1, web_header) == 0) {
      break;
    }
  }
  return ss;
}
1045 
1046 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
1047  if (!filename.empty()) {
1048  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
1049  if (!boost::filesystem::exists(filename)) {
1050  std::cerr << desc << " " << filename << " does not exist." << std::endl;
1051  return false;
1052  }
1053  }
1054  return true;
1055 }
1056 
1058  if (!filename.empty()) {
1060  }
1061 }
1062 
1063 } // namespace
1064 
1066  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1067  if (!boost::filesystem::exists(base_path)) {
1068  throw std::runtime_error("HeavyDB base directory does not exist at " + base_path);
1069  }
1070 }
1071 
1073  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1074  const auto data_path = boost::filesystem::path(base_path) / shared::kDataDirectoryName;
1075  if (!boost::filesystem::exists(data_path)) {
1076  throw std::runtime_error("HeavyDB data directory does not exist at '" + base_path +
1077  "'");
1078  }
1079 
1080 // TODO: support lock on Windows
1081 #ifndef _WIN32
1082  {
1083  // If we aren't sharing the data directory, take and hold a write lock on
1084  // heavydb_pid.lck to prevent other processes from trying to share our dir.
1085  // TODO(sy): Probably need to get rid of this PID file because it doesn't make much
1086  // sense to store only one server's PID when we have the --multi-instance option.
1087  auto exe_filename = boost::filesystem::path(exe_name).filename().string();
1088  const std::string lock_file =
1089  (boost::filesystem::path(base_path) / std::string(exe_filename + "_pid.lck"))
1090  .string();
1091  auto pid = std::to_string(getpid());
1092  if (!g_multi_instance) {
1093  VLOG(1) << "taking [" << lock_file << "] read+write lock until process exit";
1094  } else {
1095  VLOG(1) << "taking [" << lock_file << "] read-only lock until process exit";
1096  }
1097 
1098  int fd;
1099  fd = heavyai::safe_open(lock_file.c_str(), O_RDWR | O_CREAT, 0664);
1100  if (fd == -1) {
1101  throw std::runtime_error("failed to open lockfile: " + lock_file + ": " +
1102  std::string(strerror(errno)) + " (" +
1103  std::to_string(errno) + ")");
1104  }
1105 
1106  struct flock fl;
1107  memset(&fl, 0, sizeof(fl));
1108  fl.l_type = !g_multi_instance ? F_WRLCK : F_RDLCK;
1109  fl.l_whence = SEEK_SET;
1110  int cmd;
1111 #ifdef __linux__
1112  // cmd = F_OFD_SETLK; // TODO(sy): broken on centos
1113  cmd = F_SETLK;
1114 #else
1115  cmd = F_SETLK;
1116 #endif // __linux__
1117  int ret = heavyai::safe_fcntl(fd, cmd, &fl);
1118  if (ret == -1 && (errno == EACCES || errno == EAGAIN)) { // locked by someone else
1119  heavyai::safe_close(fd);
1120  throw std::runtime_error(
1121  "another HeavyDB server instance is already using data directory: " +
1122  base_path);
1123  } else if (ret == -1) {
1124  auto errno0 = errno;
1125  heavyai::safe_close(fd);
1126  throw std::runtime_error("failed to lock lockfile: " + lock_file + ": " +
1127  std::string(strerror(errno0)) + " (" +
1128  std::to_string(errno0) + ")");
1129  }
1130 
1131  if (!g_multi_instance) {
1132  if (heavyai::ftruncate(fd, 0) == -1) {
1133  auto errno0 = errno;
1134  heavyai::safe_close(fd);
1135  throw std::runtime_error("failed to truncate lockfile: " + lock_file + ": " +
1136  std::string(strerror(errno0)) + " (" +
1137  std::to_string(errno0) + ")");
1138  }
1139  if (heavyai::safe_write(fd, pid.c_str(), pid.length()) == -1) {
1140  auto errno0 = errno;
1141  heavyai::safe_close(fd);
1142  throw std::runtime_error("failed to write lockfile: " + lock_file + ": " +
1143  std::string(strerror(errno0)) + " (" +
1144  std::to_string(errno0) + ")");
1145  }
1146  }
1147 
1148  // Intentionally leak the file descriptor. Lock will be held until process exit.
1149  }
1150 #endif // _WIN32
1151 
1152  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
1153  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
1154  throw std::runtime_error("File containing DB queries " + db_query_file +
1155  " does not exist.");
1156  }
1157  const auto db_file = boost::filesystem::path(base_path) /
1159  if (!boost::filesystem::exists(db_file)) {
1160  { // check old system catalog existsense
1161  const auto db_file =
1162  boost::filesystem::path(base_path) / shared::kCatalogDirectoryName / "mapd";
1163  if (!boost::filesystem::exists(db_file)) {
1164  throw std::runtime_error("System catalog " + shared::kSystemCatalogName +
1165  " does not exist.");
1166  }
1167  }
1168  }
1169  if (license_path.length() == 0) {
1170  license_path = base_path + "/" + shared::kDefaultLicenseFileName;
1171  }
1172 
1173  // add all parameters to be displayed on startup
1174  LOG(INFO) << "HeavyDB started with data directory at '" << base_path << "'";
1175  if (vm.count("license-path")) {
1176  LOG(INFO) << "License key path set to '" << license_path << "'";
1177  }
1178  g_read_only = read_only;
1179  LOG(INFO) << " Server read-only mode is " << read_only << " (--read-only)";
1180  if (g_multi_instance) {
1181  LOG(INFO) << " Multiple servers per --data directory is " << g_multi_instance
1182  << " (--multi-instance)";
1183  }
1185  LOG(WARNING) << " Allowing invalid reads from the literal buffer. May cause invalid "
1186  "query results! (--allow-invalid-literal-buffer-reads)";
1187  }
1188 #if DISABLE_CONCURRENCY
1189  LOG(INFO) << " Threading layer: serial";
1190 #elif ENABLE_TBB
1191  LOG(INFO) << " Threading layer: TBB";
1192 #else
1193  LOG(INFO) << " Threading layer: std";
1194 #endif
1195  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
1196  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
1197  if (enable_dynamic_watchdog) {
1198  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
1199  }
1200  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
1201  if (enable_runtime_query_interrupt) {
1202  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
1203  << pending_query_interrupt_freq << " (in ms.)";
1204  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
1205  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
1206  }
1207  LOG(INFO) << " Non-kernel time query interrupt is set to "
1208  << enable_non_kernel_time_query_interrupt;
1209 
1210  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
1211  LOG(INFO) << " LogUserId is set to " << Catalog_Namespace::g_log_user_id;
1212  LOG(INFO) << " Maximum idle session duration " << idle_session_duration;
1213  LOG(INFO) << " Maximum active session duration " << max_session_duration;
1214  LOG(INFO) << " Maximum number of sessions " << system_parameters.num_sessions;
1215 
1216  LOG(INFO) << "Legacy delimited import is set to " << g_enable_legacy_delimited_import;
1217 #ifdef ENABLE_IMPORT_PARQUET
1218  LOG(INFO) << "Legacy parquet import is set to " << g_enable_legacy_parquet_import;
1219 #endif
1220  LOG(INFO) << "FSI regex parsed import is set to " << g_enable_fsi_regex_import;
1221 
1222  LOG(INFO) << "Allowed import paths is set to " << allowed_import_paths;
1223  LOG(INFO) << "Allowed export paths is set to " << allowed_export_paths;
1225  base_path, allowed_import_paths, allowed_export_paths);
1226 
1229  ddl_utils::FilePathBlacklist::addToBlacklist(base_path + "/temporary/" +
1236  g_enable_s3_fsi = false;
1237 
1239 #ifdef ENABLE_IMPORT_PARQUET
1240  !g_enable_legacy_parquet_import ||
1241 #endif
1243  g_enable_fsi =
1244  true; // a requirement for FSI import code-paths is for FSI to be enabled
1245  }
1246 
1247  if (disk_cache_level == "foreign_tables") {
1248  if (g_enable_fsi) {
1249  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::fsi;
1250  LOG(INFO) << "Disk cache enabled for foreign tables only";
1251  } else {
1252  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1253  "disk cache disabled";
1254  }
1255  } else if (disk_cache_level == "all") {
1256  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::all;
1257  LOG(INFO) << "Disk cache enabled for all tables";
1258  } else if (disk_cache_level == "local_tables") {
1259  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::non_fsi;
1260  LOG(INFO) << "Disk cache enabled for non-FSI tables";
1261  } else if (disk_cache_level == "none") {
1262  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::none;
1263  LOG(INFO) << "Disk cache disabled";
1264  } else {
1265  throw std::runtime_error{
1266  "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1267  ". Valid options are 'foreign_tables', "
1268  "'local_tables', 'none', and 'all'."};
1269  }
1270 
1271  if (disk_cache_config.size_limit < File_Namespace::CachingFileMgr::getMinimumSize()) {
1272  throw std::runtime_error{"disk-cache-size must be at least " +
1274  }
1275 
1276  if (disk_cache_config.path.empty()) {
1277  disk_cache_config.path = base_path + "/" + shared::kDefaultDiskCacheDirName;
1278  }
1279  ddl_utils::FilePathBlacklist::addToBlacklist(disk_cache_config.path);
1280 
1283 
1284  // If passed in, blacklist all security config files
1285  addOptionalFileToBlacklist(license_path);
1286  addOptionalFileToBlacklist(system_parameters.ssl_cert_file);
1287  addOptionalFileToBlacklist(authMetadata.ca_file_name);
1288  addOptionalFileToBlacklist(system_parameters.ssl_trust_store);
1289  addOptionalFileToBlacklist(system_parameters.ssl_keystore);
1290  addOptionalFileToBlacklist(system_parameters.ssl_key_file);
1291  addOptionalFileToBlacklist(system_parameters.ssl_trust_ca_file);
1292  addOptionalFileToBlacklist(cluster_file);
1293 
1294  if (g_vacuum_min_selectivity < 0) {
1295  throw std::runtime_error{"vacuum-min-selectivity cannot be less than 0."};
1296  }
1297  LOG(INFO) << "Vacuum Min Selectivity: " << g_vacuum_min_selectivity;
1298 
1299  LOG(INFO) << "Enable system tables is set to " << g_enable_system_tables;
1300  if (g_enable_system_tables) {
1301  // System tables currently reuse FSI infrastructure and therefore, require FSI to be
1302  // enabled
1303  if (!g_enable_fsi) {
1304  g_enable_fsi = true;
1305  LOG(INFO) << "FSI has been enabled as a side effect of enabling system tables";
1306  }
1307  }
1308  LOG(INFO) << "Enable FSI is set to " << g_enable_fsi;
1309  LOG(INFO) << "Enable logs system tables set to " << g_enable_logs_system_tables;
1310 
1312  throw std::runtime_error{
1313  "Invalid value provided for the \"logs-system-tables-max-files-count\" "
1314  "option. Value must be greater than 0."};
1315  }
1316  LOG(INFO) << "Maximum number of logs system table files set to "
1318 
1319 #ifdef ENABLE_MEMKIND
1320  if (g_enable_tiered_cpu_mem) {
1321  if (g_pmem_path == "") {
1322  throw std::runtime_error{"pmem-path must be set to use tiered cpu memory"};
1323  }
1324  if (g_pmem_size == 0) {
1325  throw std::runtime_error{"pmem-size must be set to use tiered cpu memory"};
1326  }
1327  if (!std::filesystem::exists(g_pmem_path.c_str())) {
1328  throw std::runtime_error{"path to PMem directory (" + g_pmem_path +
1329  ") does not exist."};
1330  }
1331  }
1332 #endif
1333 }
1334 
1336  const bool enable_runtime_udfs,
1337  const bool enable_udf_registration_for_all_users) {
1338  return enable_runtime_udfs
1339  ? (enable_udf_registration_for_all_users
1344 }
1345 
1347  int argc,
1348  char const* const* argv,
1349  const bool should_init_logging) {
1350  po::options_description all_desc("All options");
1351  all_desc.add(help_desc).add(developer_desc);
1352 
1353  try {
1354  po::store(po::command_line_parser(argc, argv)
1355  .options(all_desc)
1356  .positional(positional_options)
1357  .run(),
1358  vm);
1359  po::notify(vm);
1360 
1361  if (vm.count("help")) {
1362  std::cerr << "Usage: heavydb <data directory path> [-p <port number>] "
1363  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1364  << std::endl
1365  << std::endl;
1366  std::cout << help_desc << std::endl;
1367  return 0;
1368  }
1369  if (vm.count("dev-options")) {
1370  std::cout << "Usage: heavydb <data directory path> [-p <port number>] "
1371  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1372  << std::endl
1373  << std::endl;
1374  std::cout << developer_desc << std::endl;
1375  return 0;
1376  }
1377  if (vm.count("version")) {
1378  std::cout << "HeavyDB Version: " << MAPD_RELEASE << std::endl;
1379  return 0;
1380  }
1381 
1382  if (vm.count("config")) {
1383  std::ifstream settings_file(system_parameters.config_file);
1384 
1385  auto sanitized_settings = sanitize_config_file(settings_file);
1386 
1387  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
1388  po::notify(vm);
1389  settings_file.close();
1390  }
1391 
1392  if (!g_enable_union) {
1393  std::cerr
1394  << "The enable-union option is DEPRECATED and is now enabled by default. "
1395  "Please remove use of this option, as it may be disabled in the future."
1396  << std::endl;
1397  }
1398 
1399  // Trim base path before executing migration
1400  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1401 
1402  // Execute rebrand migration before accessing any system files.
1403  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
1404  if (!boost::filesystem::exists(lockfiles_path)) {
1405  if (!boost::filesystem::create_directory(lockfiles_path)) {
1406  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
1407  " subdirectory under "
1408  << base_path << std::endl;
1409  return 1;
1410  }
1411  }
1412  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
1413  if (!boost::filesystem::exists(lockfiles_path2)) {
1414  if (!boost::filesystem::create_directory(lockfiles_path2)) {
1415  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1416  shared::kCatalogDirectoryName + " subdirectory under "
1417  << base_path << std::endl;
1418  return 1;
1419  }
1420  }
1421  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
1422  if (!boost::filesystem::exists(lockfiles_path3)) {
1423  if (!boost::filesystem::create_directory(lockfiles_path3)) {
1424  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1425  shared::kDataDirectoryName + " subdirectory under "
1426  << base_path << std::endl;
1427  return 1;
1428  }
1429  }
1433  }
1434 
1435  if (!vm["enable-runtime-udf"].defaulted()) {
1436  if (!vm["enable-runtime-udfs"].defaulted()) {
1437  std::cerr << "Usage Error: Both enable-runtime-udf and enable-runtime-udfs "
1438  "specified. Please remove use of the enable-runtime-udfs flag, "
1439  "as it will be deprecated in the future."
1440  << std::endl;
1441  return 1;
1442  } else {
1443  enable_runtime_udfs = enable_runtime_udf;
1444  std::cerr << "The enable-runtime-udf flag has been deprecated and replaced "
1445  "with enable-runtime-udfs. Please remove use of this option "
1446  "as it will be disabled in the future."
1447  << std::endl;
1448  }
1449  }
1450  system_parameters.runtime_udf_registration_policy =
1451  construct_runtime_udf_registration_policy(enable_runtime_udfs,
1452  enable_udf_registration_for_all_users);
1453 
1454  if (should_init_logging) {
1455  init_logging();
1456  }
1457 
1458  if (!trim_and_check_file_exists(system_parameters.ssl_cert_file, "ssl cert file")) {
1459  return 1;
1460  }
1461  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
1462  return 1;
1463  }
1464  if (!trim_and_check_file_exists(system_parameters.ssl_trust_store,
1465  "ssl trust store")) {
1466  return 1;
1467  }
1468  if (!trim_and_check_file_exists(system_parameters.ssl_keystore, "ssl key store")) {
1469  return 1;
1470  }
1471  if (!trim_and_check_file_exists(system_parameters.ssl_key_file, "ssl key file")) {
1472  return 1;
1473  }
1474  if (!trim_and_check_file_exists(system_parameters.ssl_trust_ca_file, "ssl ca file")) {
1475  return 1;
1476  }
1477 
1478  g_enable_watchdog = enable_watchdog;
1480  watchdog_none_encoded_string_translation_limit;
1481  g_enable_dynamic_watchdog = enable_dynamic_watchdog;
1482  g_dynamic_watchdog_time_limit = dynamic_watchdog_time_limit;
1483  g_enable_runtime_query_interrupt = enable_runtime_query_interrupt;
1484  g_enable_non_kernel_time_query_interrupt = enable_non_kernel_time_query_interrupt;
1485  g_pending_query_interrupt_freq = pending_query_interrupt_freq;
1486  g_running_query_interrupt_freq = running_query_interrupt_freq;
1487  g_use_estimator_result_cache = use_estimator_result_cache;
1488  g_enable_data_recycler = enable_data_recycler;
1489  g_use_hashtable_cache = use_hashtable_cache;
1490  g_max_cacheable_hashtable_size_bytes = max_cacheable_hashtable_size_bytes;
1491  g_hashtable_cache_total_bytes = hashtable_cache_total_bytes;
1492 
1493  } catch (po::error& e) {
1494  std::cerr << "Usage Error: " << e.what() << std::endl;
1495  return 1;
1496  }
1497 
1498  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1499  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
1500  return 1;
1501  }
1502 
1503  if (!g_from_table_reordering) {
1504  LOG(INFO) << " From clause table reordering is disabled";
1505  }
1506 
1508  LOG(INFO) << " Filter push down for JOIN is enabled";
1509  }
1510 
1511  if (vm.count("udf")) {
1512  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
1513 
1514  if (!boost::filesystem::exists(udf_file_name)) {
1515  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
1516  return 1;
1517  }
1518 
1519  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
1520  }
1521 
1522  if (vm.count("udf-compiler-path")) {
1523  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
1524  }
1525 
1526  auto trim_string = [](std::string& s) {
1527  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
1528  };
1529 
1530  if (vm.count("udf-compiler-options")) {
1531  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1532  }
1533 
1534  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
1535  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
1536  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
1537  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1538  boost::is_any_of("\"'"));
1539 
1540  if (!system_parameters.ha_group_id.empty()) {
1541  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
1542  if (system_parameters.ha_unique_server_id.empty()) {
1543  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
1544  return 5;
1545  } else {
1546  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
1547  }
1548  if (system_parameters.ha_brokers.empty()) {
1549  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
1550  return 6;
1551  } else {
1552  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
1553  }
1554  if (system_parameters.ha_shared_data.empty()) {
1555  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
1556  return 7;
1557  } else {
1558  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
1559  }
1560  }
1561 
1562  boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of("\"'"));
1563  if (!system_parameters.master_address.empty()) {
1564  if (!read_only) {
1565  LOG(ERROR) << "The master-address setting is only allowed in read-only mode";
1566  return 9;
1567  }
1568  LOG(INFO) << " Master Address is " << system_parameters.master_address;
1569  LOG(INFO) << " Master Port is " << system_parameters.master_port;
1570  }
1571 
1572  if (g_max_import_threads < 1) {
1573  std::cerr << "max-import-threads must be >= 1 (was set to " << g_max_import_threads
1574  << ")." << std::endl;
1575  return 8;
1576  } else {
1577  LOG(INFO) << " Max import threads " << g_max_import_threads;
1578  }
1579 
1580  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
1581  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
1582  LOG(INFO) << " Min CPU buffer pool slab size " << system_parameters.min_cpu_slab_size;
1583  LOG(INFO) << " Max CPU buffer pool slab size " << system_parameters.max_cpu_slab_size;
1584  LOG(INFO) << " Min GPU buffer pool slab size " << system_parameters.min_gpu_slab_size;
1585  LOG(INFO) << " Max GPU buffer pool slab size " << system_parameters.max_gpu_slab_size;
1586  LOG(INFO) << " calcite JVM max memory " << system_parameters.calcite_max_mem;
1587  LOG(INFO) << " HeavyDB Server Port " << system_parameters.omnisci_server_port;
1588  LOG(INFO) << " HeavyDB Calcite Port " << system_parameters.calcite_port;
1589  LOG(INFO) << " Enable Calcite view optimize "
1590  << system_parameters.enable_calcite_view_optimize;
1591  LOG(INFO) << " Allow Local Auth Fallback: "
1592  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
1593  LOG(INFO) << " ParallelTop min threshold: " << g_parallel_top_min;
1594  LOG(INFO) << " ParallelTop watchdog max: " << g_parallel_top_max;
1595 
1596  LOG(INFO) << " Enable Data Recycler: "
1597  << (g_enable_data_recycler ? "enabled" : "disabled");
1598  if (g_enable_data_recycler) {
1599  LOG(INFO) << " \t Use hashtable cache: "
1600  << (g_use_hashtable_cache ? "enabled" : "disabled");
1601  if (g_use_hashtable_cache) {
1602  LOG(INFO) << " \t\t Total amount of bytes that hashtable cache keeps: "
1603  << g_hashtable_cache_total_bytes / (1024 * 1024) << " MB.";
1604  LOG(INFO) << " \t\t Per-hashtable size limit: "
1605  << g_max_cacheable_hashtable_size_bytes / (1024 * 1024) << " MB.";
1606  }
1607  LOG(INFO) << " \t Use query resultset cache: "
1608  << (g_use_query_resultset_cache ? "enabled" : "disabled");
1610  LOG(INFO) << " \t\t Total amount of bytes that query resultset cache keeps: "
1611  << g_query_resultset_cache_total_bytes / (1024 * 1024) << " MB.";
1612  LOG(INFO) << " \t\t Per-query resultset size limit: "
1613  << g_max_cacheable_query_resultset_size_bytes / (1024 * 1024) << " MB.";
1614  }
1615  LOG(INFO) << " \t\t Use auto query resultset caching: "
1616  << (g_allow_auto_resultset_caching ? "enabled" : "disabled");
1618  LOG(INFO) << " \t\t\t The maximum bytes of a query resultset which is "
1619  "automatically cached: "
1620  << g_auto_resultset_caching_threshold << " Bytes.";
1621  }
1622  LOG(INFO) << " \t\t Use query step skipping: "
1623  << (g_allow_query_step_skipping ? "enabled" : "disabled");
1624  LOG(INFO) << " \t Use chunk metadata cache: "
1625  << (g_use_chunk_metadata_cache ? "enabled" : "disabled");
1626  }
1627 
1628  const std::string udf_reg_policy_log_prefix{
1629  " \t\t Runtime UDF/UDTF Registration Policy: "};
1630  switch (system_parameters.runtime_udf_registration_policy) {
1632  LOG(INFO) << udf_reg_policy_log_prefix << " DISALLOWED";
1633  break;
1634  }
1636  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for superusers only";
1637  break;
1638  }
1640  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for all users";
1641  break;
1642  }
1643  default: {
1644  UNREACHABLE() << "Unrecognized option for Runtime UDF/UDTF registration policy.";
1645  }
1646  }
1647 
1648  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
1649  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
1650  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
1651  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
1652  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
1653 
1654  return boost::none;
1655 }
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
bool g_enable_parallel_window_partition_sort
unsigned connect_timeout
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
double g_running_query_interrupt_freq
Definition: Execute.cpp:129
bool g_enable_smem_group_by
size_t g_pmem_size
size_t g_parallel_top_max
Definition: ResultSet.cpp:48
int safe_open(const char *path, int flags, mode_t mode) noexcept
Definition: heavyai_fs.cpp:90
float g_filter_push_down_low_frac
Definition: Execute.cpp:96
const std::string kDataDirectoryName
bool g_use_query_resultset_cache
Definition: Execute.cpp:148
bool g_multi_instance
Definition: heavyai_locks.h:21
size_t g_cpu_sub_task_size
Definition: Execute.cpp:83
SystemParameters::RuntimeUdfRegistrationPolicy construct_runtime_udf_registration_policy(const bool enable_runtime_udfs, const bool enable_udf_registration_for_all_users)
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
bool g_strip_join_covered_quals
Definition: Execute.cpp:107
bool g_enable_logs_system_tables
Definition: Catalog.cpp:100
bool g_enable_direct_columnarization
Definition: Execute.cpp:122
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:841
bool g_enable_lazy_fetch
Definition: Execute.cpp:124
const std::string kDefaultDiskCacheDirName
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:81
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:128
#define LOG(tag)
Definition: Logger.h:216
bool g_allow_query_step_skipping
Definition: Execute.cpp:151
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
Definition: Logger.cpp:17
const std::string kDefaultLogDirName
#define UNREACHABLE()
Definition: Logger.h:266
const std::string kSystemCatalogName
unsigned g_cpu_threads_override
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:98
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
size_t g_streaming_topn_max
Definition: ResultSet.cpp:49
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:152
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:89
bool g_enable_geo_ops_on_uncompressed_coords
Definition: Execute.cpp:114
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:126
int g_hll_precision_bits
bool g_enable_data_recycler
Definition: Execute.cpp:146
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:91
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:149
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:153
bool g_enable_string_functions
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:138
size_t g_watchdog_none_encoded_string_translation_limit
Definition: Execute.cpp:81
bool g_null_div_by_zero
Definition: Execute.cpp:88
bool g_enable_interop
size_t g_parallel_top_min
Definition: ResultSet.cpp:47
bool g_enable_columnar_output
Definition: Execute.cpp:99
ssize_t safe_write(const int fd, const void *buffer, const size_t buffer_size) noexcept
Definition: heavyai_fs.cpp:144
bool g_enable_s3_fsi
Definition: Catalog.cpp:97
bool g_enable_idp_temporary_users
Definition: SysCatalog.cpp:63
bool g_from_table_reordering
Definition: Execute.cpp:90
size_t g_window_function_aggregation_tree_fanout
static void setDefaultImportPath(const std::string &base_path)
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_assign_render_groups
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:104
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
void init(LogOptions const &log_opts)
Definition: Logger.cpp:308
bool g_enable_http_binary_server
std::string g_pmem_path
static bool migrationEnabled()
Definition: MigrationMgr.h:43
float g_filter_push_down_high_frac
Definition: Execute.cpp:97
bool g_enable_distance_rangejoin
Definition: Execute.cpp:103
bool g_bigint_count
bool g_enable_watchdog
int64_t g_bitmap_memory_limit
size_t g_max_memory_allocation_size
Definition: Execute.cpp:116
double g_overlaps_target_entries_per_bin
Definition: Execute.cpp:106
size_t g_approx_quantile_buffer
Definition: Execute.cpp:158
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:150
size_t g_max_log_length
Definition: Execute.cpp:163
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:105
bool g_enable_dev_table_functions
Definition: Execute.cpp:113
Global bool for controlling render group assignment, remove along with legacy poly rendering...
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:154
bool g_enable_window_functions
Definition: Execute.cpp:111
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:155
size_t g_min_memory_allocation_size
Definition: Execute.cpp:117
bool with_keepalive
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool g_read_only
Definition: File.cpp:40
bool g_enable_seconds_refresh
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:85
size_t g_estimator_failure_max_groupby_size
tuple line
Definition: parse_ast.py:10
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:135
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:161
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:84
bool g_cache_string_hash
const std::string kCatalogDirectoryName
float g_fraction_code_cache_to_evict
bool g_allow_invalid_literal_buffer_reads
Definition: ConstantIR.cpp:140
bool g_allow_system_dashboard_update
Definition: DBHandler.cpp:121
bool g_enable_filter_push_down
Definition: Execute.cpp:95
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
const std::string kDefaultLicenseFileName
bool g_enable_bump_allocator
Definition: Execute.cpp:120
bool g_enable_parallel_window_partition_compute
bool g_enable_union
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:82
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:87
bool g_allow_cpu_retry
Definition: Execute.cpp:86
int32_t ftruncate(const int32_t fd, int64_t length)
Definition: heavyai_fs.cpp:86
size_t g_approx_quantile_centroids
Definition: Execute.cpp:159
const std::string kLockfilesDirectoryName
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:888
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:42
bool g_optimize_row_initialization
Definition: Execute.cpp:101
static bool run
int safe_fcntl(int fd, int cmd, struct flock *fl) noexcept
Definition: heavyai_fs.cpp:112
bool g_columnar_large_projections
int safe_close(int fd) noexcept
Definition: heavyai_fs.cpp:101
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:85
bool g_enable_fsi
Definition: Catalog.cpp:96
size_t g_columnar_large_projections_threshold
bool g_query_engine_cuda_streams
Definition: QueryEngine.h:9
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:289
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:87
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:125
size_t g_max_import_threads
Definition: Importer.cpp:106
bool g_use_hashtable_cache
Definition: Execute.cpp:147
#define VLOG(n)
Definition: Logger.h:316
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:156
size_t g_large_ndv_multiplier
bool g_enable_table_functions
Definition: Execute.cpp:112
std::string cluster_command_line_arg
size_t g_gpu_smem_threshold
Definition: Execute.cpp:130