OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <filesystem>
22 #include <iostream>
23 #include <string>
24 
25 using namespace std::string_literals;
26 
27 #include "CommandLineOptions.h"
28 #include "LeafHostInfo.h"
30 #include "MapDRelease.h"
33 #include "Shared/Compressor.h"
34 #include "Shared/SysDefinitions.h"
37 #include "Utils/DdlUtils.h"
38 
39 #ifdef _WIN32
40 #include <io.h>
41 #include <process.h>
42 #endif
43 
// File-scope state used by the command-line option handling in this file:
// the definition of a CommandLineOptions static member, declarations of
// globals that are defined in other translation units, and a few connection
// settings that are defined here.
44 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
45 
// Declaration only; the definition lives elsewhere.
46 extern std::string cluster_command_line_arg;
47 
49 
// Declarations only (extern): these globals are defined elsewhere. Several of
// them are bound to command-line switches further down in this file, e.g.
// g_use_table_device_offset, g_cache_string_hash, g_enable_system_tables,
// g_enable_logs_system_tables and g_logs_system_tables_refresh_interval.
50 extern bool g_use_table_device_offset;
52 extern bool g_cache_string_hash;
55 extern int64_t g_large_ndv_threshold;
56 extern size_t g_large_ndv_multiplier;
57 extern int64_t g_bitmap_memory_limit;
58 extern bool g_enable_seconds_refresh;
59 extern size_t g_approx_quantile_buffer;
60 extern size_t g_approx_quantile_centroids;
61 extern size_t g_parallel_top_min;
62 extern size_t g_parallel_top_max;
63 extern size_t g_streaming_topn_max;
67 extern bool g_enable_system_tables;
69 extern bool g_enable_logs_system_tables;
70 extern std::string g_logs_system_tables_refresh_interval;
// Only declared when the build defines ENABLE_MEMKIND; the "pmem-path" option
// that writes to it is guarded by the same macro.
72 #ifdef ENABLE_MEMKIND
73 extern std::string g_pmem_path;
74 #endif
75 
// Bound to the "log-user-id" option below.
76 namespace Catalog_Namespace {
77 extern bool g_log_user_id;
78 }
79 
// Connection settings defined (not merely declared) in this file.
// NOTE(review): the units are not shown in this listing - presumably
// milliseconds; confirm against the code that consumes these values.
80 unsigned connect_timeout{20000};
81 unsigned recv_timeout{300000};
82 unsigned send_timeout{300000};
83 bool with_keepalive{false};
85 
// Logging setup: adjusts the configured severity for verbose mode, validates
// the base path, hands it to the log options and then initialises the logger.
// (The signature line of this function is not present in this listing.)
//
// When verbose logging is requested and DEBUG1 compares less than the
// configured severity, the severity is replaced by DEBUG1.
// NOTE(review): presumably a smaller Severity value means more detailed
// output - confirm against the logger's Severity definition.
87  if (verbose_logging && logger::Severity::DEBUG1 < log_options_.severity_) {
88  log_options_.severity_ = logger::Severity::DEBUG1;
89  }
// The base path is validated before it is passed to the log options.
90  validate_base_path();
91  log_options_.set_base_path(base_path);
// Initialise the logger with the finalised options.
92  logger::init(log_options_);
93 }
94 
96  help_desc.add_options()("help,h", "Show available options.");
97  help_desc.add_options()(
98  "allow-cpu-retry",
99  po::value<bool>(&g_allow_cpu_retry)
100  ->default_value(g_allow_cpu_retry)
101  ->implicit_value(true),
102  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
103  help_desc.add_options()("allow-loop-joins",
104  po::value<bool>(&allow_loop_joins)
105  ->default_value(allow_loop_joins)
106  ->implicit_value(true),
107  "Enable loop joins.");
108  help_desc.add_options()("bigint-count",
109  po::value<bool>(&g_bigint_count)
110  ->default_value(g_bigint_count)
111  ->implicit_value(true),
112  "Use 64-bit count.");
113  help_desc.add_options()("calcite-max-mem",
114  po::value<size_t>(&system_parameters.calcite_max_mem)
115  ->default_value(system_parameters.calcite_max_mem),
116  "Max memory available to calcite JVM.");
117  if (!dist_v5_) {
118  help_desc.add_options()("calcite-port",
119  po::value<int>(&system_parameters.calcite_port)
120  ->default_value(system_parameters.calcite_port),
121  "Calcite port number.");
122  }
123  help_desc.add_options()("config",
124  po::value<std::string>(&system_parameters.config_file),
125  "Path to server configuration file.");
126  help_desc.add_options()("cpu-buffer-mem-bytes",
127  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
128  ->default_value(system_parameters.cpu_buffer_mem_bytes),
129  "Size of memory reserved for CPU buffers, in bytes.");
130 
131  help_desc.add_options()("cpu-only",
132  po::value<bool>(&system_parameters.cpu_only)
133  ->default_value(system_parameters.cpu_only)
134  ->implicit_value(true),
135  "Run on CPU only, even if GPUs are available.");
136  help_desc.add_options()("cuda-block-size",
137  po::value<size_t>(&system_parameters.cuda_block_size)
138  ->default_value(system_parameters.cuda_block_size),
139  "Size of block to use on GPU.");
140  help_desc.add_options()("cuda-grid-size",
141  po::value<size_t>(&system_parameters.cuda_grid_size)
142  ->default_value(system_parameters.cuda_grid_size),
143  "Size of grid to use on GPU.");
144  if (!dist_v5_) {
145  help_desc.add_options()(
146  "data",
147  po::value<std::string>(&base_path)->required()->default_value("storage"),
148  "Directory path to HeavyDB data storage (catalogs, raw data, log files, etc).");
149  positional_options.add("data", 1);
150  }
151  help_desc.add_options()("db-query-list",
152  po::value<std::string>(&db_query_file),
153  "Path to file containing HeavyDB warmup queries.");
154  help_desc.add_options()(
155  "exit-after-warmup",
156  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
157  "Exit after HeavyDB warmup queries.");
158  help_desc.add_options()("dynamic-watchdog-time-limit",
159  po::value<unsigned>(&dynamic_watchdog_time_limit)
160  ->default_value(dynamic_watchdog_time_limit)
161  ->implicit_value(10000),
162  "Dynamic watchdog time limit, in milliseconds.");
163  help_desc.add_options()("enable-data-recycler",
164  po::value<bool>(&enable_data_recycler)
165  ->default_value(enable_data_recycler)
166  ->implicit_value(true),
167  "Use data recycler.");
168  help_desc.add_options()("use-hashtable-cache",
169  po::value<bool>(&use_hashtable_cache)
170  ->default_value(use_hashtable_cache)
171  ->implicit_value(true),
172  "Use hashtable cache.");
173  help_desc.add_options()("use-query-resultset-cache",
174  po::value<bool>(&g_use_query_resultset_cache)
175  ->default_value(g_use_query_resultset_cache)
176  ->implicit_value(true),
177  "Use query resultset cache.");
178  help_desc.add_options()("use-chunk-metadata-cache",
179  po::value<bool>(&g_use_chunk_metadata_cache)
180  ->default_value(g_use_chunk_metadata_cache)
181  ->implicit_value(true),
182  "Use chunk metadata cache.");
183  help_desc.add_options()(
184  "hashtable-cache-total-bytes",
185  po::value<size_t>(&hashtable_cache_total_bytes)
186  ->default_value(hashtable_cache_total_bytes)
187  ->implicit_value(4294967296),
188  "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
189  help_desc.add_options()("max-cacheable-hashtable-size-bytes",
190  po::value<size_t>(&max_cacheable_hashtable_size_bytes)
191  ->default_value(max_cacheable_hashtable_size_bytes)
192  ->implicit_value(2147483648),
193  "The maximum size of hashtable that is available to cache, in "
194  "bytes (default: 2GB).");
195  help_desc.add_options()(
196  "query-resultset-cache-total-bytes",
197  po::value<size_t>(&g_query_resultset_cache_total_bytes)
198  ->default_value(g_query_resultset_cache_total_bytes),
199  "Size of total memory space for query resultset cache, in bytes (default: 4GB).");
200  help_desc.add_options()(
201  "max-query-resultset-size-bytes",
204  "The maximum size of query resultset that is available to cache, in "
205  "bytes (default: 2GB).");
206  help_desc.add_options()("allow-auto-query-resultset-caching",
207  po::value<bool>(&g_allow_auto_resultset_caching)
208  ->default_value(g_allow_auto_resultset_caching)
209  ->implicit_value(true),
210  "Allow automatic query resultset caching when the size of "
211  "query resultset is smaller or equal to the threshold defined "
212  "by `auto-resultset-caching-threshold-bytes`, in bytes (to "
213  "enable this, query resultset recycler "
214  "should be enabled, default: 1048576 bytes (or 1MB)).");
215  help_desc.add_options()(
216  "auto-resultset-caching-threshold-bytes",
217  po::value<size_t>(&g_auto_resultset_caching_threshold)
218  ->default_value(g_auto_resultset_caching_threshold),
219  "A threshold that allows caching query resultset automatically if the size of "
220  "resultset is less than it, in bytes (default: 1MB).");
221  help_desc.add_options()("allow-query-step-skipping",
222  po::value<bool>(&g_allow_query_step_skipping)
223  ->default_value(g_allow_query_step_skipping)
224  ->implicit_value(true),
225  "Allow query step skipping when multi-step query has at least "
226  "one cached query resultset.");
227  help_desc.add_options()("enable-debug-timer",
228  po::value<bool>(&g_enable_debug_timer)
229  ->default_value(g_enable_debug_timer)
230  ->implicit_value(true),
231  "Enable debug timer logging.");
232  help_desc.add_options()("enable-dynamic-watchdog",
233  po::value<bool>(&enable_dynamic_watchdog)
234  ->default_value(enable_dynamic_watchdog)
235  ->implicit_value(true),
236  "Enable dynamic watchdog.");
237  help_desc.add_options()("enable-filter-push-down",
238  po::value<bool>(&g_enable_filter_push_down)
239  ->default_value(g_enable_filter_push_down)
240  ->implicit_value(true),
241  "Enable filter push down through joins.");
242  help_desc.add_options()("enable-overlaps-hashjoin",
243  po::value<bool>(&g_enable_overlaps_hashjoin)
244  ->default_value(g_enable_overlaps_hashjoin)
245  ->implicit_value(true),
246  "Enable the overlaps hash join framework allowing for range "
247  "join (e.g. spatial overlaps) computation using a hash table.");
// Registers the boolean switch bound to g_enable_hashjoin_many_to_many; a bare
// --enable-hashjoin-many-to-many stores true (implicit value).
// The help text is deliberately distinct from the one used for
// "enable-overlaps-hashjoin" so that --help does not list the same description
// for two different switches.
// NOTE(review): the wording is derived from the option name only; replace it
// with a more specific description if one is available.
248  help_desc.add_options()("enable-hashjoin-many-to-many",
249  po::value<bool>(&g_enable_hashjoin_many_to_many)
250  ->default_value(g_enable_hashjoin_many_to_many)
251  ->implicit_value(true),
252  "Enable many-to-many hash joins.");
254  help_desc.add_options()("enable-distance-rangejoin",
255  po::value<bool>(&g_enable_distance_rangejoin)
256  ->default_value(g_enable_distance_rangejoin)
257  ->implicit_value(true),
258  "Enable accelerating point distance joins with a hash table. "
259  "This rewrites ST_Distance when using an upperbound (<= X).");
260  help_desc.add_options()("enable-runtime-query-interrupt",
261  po::value<bool>(&enable_runtime_query_interrupt)
262  ->default_value(enable_runtime_query_interrupt)
263  ->implicit_value(true),
264  "Enable runtime query interrupt.");
265  help_desc.add_options()("enable-non-kernel-time-query-interrupt",
266  po::value<bool>(&enable_non_kernel_time_query_interrupt)
267  ->default_value(enable_non_kernel_time_query_interrupt)
268  ->implicit_value(true),
269  "Enable non-kernel time query interrupt.");
270  help_desc.add_options()("pending-query-interrupt-freq",
271  po::value<unsigned>(&pending_query_interrupt_freq)
272  ->default_value(pending_query_interrupt_freq)
273  ->implicit_value(1000),
274  "A frequency of checking the request of pending query "
275  "interrupt from user (in millisecond).");
276  help_desc.add_options()(
277  "running-query-interrupt-freq",
278  po::value<double>(&running_query_interrupt_freq)
279  ->default_value(running_query_interrupt_freq)
280  ->implicit_value(0.5),
281  "A frequency of checking the request of running query "
282  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
283  help_desc.add_options()("use-estimator-result-cache",
284  po::value<bool>(&use_estimator_result_cache)
285  ->default_value(use_estimator_result_cache)
286  ->implicit_value(true),
287  "Use estimator result cache.");
288  if (!dist_v5_) {
289  help_desc.add_options()(
290  "enable-string-dict-hash-cache",
291  po::value<bool>(&g_cache_string_hash)
292  ->default_value(g_cache_string_hash)
293  ->implicit_value(true),
294  "Cache string hash values in the string dictionary server during import.");
295  }
296  help_desc.add_options()(
297  "enable-thrift-logs",
298  po::value<bool>(&g_enable_thrift_logs)
299  ->default_value(g_enable_thrift_logs)
300  ->implicit_value(true),
301  "Enable writing messages directly from thrift to stdout/stderr.");
302  help_desc.add_options()("enable-watchdog",
303  po::value<bool>(&enable_watchdog)
304  ->default_value(enable_watchdog)
305  ->implicit_value(true),
306  "Enable watchdog.");
307  help_desc.add_options()(
308  "watchdog-none-encoded-string-translation-limit",
309  po::value<size_t>(&watchdog_none_encoded_string_translation_limit)
310  ->default_value(watchdog_none_encoded_string_translation_limit),
311  "Max number of none-encoded strings allowed to be translated "
312  "to dictionary-encoded with watchdog enabled");
313  help_desc.add_options()(
314  "filter-push-down-low-frac",
315  po::value<float>(&g_filter_push_down_low_frac)
316  ->default_value(g_filter_push_down_low_frac)
317  ->implicit_value(g_filter_push_down_low_frac),
318  "Lower threshold for selectivity of filters that are pushed down.");
319  help_desc.add_options()(
320  "filter-push-down-high-frac",
321  po::value<float>(&g_filter_push_down_high_frac)
322  ->default_value(g_filter_push_down_high_frac)
323  ->implicit_value(g_filter_push_down_high_frac),
324  "Higher threshold for selectivity of filters that are pushed down.");
325  help_desc.add_options()("filter-push-down-passing-row-ubound",
326  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
328  ->implicit_value(g_filter_push_down_passing_row_ubound),
329  "Upperbound on the number of rows that should pass the filter "
330  "if the selectivity is less than "
331  "the high fraction threshold.");
332  help_desc.add_options()("from-table-reordering",
333  po::value<bool>(&g_from_table_reordering)
334  ->default_value(g_from_table_reordering)
335  ->implicit_value(true),
336  "Enable automatic table reordering in FROM clause.");
337  help_desc.add_options()("gpu-buffer-mem-bytes",
338  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
339  ->default_value(system_parameters.gpu_buffer_mem_bytes),
340  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
341  help_desc.add_options()("gpu-input-mem-limit",
342  po::value<double>(&system_parameters.gpu_input_mem_limit)
343  ->default_value(system_parameters.gpu_input_mem_limit),
344  "Force query to CPU when input data memory usage exceeds this "
345  "percentage of available GPU memory.");
346  help_desc.add_options()(
347  "hll-precision-bits",
348  po::value<int>(&g_hll_precision_bits)
349  ->default_value(g_hll_precision_bits)
350  ->implicit_value(g_hll_precision_bits),
351  "Number of bits used from the hash value used to specify the bucket number.");
352  if (!dist_v5_) {
353  help_desc.add_options()("http-port",
354  po::value<int>(&http_port)->default_value(http_port),
355  "HTTP port number.");
356  help_desc.add_options()(
357  "http-binary-port",
358  po::value<int>(&http_binary_port)->default_value(http_binary_port),
359  "HTTP binary port number.");
360  }
361  help_desc.add_options()(
362  "idle-session-duration",
363  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
364  "Maximum duration of idle session.");
365  help_desc.add_options()("inner-join-fragment-skipping",
366  po::value<bool>(&g_inner_join_fragment_skipping)
367  ->default_value(g_inner_join_fragment_skipping)
368  ->implicit_value(true),
369  "Enable/disable inner join fragment skipping. This feature is "
370  "considered stable and is enabled by default. This "
371  "parameter will be removed in a future release.");
372  help_desc.add_options()(
373  "max-session-duration",
374  po::value<int>(&max_session_duration)->default_value(max_session_duration),
375  "Maximum duration of active session.");
376  help_desc.add_options()("num-sessions",
377  po::value<int>(&system_parameters.num_sessions)
378  ->default_value(system_parameters.num_sessions),
379  "Maximum number of active session.");
380  help_desc.add_options()(
381  "null-div-by-zero",
382  po::value<bool>(&g_null_div_by_zero)
383  ->default_value(g_null_div_by_zero)
384  ->implicit_value(true),
385  "Return null on division by zero instead of throwing an exception.");
386  help_desc.add_options()(
387  "num-reader-threads",
388  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
389  "Number of reader threads to use.");
// Registers "max-import-threads", bound to g_max_import_threads and defaulting
// to that variable's current value. The help text states that the number of
// hardware threads is used instead when it is lower, and that the copy
// statement's threads option can override the setting.
390  help_desc.add_options()(
391  "max-import-threads",
392  po::value<size_t>(&g_max_import_threads)->default_value(g_max_import_threads),
393  "Max number of default import threads to use (num hardware threads will be used "
394  "instead if lower). Can be overridden with copy statement threads option.");
395  help_desc.add_options()(
396  "overlaps-max-table-size-bytes",
397  po::value<size_t>(&g_overlaps_max_table_size_bytes)
398  ->default_value(g_overlaps_max_table_size_bytes),
399  "The maximum size in bytes of the hash table for an overlaps hash join.");
400  help_desc.add_options()("overlaps-target-entries-per-bin",
401  po::value<double>(&g_overlaps_target_entries_per_bin)
402  ->default_value(g_overlaps_target_entries_per_bin),
403  "The target number of hash entries per bin for overlaps join");
404  if (!dist_v5_) {
405  help_desc.add_options()("port,p",
406  po::value<int>(&system_parameters.omnisci_server_port)
407  ->default_value(system_parameters.omnisci_server_port),
408  "TCP Port number.");
409  }
410  help_desc.add_options()("num-gpus",
411  po::value<int>(&system_parameters.num_gpus)
412  ->default_value(system_parameters.num_gpus),
413  "Number of gpus to use.");
414  help_desc.add_options()(
415  "read-only",
416  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
417  "Enable read-only mode.");
418 
419  help_desc.add_options()(
420  "res-gpu-mem",
421  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
422  "Reduces GPU memory available to the HeavyDB allocator by this amount. Used for "
423  "compiled code cache and ancillary GPU functions and other processes that may also "
424  "be using the GPU concurrent with HeavyDB.");
425 
426  help_desc.add_options()("start-gpu",
427  po::value<int>(&system_parameters.start_gpu)
428  ->default_value(system_parameters.start_gpu),
429  "First gpu to use.");
430  help_desc.add_options()("trivial-loop-join-threshold",
431  po::value<unsigned>(&g_trivial_loop_join_threshold)
432  ->default_value(g_trivial_loop_join_threshold)
433  ->implicit_value(1000),
434  "The maximum number of rows in the inner table of a loop join "
435  "considered to be trivially small.");
436  help_desc.add_options()("verbose",
437  po::value<bool>(&verbose_logging)
438  ->default_value(verbose_logging)
439  ->implicit_value(true),
440  "Write additional debug log messages to server logs.");
441  help_desc.add_options()(
442  "enable-runtime-udf",
443  po::value<bool>(&enable_runtime_udf)
444  ->default_value(enable_runtime_udf)
445  ->implicit_value(true),
446  "DEPRECATED. Please use `enable-runtime-udfs` instead as this flag will be removed "
447  "in the near future.");
448  help_desc.add_options()(
449  "enable-runtime-udfs",
450  po::value<bool>(&enable_runtime_udfs)
451  ->default_value(enable_runtime_udfs)
452  ->implicit_value(true),
453  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
454  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
455  "Compiler server, packaged separately.");
456  help_desc.add_options()("enable-udf-registration-for-all-users",
457  po::value<bool>(&enable_udf_registration_for_all_users)
458  ->default_value(enable_udf_registration_for_all_users)
459  ->implicit_value(true),
460  "Allow all users, not just superusers, to register runtime "
461  "UDFs/UDTFs. Option only valid if "
462  "`--enable-runtime-udfs` is set to true.");
463  help_desc.add_options()("version,v", "Print Version Number.");
// Current switch for string functions and, right after it, its deprecated
// alias. Both are bound to the same variable, g_enable_string_functions, and
// both register that variable's current value as their default.
// NOTE(review): because two options write one variable, the value that ends
// up stored when they disagree (or when only the deprecated alias is given)
// depends on the order in which the parser notifies options - confirm the
// intended precedence.
464  help_desc.add_options()("enable-string-functions",
465  po::value<bool>(&g_enable_string_functions)
466  ->default_value(g_enable_string_functions)
467  ->implicit_value(true),
468  "Enable experimental string functions.");
// Deprecated alias; its help text points users at --enable-string-functions.
469  help_desc.add_options()("enable-experimental-string-functions",
470  po::value<bool>(&g_enable_string_functions)
471  ->default_value(g_enable_string_functions)
472  ->implicit_value(true),
473  "DEPRECATED. String functions are now enabled by default, "
474  "but can still be controlled with --enable-string-functions.");
475  help_desc.add_options()(
476  "enable-fsi",
477  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
478  "Enable foreign storage interface.");
479 
480  help_desc.add_options()("enable-legacy-delimited-import",
481  po::value<bool>(&g_enable_legacy_delimited_import)
482  ->default_value(g_enable_legacy_delimited_import)
483  ->implicit_value(true),
484  "Use legacy importer for delimited sources.");
485 #ifdef ENABLE_IMPORT_PARQUET
486  help_desc.add_options()("enable-legacy-parquet-import",
487  po::value<bool>(&g_enable_legacy_parquet_import)
488  ->default_value(g_enable_legacy_parquet_import)
489  ->implicit_value(true),
490  "Use legacy importer for parquet sources.");
491 #endif
492  help_desc.add_options()("enable-fsi-regex-import",
493  po::value<bool>(&g_enable_fsi_regex_import)
494  ->default_value(g_enable_fsi_regex_import)
495  ->implicit_value(true),
496  "Use FSI importer for regex parsed sources.");
497 
498  help_desc.add_options()("enable-add-metadata-columns",
499  po::value<bool>(&g_enable_add_metadata_columns)
500  ->default_value(g_enable_add_metadata_columns)
501  ->implicit_value(true),
502  "Enable add_metadata_columns COPY FROM WITH option (Beta).");
503 
504  help_desc.add_options()("disk-cache-path",
505  po::value<std::string>(&disk_cache_config.path),
506  "Specify the path for the disk cache.");
507 
508  help_desc.add_options()(
509  "disk-cache-level",
510  po::value<std::string>(&(disk_cache_level))->default_value("foreign_tables"),
511  "Specify level of disk cache. Valid options are 'foreign_tables', "
512  "'local_tables', 'none', and 'all'.");
513 
514  help_desc.add_options()("disk-cache-size",
515  po::value<size_t>(&(disk_cache_config.size_limit)),
516  "Specify a maximum size for the disk cache in bytes.");
517 
518 #ifdef HAVE_AWS_S3
519  help_desc.add_options()(
520  "allow-s3-server-privileges",
521  po::value<bool>(&g_allow_s3_server_privileges)
522  ->default_value(g_allow_s3_server_privileges)
523  ->implicit_value(true),
524  "Allow S3 server privileges, if IAM user credentials are not provided. Credentials "
525  "may be specified with "
526  "environment variables (such as AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, etc), "
527  "an AWS credentials file, or when running on an EC2 instance, with an IAM role "
528  "that is attached to the instance.");
529 #endif // defined(HAVE_AWS_S3)
530  help_desc.add_options()(
531  "enable-interoperability",
532  po::value<bool>(&g_enable_interop)
533  ->default_value(g_enable_interop)
534  ->implicit_value(true),
535  "Enable offloading of query portions to an external execution engine.");
536  help_desc.add_options()("enable-union",
537  po::value<bool>(&g_enable_union)
538  ->default_value(g_enable_union)
539  ->implicit_value(true),
540  "DEPRECATED. UNION ALL is enabled by default. Please remove "
541  "use of this option, as it may be disabled in the future.");
542  help_desc.add_options()(
543  "calcite-service-timeout",
544  po::value<size_t>(&system_parameters.calcite_timeout)
545  ->default_value(system_parameters.calcite_timeout),
546  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
547  "schema changes or when running large numbers of parallel queries.");
// Registers "calcite-service-keepalive", bound to
// system_parameters.calcite_keepalive. The option is typed as size_t, so the
// bool literal passed to implicit_value() is converted to 1: a bare
// --calcite-service-keepalive stores 1.
// NOTE(review): how values other than 0 and 1 are interpreted is not visible
// in this listing.
548  help_desc.add_options()("calcite-service-keepalive",
549  po::value<size_t>(&system_parameters.calcite_keepalive)
550  ->default_value(system_parameters.calcite_keepalive)
551  ->implicit_value(true),
552  "Enable keepalive on Calcite connections.");
553  help_desc.add_options()(
554  "stringdict-parallelizm",
555  po::value<bool>(&g_enable_stringdict_parallel)
556  ->default_value(g_enable_stringdict_parallel)
557  ->implicit_value(true),
558  "Allow StringDictionary to parallelize loads using multiple threads");
559  help_desc.add_options()(
560  "log-user-id",
561  po::value<bool>(&Catalog_Namespace::g_log_user_id)
562  ->default_value(Catalog_Namespace::g_log_user_id)
563  ->implicit_value(true),
564  "Log userId integer in place of the userName (when available).");
565  help_desc.add_options()("log-user-origin",
566  po::value<bool>(&log_user_origin)
567  ->default_value(log_user_origin)
568  ->implicit_value(true),
569  "Lookup the origin of inbound connections by IP address/DNS "
570  "name, and print this information as part of stdlog.");
571  help_desc.add_options()(
572  "allowed-import-paths",
573  po::value<std::string>(&allowed_import_paths),
574  "List of allowed root paths that can be used in import operations.");
575  help_desc.add_options()(
576  "allowed-export-paths",
577  po::value<std::string>(&allowed_export_paths),
578  "List of allowed root paths that can be used in export operations.");
579  help_desc.add_options()("enable-system-tables",
580  po::value<bool>(&g_enable_system_tables)
581  ->default_value(g_enable_system_tables)
582  ->implicit_value(true),
583  "Enable use of system tables.");
584  help_desc.add_options()("enable-table-functions",
585  po::value<bool>(&g_enable_table_functions)
586  ->default_value(g_enable_table_functions)
587  ->implicit_value(true),
588  "Enable system table functions support.");
589  help_desc.add_options()("enable-logs-system-tables",
590  po::value<bool>(&g_enable_logs_system_tables)
591  ->default_value(g_enable_logs_system_tables)
592  ->implicit_value(true),
593  "Enable use of logs system tables.");
594  help_desc.add_options()("logs-system-tables-refresh-interval",
595  po::value<std::string>(&g_logs_system_tables_refresh_interval)
596  ->default_value(g_logs_system_tables_refresh_interval),
597  "Refresh interval for logs system tables.");
598  help_desc.add_options()(
599  "logs-system-tables-max-files-count",
600  po::value<size_t>(&g_logs_system_tables_max_files_count)
601  ->default_value(g_logs_system_tables_max_files_count),
602  "Maximum number of log files that will be processed by each logs system table.");
603 #ifdef ENABLE_MEMKIND
604  help_desc.add_options()("enable-tiered-cpu-mem",
605  po::value<bool>(&g_enable_tiered_cpu_mem)
606  ->default_value(g_enable_tiered_cpu_mem)
607  ->implicit_value(true),
608  "Enable additional tiers of CPU memory (PMEM, etc...)");
609  help_desc.add_options()("pmem-size", po::value<size_t>(&g_pmem_size)->default_value(0));
610  help_desc.add_options()("pmem-path", po::value<std::string>(&g_pmem_path));
611 #endif
612 
613  help_desc.add(log_options_.get_options());
614 }
615 
617  developer_desc.add_options()("dev-options", "Print internal developer options.");
618  developer_desc.add_options()(
619  "enable-calcite-view-optimize",
620  po::value<bool>(&system_parameters.enable_calcite_view_optimize)
621  ->default_value(system_parameters.enable_calcite_view_optimize)
622  ->implicit_value(true),
623  "Enable additional calcite (query plan) optimizations when a view is part of the "
624  "query.");
625  developer_desc.add_options()(
626  "enable-columnar-output",
627  po::value<bool>(&g_enable_columnar_output)
628  ->default_value(g_enable_columnar_output)
629  ->implicit_value(true),
630  "Enable columnar output for intermediate/final query steps.");
631  developer_desc.add_options()(
632  "enable-left-join-filter-hoisting",
633  po::value<bool>(&g_enable_left_join_filter_hoisting)
634  ->default_value(g_enable_left_join_filter_hoisting)
635  ->implicit_value(true),
636  "Enable hoisting left hand side filters through left joins.");
637  developer_desc.add_options()("optimize-row-init",
638  po::value<bool>(&g_optimize_row_initialization)
639  ->default_value(g_optimize_row_initialization)
640  ->implicit_value(true),
641  "Optimize row initialization.");
642  developer_desc.add_options()("enable-legacy-syntax",
643  po::value<bool>(&enable_legacy_syntax)
644  ->default_value(enable_legacy_syntax)
645  ->implicit_value(true),
646  "Enable legacy syntax.");
647  developer_desc.add_options()(
648  "enable-multifrag",
649  po::value<bool>(&allow_multifrag)
650  ->default_value(allow_multifrag)
651  ->implicit_value(true),
652  "Enable execution over multiple fragments in a single round-trip to GPU.");
653  developer_desc.add_options()("enable-lazy-fetch",
654  po::value<bool>(&g_enable_lazy_fetch)
655  ->default_value(g_enable_lazy_fetch)
656  ->implicit_value(true),
657  "Enable lazy fetch columns in query results.");
658  developer_desc.add_options()(
659  "enable-shared-mem-group-by",
660  po::value<bool>(&g_enable_smem_group_by)
661  ->default_value(g_enable_smem_group_by)
662  ->implicit_value(true),
663  "Enable using GPU shared memory for some GROUP BY queries.");
664  developer_desc.add_options()("num-executors",
665  po::value<int>(&system_parameters.num_executors)
666  ->default_value(system_parameters.num_executors),
667  "Number of executors to run in parallel.");
668  developer_desc.add_options()(
669  "gpu-shared-mem-threshold",
670  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
671  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
672  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
673  developer_desc.add_options()(
674  "enable-shared-mem-grouped-non-count-agg",
675  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
676  ->default_value(g_enable_smem_grouped_non_count_agg)
677  ->implicit_value(true),
678  "Enable using GPU shared memory for grouped non-count aggregate queries.");
679  developer_desc.add_options()(
680  "enable-shared-mem-non-grouped-agg",
681  po::value<bool>(&g_enable_smem_non_grouped_agg)
682  ->default_value(g_enable_smem_non_grouped_agg)
683  ->implicit_value(true),
684  "Enable using GPU shared memory for non-grouped aggregate queries.");
685  developer_desc.add_options()("enable-direct-columnarization",
686  po::value<bool>(&g_enable_direct_columnarization)
687  ->default_value(g_enable_direct_columnarization)
688  ->implicit_value(true),
689  "Enables/disables a more optimized columnarization method "
690  "for intermediate steps in multi-step queries.");
691  developer_desc.add_options()(
692  "offset-device-by-table-id",
693  po::value<bool>(&g_use_table_device_offset)
694  ->default_value(g_use_table_device_offset)
695  ->implicit_value(true),
696  "Enables/disables offseting the chosen device ID by the table ID for a given "
697  "fragment. This improves balance of fragments across GPUs.");
698  developer_desc.add_options()("enable-window-functions",
699  po::value<bool>(&g_enable_window_functions)
700  ->default_value(g_enable_window_functions)
701  ->implicit_value(true),
702  "Enable window function support.");
703  developer_desc.add_options()(
704  "enable-parallel-window-partition-compute",
707  ->implicit_value(true),
708  "Enable parallel window function partition computation.");
709  developer_desc.add_options()(
710  "enable-parallel-window-partition-sort",
713  ->implicit_value(true),
714  "Enable parallel window function partition sorting.");
715  developer_desc.add_options()(
716  "window-function-frame-aggregation-tree-fanout",
717  po::value<size_t>(&g_window_function_aggregation_tree_fanout)->default_value(8),
718  "A tree fanout for aggregation tree used to compute aggregation over "
719  "window frame");
720  developer_desc.add_options()("enable-dev-table-functions",
721  po::value<bool>(&g_enable_dev_table_functions)
722  ->default_value(g_enable_dev_table_functions)
723  ->implicit_value(true),
724  "Enable dev (test or alpha) table functions. Also "
725  "requires --enable-table-functions to be turned on");
726 
727  developer_desc.add_options()(
728  "enable-geo-ops-on-uncompressed-coords",
731  ->implicit_value(true),
732  "Enable faster geo operations on uncompressed coords");
733  developer_desc.add_options()(
734  "jit-debug-ir",
735  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
736  "Enable runtime debugger support for the JIT. Note that this flag is "
737  "incompatible "
738  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
739  "`/tmp/mapdquery`.");
740  developer_desc.add_options()(
741  "intel-jit-profile",
742  po::value<bool>(&intel_jit_profile)
743  ->default_value(intel_jit_profile)
744  ->implicit_value(true),
745  "Enable runtime support for the JIT code profiling using Intel VTune.");
746  developer_desc.add_options()(
747  "enable-cpu-sub-tasks",
748  po::value<bool>(&g_enable_cpu_sub_tasks)
749  ->default_value(g_enable_cpu_sub_tasks)
750  ->implicit_value(true),
751  "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
752  "load balance and decrease reduction overhead.");
753  developer_desc.add_options()(
754  "cpu-sub-task-size",
755  po::value<size_t>(&g_cpu_sub_task_size)->default_value(g_cpu_sub_task_size),
756  "Set CPU sub-task size in rows.");
757  developer_desc.add_options()(
758  "skip-intermediate-count",
759  po::value<bool>(&g_skip_intermediate_count)
760  ->default_value(g_skip_intermediate_count)
761  ->implicit_value(true),
762  "Skip pre-flight counts for intermediate projections with no filters.");
763  developer_desc.add_options()(
764  "strip-join-covered-quals",
765  po::value<bool>(&g_strip_join_covered_quals)
766  ->default_value(g_strip_join_covered_quals)
767  ->implicit_value(true),
768  "Remove quals from the filtered count if they are covered by a "
769  "join condition (currently only ST_Contains).");
770 
771  developer_desc.add_options()(
772  "min-cpu-slab-size",
773  po::value<size_t>(&system_parameters.min_cpu_slab_size)
774  ->default_value(system_parameters.min_cpu_slab_size),
775  "Min slab size (size of memory allocations) for CPU buffer pool.");
776  developer_desc.add_options()(
777  "max-cpu-slab-size",
778  po::value<size_t>(&system_parameters.max_cpu_slab_size)
779  ->default_value(system_parameters.max_cpu_slab_size),
780  "Max CPU buffer pool slab size (size of memory allocations). Note if "
781  "there is not enough free memory to accomodate the target slab size, smaller "
782  "slabs will be allocated, down to the minimum size specified by "
783  "min-cpu-slab-size.");
784  developer_desc.add_options()(
785  "min-gpu-slab-size",
786  po::value<size_t>(&system_parameters.min_gpu_slab_size)
787  ->default_value(system_parameters.min_gpu_slab_size),
788  "Min slab size (size of memory allocations) for GPU buffer pools.");
789  developer_desc.add_options()(
790  "max-gpu-slab-size",
791  po::value<size_t>(&system_parameters.max_gpu_slab_size)
792  ->default_value(system_parameters.max_gpu_slab_size),
793  "Max GPU buffer pool slab size (size of memory allocations). Note if "
794  "there is not enough free memory to accomodate the target slab size, smaller "
795  "slabs will be allocated, down to the minimum size speified by "
796  "min-gpu-slab-size.");
797 
798  developer_desc.add_options()(
799  "max-output-projection-allocation-bytes",
800  po::value<size_t>(&g_max_memory_allocation_size)
801  ->default_value(g_max_memory_allocation_size),
802  "Maximum allocation size for a fixed output buffer allocation for projection "
803  "queries with no pre-flight count. Default is the maximum slab size (sizes "
804  "greater "
805  "than the maximum slab size have no affect). Requires bump allocator.");
806  developer_desc.add_options()(
807  "min-output-projection-allocation-bytes",
808  po::value<size_t>(&g_min_memory_allocation_size)
809  ->default_value(g_min_memory_allocation_size),
810  "Minimum allocation size for a fixed output buffer allocation for projection "
811  "queries with no pre-flight count. If an allocation of this size cannot be "
812  "obtained, the query will be retried with different execution parameters and/or "
813  "on "
814  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
815  developer_desc.add_options()("enable-bump-allocator",
816  po::value<bool>(&g_enable_bump_allocator)
817  ->default_value(g_enable_bump_allocator)
818  ->implicit_value(true),
819  "Enable the bump allocator for projection queries on "
820  "GPU. The bump allocator will "
821  "allocate a fixed size buffer for each query, track the "
822  "number of rows passing the "
823  "kernel during query execution, and copy back only the "
824  "rows that passed the kernel "
825  "to CPU after execution. When disabled, pre-flight "
826  "count queries are used to size "
827  "the output buffer for projection queries.");
828  developer_desc.add_options()(
829  "code-cache-eviction-percent",
830  po::value<float>(&g_fraction_code_cache_to_evict)
831  ->default_value(g_fraction_code_cache_to_evict),
832  "Percentage of the GPU code cache to evict if an out of memory error is "
833  "encountered while attempting to place generated code on the GPU.");
834 
835  developer_desc.add_options()("ssl-cert",
836  po::value<std::string>(&system_parameters.ssl_cert_file)
837  ->default_value(std::string("")),
838  "SSL Validated public certficate.");
839 
840  developer_desc.add_options()("ssl-private-key",
841  po::value<std::string>(&system_parameters.ssl_key_file)
842  ->default_value(std::string("")),
843  "SSL private key file.");
844  // Note ssl_trust_store is passed through to Calcite via system_parameters
845  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
846  developer_desc.add_options()("ssl-trust-store",
847  po::value<std::string>(&system_parameters.ssl_trust_store)
848  ->default_value(std::string("")),
849  "SSL public CA certifcates (java trust store) to validate "
850  "TLS connections (passed through to the Calcite server).");
851 
852  developer_desc.add_options()(
853  "ssl-trust-password",
854  po::value<std::string>(&system_parameters.ssl_trust_password)
855  ->default_value(std::string("")),
856  "SSL password for java trust store provided via --ssl-trust-store parameter.");
857 
858  developer_desc.add_options()(
859  "ssl-trust-ca",
860  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
861  ->default_value(std::string("")),
862  "SSL public CA certificates to validate TLS connection(as a client).");
863 
864  developer_desc.add_options()(
865  "ssl-trust-ca-server",
866  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
867  "SSL public CA certificates to validate TLS connection(as a server).");
868 
869  developer_desc.add_options()("ssl-keystore",
870  po::value<std::string>(&system_parameters.ssl_keystore)
871  ->default_value(std::string("")),
872  "SSL server credentials as a java key store (passed "
873  "through to the Calcite server).");
874 
875  developer_desc.add_options()(
876  "ssl-keystore-password",
877  po::value<std::string>(&system_parameters.ssl_keystore_password)
878  ->default_value(std::string("")),
879  "SSL password for java keystore, provide by via --ssl-keystore.");
880 
881  developer_desc.add_options()(
882  "udf",
883  po::value<std::string>(&udf_file_name),
884  "Load user defined extension functions from this file at startup. The file is "
885  "expected to be a C/C++ file with extension .cpp.");
886 
887  developer_desc.add_options()(
888  "udf-compiler-path",
889  po::value<std::string>(&udf_compiler_path),
890  "Provide absolute path to clang++ used in udf compilation.");
891 
892  developer_desc.add_options()("udf-compiler-options",
893  po::value<std::vector<std::string>>(&udf_compiler_options),
894  "Specify compiler options to tailor udf compilation.");
895 
896 #ifdef ENABLE_GEOS
897  developer_desc.add_options()("libgeos-so-filename",
898  po::value<std::string>(&libgeos_so_filename),
899  "Specify libgeos shared object filename to be used for "
900  "geos-backed geo opertations.");
901 #endif
902  developer_desc.add_options()(
903  "large-ndv-threshold",
904  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
905  developer_desc.add_options()(
906  "large-ndv-multiplier",
907  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
908  developer_desc.add_options()("approx_quantile_buffer",
909  po::value<size_t>(&g_approx_quantile_buffer)
910  ->default_value(g_approx_quantile_buffer));
911  developer_desc.add_options()("approx_quantile_centroids",
912  po::value<size_t>(&g_approx_quantile_centroids)
913  ->default_value(g_approx_quantile_centroids));
914  developer_desc.add_options()(
915  "bitmap-memory-limit",
916  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
917  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
918  "size of the group by buffer (entry count in Query Memory Descriptor) and "
919  "multiplying it by the number of count distinct expression and the size of bitmap "
920  "required for each. For approx_count_distinct this is typically 8192 bytes.");
921  developer_desc.add_options()(
922  "enable-filter-function",
923  po::value<bool>(&g_enable_filter_function)
924  ->default_value(g_enable_filter_function)
925  ->implicit_value(true),
926  "Enable the filter function protection feature for the SQL JIT compiler. "
927  "Normally should be on but techs might want to disable for troubleshooting.");
928  developer_desc.add_options()(
929  "enable-idp-temporary-users",
930  po::value<bool>(&g_enable_idp_temporary_users)
931  ->default_value(g_enable_idp_temporary_users)
932  ->implicit_value(true),
933  "Enable temporary users for SAML and LDAP logins on read-only servers. "
934  "Normally should be on but techs might want to disable for troubleshooting.");
935  developer_desc.add_options()(
936  "enable-seconds-refresh-interval",
937  po::value<bool>(&g_enable_seconds_refresh)
938  ->default_value(g_enable_seconds_refresh)
939  ->implicit_value(true),
940  "Enable foreign table seconds refresh interval for testing purposes.");
941  developer_desc.add_options()("enable-auto-metadata-update",
942  po::value<bool>(&g_enable_auto_metadata_update)
943  ->default_value(g_enable_auto_metadata_update)
944  ->implicit_value(true),
945  "Enable automatic metadata update.");
946  developer_desc.add_options()(
947  "parallel-top-min",
948  po::value<size_t>(&g_parallel_top_min)->default_value(g_parallel_top_min),
949  "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
950  "parallelTop() to sort.");
951  developer_desc.add_options()(
952  "parallel-top-max",
953  po::value<size_t>(&g_parallel_top_max)->default_value(g_parallel_top_max),
954  "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
955  "watchdog.");
956  developer_desc.add_options()(
957  "streaming-top-n-max",
958  po::value<size_t>(&g_streaming_topn_max)->default_value(g_streaming_topn_max),
959  "The maximum number of rows allowing streaming top-N sorting.");
960  developer_desc.add_options()("vacuum-min-selectivity",
961  po::value<float>(&g_vacuum_min_selectivity)
962  ->default_value(g_vacuum_min_selectivity),
963  "Minimum selectivity for automatic vacuuming. "
964  "This specifies the percentage (with a value of 0 "
965  "implying 0% and a value of 1 implying 100%) of "
966  "deleted rows in a fragment at which to perform "
967  "automatic vacuuming. A number greater than 1 can "
968  "be used to disable automatic vacuuming.");
969  developer_desc.add_options()("enable-automatic-ir-metadata",
970  po::value<bool>(&g_enable_automatic_ir_metadata)
971  ->default_value(g_enable_automatic_ir_metadata)
972  ->implicit_value(true),
973  "Enable automatic IR metadata (debug builds only).");
974  developer_desc.add_options()(
975  "max-log-length",
976  po::value<size_t>(&g_max_log_length)->default_value(g_max_log_length),
977  "The maximum number of characters that a log message can has. If the log message "
978  "is longer than this, we only record \'g_max_log_message_length\' characters.");
979  developer_desc.add_options()(
980  "estimator-failure-max-groupby-size",
981  po::value<size_t>(&g_estimator_failure_max_groupby_size)
982  ->default_value(g_estimator_failure_max_groupby_size),
983  "Maximum size of the groupby buffer if the estimator fails. By default we use the "
984  "number of tuples in the table up to this value.");
985  developer_desc.add_options()("columnar-large-projections",
986  po::value<bool>(&g_columnar_large_projections)
987  ->default_value(g_columnar_large_projections)
988  ->implicit_value(true),
989  "Prefer columnar output if projection size is >= "
990  "threshold set by --columnar-large-projections-threshold "
991  "(default 1,000,000 rows).");
992  developer_desc.add_options()(
993  "columnar-large-projections-threshold",
994  po::value<size_t>(&g_columnar_large_projections_threshold)
996  "Threshold (in minimum number of rows) to prefer columnar output for projections. "
997  "Requires --columnar-large-projections to be set.");
998 
999  help_desc.add_options()(
1000  "allow-query-step-cpu-retry",
1001  po::value<bool>(&g_allow_query_step_cpu_retry)
1002  ->default_value(g_allow_query_step_cpu_retry)
1003  ->implicit_value(true),
1004  R"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
1005  help_desc.add_options()("enable-http-binary-server",
1006  po::value<bool>(&g_enable_http_binary_server)
1007  ->default_value(g_enable_http_binary_server)
1008  ->implicit_value(true),
1009  "Enable binary over HTTP Thrift server");
1010 
1011  help_desc.add_options()("enable-assign-render-groups",
1012  po::value<bool>(&g_enable_assign_render_groups)
1013  ->default_value(g_enable_assign_render_groups)
1014  ->implicit_value(true),
1015  "Enable Render Group assignment");
1016 
1017  help_desc.add_options()("enable-query-engine-cuda-streams",
1018  po::value<bool>(&g_query_engine_cuda_streams)
1019  ->default_value(g_query_engine_cuda_streams)
1020  ->implicit_value(true),
1021  "Enable Query Engine CUDA streams");
1022 }
1023 
1024 namespace {
1025 
/**
 * @brief Copies the leading portion of a config file into an in-memory stream,
 *        stopping at the "[web]" section header.
 *
 * Every line read from @p in is written to the result followed by "\n". The
 * "[web]" header line itself is still written (an empty section), but nothing
 * after it is, so boost can validate the remaining program options.
 *
 * The header is recognized after ignoring leading/trailing whitespace, so a
 * config file with CRLF line endings ("[web]\r") or an indented/padded header
 * is handled the same as an exact "[web]" line.
 *
 * @param in Open input stream positioned at the start of the config file.
 * @return Stream holding the config text up to and including the "[web]" line,
 *         or the whole file if no such line exists.
 */
std::stringstream sanitize_config_file(std::ifstream& in) {
  // Characters ignored around a section header; '\r' covers CRLF-terminated files.
  constexpr const char* kWhitespace = " \t\r\n\v\f";
  const std::string web_section_header{"[web]"};

  std::stringstream ss;
  std::string line;
  while (std::getline(in, line)) {
    // Emit the line exactly as read; only the comparison below is whitespace-tolerant.
    ss << line << "\n";
    const auto first = line.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
      continue;  // blank (or whitespace-only) line cannot be the header
    }
    const auto last = line.find_last_not_of(kWhitespace);
    if (line.compare(first, last - first + 1, web_section_header) == 0) {
      break;
    }
  }
  return ss;
}
1038 
1039 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
1040  if (!filename.empty()) {
1041  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
1042  if (!boost::filesystem::exists(filename)) {
1043  std::cerr << desc << " " << filename << " does not exist." << std::endl;
1044  return false;
1045  }
1046  }
1047  return true;
1048 }
1049 
1051  if (!filename.empty()) {
1053  }
1054 }
1055 
1056 } // namespace
1057 
1059  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1060  if (!boost::filesystem::exists(base_path)) {
1061  throw std::runtime_error("HeavyDB base directory does not exist at " + base_path);
1062  }
1063 }
1064 
1066  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1067  const auto data_path = boost::filesystem::path(base_path) / shared::kDataDirectoryName;
1068  if (!boost::filesystem::exists(data_path)) {
1069  throw std::runtime_error("HeavyDB data directory does not exist at '" + base_path +
1070  "'");
1071  }
1072 
1073 // TODO: support lock on Windows
1074 #ifndef _WIN32
1075  {
1076  // If we aren't sharing the data directory, take and hold a write lock on
1077  // heavydb_pid.lck to prevent other processes from trying to share our dir.
1078  // TODO(sy): Probably need to get rid of this PID file because it doesn't make much
1079  // sense to store only one server's PID when we have the --multi-instance option.
1080  auto exe_filename = boost::filesystem::path(exe_name).filename().string();
1081  const std::string lock_file =
1082  (boost::filesystem::path(base_path) / std::string(exe_filename + "_pid.lck"))
1083  .string();
1084  auto pid = std::to_string(getpid());
1085  if (!g_multi_instance) {
1086  VLOG(1) << "taking [" << lock_file << "] read+write lock until process exit";
1087  } else {
1088  VLOG(1) << "taking [" << lock_file << "] read-only lock until process exit";
1089  }
1090 
1091  int fd;
1092  fd = heavyai::safe_open(lock_file.c_str(), O_RDWR | O_CREAT, 0664);
1093  if (fd == -1) {
1094  throw std::runtime_error("failed to open lockfile: " + lock_file + ": " +
1095  std::string(strerror(errno)) + " (" +
1096  std::to_string(errno) + ")");
1097  }
1098 
1099  struct flock fl;
1100  memset(&fl, 0, sizeof(fl));
1101  fl.l_type = !g_multi_instance ? F_WRLCK : F_RDLCK;
1102  fl.l_whence = SEEK_SET;
1103  int cmd;
1104 #ifdef __linux__
1105  // cmd = F_OFD_SETLK; // TODO(sy): broken on centos
1106  cmd = F_SETLK;
1107 #else
1108  cmd = F_SETLK;
1109 #endif // __linux__
1110  int ret = heavyai::safe_fcntl(fd, cmd, &fl);
1111  if (ret == -1 && (errno == EACCES || errno == EAGAIN)) { // locked by someone else
1112  heavyai::safe_close(fd);
1113  throw std::runtime_error(
1114  "another HeavyDB server instance is already using data directory: " +
1115  base_path);
1116  } else if (ret == -1) {
1117  auto errno0 = errno;
1118  heavyai::safe_close(fd);
1119  throw std::runtime_error("failed to lock lockfile: " + lock_file + ": " +
1120  std::string(strerror(errno0)) + " (" +
1121  std::to_string(errno0) + ")");
1122  }
1123 
1124  if (!g_multi_instance) {
1125  if (heavyai::ftruncate(fd, 0) == -1) {
1126  auto errno0 = errno;
1127  heavyai::safe_close(fd);
1128  throw std::runtime_error("failed to truncate lockfile: " + lock_file + ": " +
1129  std::string(strerror(errno0)) + " (" +
1130  std::to_string(errno0) + ")");
1131  }
1132  if (heavyai::safe_write(fd, pid.c_str(), pid.length()) == -1) {
1133  auto errno0 = errno;
1134  heavyai::safe_close(fd);
1135  throw std::runtime_error("failed to write lockfile: " + lock_file + ": " +
1136  std::string(strerror(errno0)) + " (" +
1137  std::to_string(errno0) + ")");
1138  }
1139  }
1140 
1141  // Intentionally leak the file descriptor. Lock will be held until process exit.
1142  }
1143 #endif // _WIN32
1144 
1145  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
1146  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
1147  throw std::runtime_error("File containing DB queries " + db_query_file +
1148  " does not exist.");
1149  }
1150  const auto db_file = boost::filesystem::path(base_path) /
1152  if (!boost::filesystem::exists(db_file)) {
1153  { // check old system catalog existsense
1154  const auto db_file =
1155  boost::filesystem::path(base_path) / shared::kCatalogDirectoryName / "mapd";
1156  if (!boost::filesystem::exists(db_file)) {
1157  throw std::runtime_error("System catalog " + shared::kSystemCatalogName +
1158  " does not exist.");
1159  }
1160  }
1161  }
1162  if (license_path.length() == 0) {
1163  license_path = base_path + "/" + shared::kDefaultLicenseFileName;
1164  }
1165 
1166  // add all parameters to be displayed on startup
1167  LOG(INFO) << "HeavyDB started with data directory at '" << base_path << "'";
1168  if (vm.count("license-path")) {
1169  LOG(INFO) << "License key path set to '" << license_path << "'";
1170  }
1171  g_read_only = read_only;
1172  LOG(INFO) << " Server read-only mode is " << read_only << " (--read-only)";
1173  if (g_multi_instance) {
1174  LOG(INFO) << " Multiple servers per --data directory is " << g_multi_instance
1175  << " (--multi-instance)";
1176  }
1177 #if DISABLE_CONCURRENCY
1178  LOG(INFO) << " Threading layer: serial";
1179 #elif ENABLE_TBB
1180  LOG(INFO) << " Threading layer: TBB";
1181 #else
1182  LOG(INFO) << " Threading layer: std";
1183 #endif
1184  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
1185  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
1186  if (enable_dynamic_watchdog) {
1187  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
1188  }
1189  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
1190  if (enable_runtime_query_interrupt) {
1191  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
1192  << pending_query_interrupt_freq << " (in ms.)";
1193  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
1194  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
1195  }
1196  LOG(INFO) << " Non-kernel time query interrupt is set to "
1197  << enable_non_kernel_time_query_interrupt;
1198 
1199  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
1200  LOG(INFO) << " LogUserId is set to " << Catalog_Namespace::g_log_user_id;
1201  LOG(INFO) << " Maximum idle session duration " << idle_session_duration;
1202  LOG(INFO) << " Maximum active session duration " << max_session_duration;
1203  LOG(INFO) << " Maximum number of sessions " << system_parameters.num_sessions;
1204 
1205  LOG(INFO) << "Legacy delimited import is set to " << g_enable_legacy_delimited_import;
1206 #ifdef ENABLE_IMPORT_PARQUET
1207  LOG(INFO) << "Legacy parquet import is set to " << g_enable_legacy_parquet_import;
1208 #endif
1209  LOG(INFO) << "FSI regex parsed import is set to " << g_enable_fsi_regex_import;
1210 
1211  LOG(INFO) << "Allowed import paths is set to " << allowed_import_paths;
1212  LOG(INFO) << "Allowed export paths is set to " << allowed_export_paths;
1214  base_path, allowed_import_paths, allowed_export_paths);
1215 
1218  ddl_utils::FilePathBlacklist::addToBlacklist(base_path + "/temporary/" +
1225  g_enable_s3_fsi = false;
1226 
1228 #ifdef ENABLE_IMPORT_PARQUET
1229  !g_enable_legacy_parquet_import ||
1230 #endif
1232  g_enable_fsi =
1233  true; // a requirement for FSI import code-paths is for FSI to be enabled
1234  }
1235 
1236  if (disk_cache_level == "foreign_tables") {
1237  if (g_enable_fsi) {
1238  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::fsi;
1239  LOG(INFO) << "Disk cache enabled for foreign tables only";
1240  } else {
1241  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1242  "disk cache disabled";
1243  }
1244  } else if (disk_cache_level == "all") {
1245  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::all;
1246  LOG(INFO) << "Disk cache enabled for all tables";
1247  } else if (disk_cache_level == "local_tables") {
1248  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::non_fsi;
1249  LOG(INFO) << "Disk cache enabled for non-FSI tables";
1250  } else if (disk_cache_level == "none") {
1251  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::none;
1252  LOG(INFO) << "Disk cache disabled";
1253  } else {
1254  throw std::runtime_error{
1255  "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1256  ". Valid options are 'foreign_tables', "
1257  "'local_tables', 'none', and 'all'."};
1258  }
1259 
1260  if (disk_cache_config.size_limit < File_Namespace::CachingFileMgr::getMinimumSize()) {
1261  throw std::runtime_error{"disk-cache-size must be at least " +
1263  }
1264 
1265  if (disk_cache_config.path.empty()) {
1266  disk_cache_config.path = base_path + "/" + shared::kDefaultDiskCacheDirName;
1267  }
1268  ddl_utils::FilePathBlacklist::addToBlacklist(disk_cache_config.path);
1269 
1272 
1273  // If passed in, blacklist all security config files
1274  addOptionalFileToBlacklist(license_path);
1275  addOptionalFileToBlacklist(system_parameters.ssl_cert_file);
1276  addOptionalFileToBlacklist(authMetadata.ca_file_name);
1277  addOptionalFileToBlacklist(system_parameters.ssl_trust_store);
1278  addOptionalFileToBlacklist(system_parameters.ssl_keystore);
1279  addOptionalFileToBlacklist(system_parameters.ssl_key_file);
1280  addOptionalFileToBlacklist(system_parameters.ssl_trust_ca_file);
1281  addOptionalFileToBlacklist(cluster_file);
1282 
1283  if (g_vacuum_min_selectivity < 0) {
1284  throw std::runtime_error{"vacuum-min-selectivity cannot be less than 0."};
1285  }
1286  LOG(INFO) << "Vacuum Min Selectivity: " << g_vacuum_min_selectivity;
1287 
1288  LOG(INFO) << "Enable system tables is set to " << g_enable_system_tables;
1289  if (g_enable_system_tables) {
1290  // System tables currently reuse FSI infrastructure and therefore, require FSI to be
1291  // enabled
1292  if (!g_enable_fsi) {
1293  g_enable_fsi = true;
1294  LOG(INFO) << "FSI has been enabled as a side effect of enabling system tables";
1295  }
1296  }
1297  LOG(INFO) << "Enable FSI is set to " << g_enable_fsi;
1298  LOG(INFO) << "Enable logs system tables set to " << g_enable_logs_system_tables;
1299 
1300  static const boost::regex interval_regex{"^\\d{1,}[SHD]$",
1301  boost::regex::extended | boost::regex::icase};
1302  if (!boost::regex_match(g_logs_system_tables_refresh_interval, interval_regex)) {
1303  throw std::runtime_error{
1304  "Invalid interval value provided for the \"logs-system-tables-refresh-interval\" "
1305  "option. Interval should have the following format: nS, nH, or nD"};
1306  }
1307  LOG(INFO) << "Logs system tables refresh interval set to "
1309 
1311  throw std::runtime_error{
1312  "Invalid value provided for the \"logs-system-tables-max-files-count\" "
1313  "option. Value must be greater than 0."};
1314  }
1315  LOG(INFO) << "Maximum number of logs system table files set to "
1317 
1318 #ifdef ENABLE_MEMKIND
1319  if (g_enable_tiered_cpu_mem) {
1320  if (g_pmem_path == "") {
1321  throw std::runtime_error{"pmem-path must be set to use tiered cpu memory"};
1322  }
1323  if (g_pmem_size == 0) {
1324  throw std::runtime_error{"pmem-size must be set to use tiered cpu memory"};
1325  }
1326  if (!std::filesystem::exists(g_pmem_path.c_str())) {
1327  throw std::runtime_error{"path to PMem directory (" + g_pmem_path +
1328  ") does not exist."};
1329  }
1330  }
1331 #endif
1332 }
1333 
1335  const bool enable_runtime_udfs,
1336  const bool enable_udf_registration_for_all_users) {
1337  return enable_runtime_udfs
1338  ? (enable_udf_registration_for_all_users
1343 }
1344 
1346  int argc,
1347  char const* const* argv,
1348  const bool should_init_logging) {
1349  po::options_description all_desc("All options");
1350  all_desc.add(help_desc).add(developer_desc);
1351 
1352  try {
1353  po::store(po::command_line_parser(argc, argv)
1354  .options(all_desc)
1355  .positional(positional_options)
1356  .run(),
1357  vm);
1358  po::notify(vm);
1359 
1360  if (vm.count("help")) {
1361  std::cerr << "Usage: heavydb <data directory path> [-p <port number>] "
1362  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1363  << std::endl
1364  << std::endl;
1365  std::cout << help_desc << std::endl;
1366  return 0;
1367  }
1368  if (vm.count("dev-options")) {
1369  std::cout << "Usage: heavydb <data directory path> [-p <port number>] "
1370  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1371  << std::endl
1372  << std::endl;
1373  std::cout << developer_desc << std::endl;
1374  return 0;
1375  }
1376  if (vm.count("version")) {
1377  std::cout << "HeavyDB Version: " << MAPD_RELEASE << std::endl;
1378  return 0;
1379  }
1380 
1381  if (vm.count("config")) {
1382  std::ifstream settings_file(system_parameters.config_file);
1383 
1384  auto sanitized_settings = sanitize_config_file(settings_file);
1385 
1386  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
1387  po::notify(vm);
1388  settings_file.close();
1389  }
1390 
1391  if (!g_enable_union) {
1392  std::cerr
1393  << "The enable-union option is DEPRECATED and is now enabled by default. "
1394  "Please remove use of this option, as it may be disabled in the future."
1395  << std::endl;
1396  }
1397 
1398  // Trim base path before executing migration
1399  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1400 
1401  // Execute rebrand migration before accessing any system files.
1402  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
1403  if (!boost::filesystem::exists(lockfiles_path)) {
1404  if (!boost::filesystem::create_directory(lockfiles_path)) {
1405  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
1406  " subdirectory under "
1407  << base_path << std::endl;
1408  return 1;
1409  }
1410  }
1411  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
1412  if (!boost::filesystem::exists(lockfiles_path2)) {
1413  if (!boost::filesystem::create_directory(lockfiles_path2)) {
1414  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1415  shared::kCatalogDirectoryName + " subdirectory under "
1416  << base_path << std::endl;
1417  return 1;
1418  }
1419  }
1420  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
1421  if (!boost::filesystem::exists(lockfiles_path3)) {
1422  if (!boost::filesystem::create_directory(lockfiles_path3)) {
1423  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1424  shared::kDataDirectoryName + " subdirectory under "
1425  << base_path << std::endl;
1426  return 1;
1427  }
1428  }
1432  }
1433 
1434  if (!vm["enable-runtime-udf"].defaulted()) {
1435  if (!vm["enable-runtime-udfs"].defaulted()) {
1436  std::cerr << "Usage Error: Both enable-runtime-udf and enable-runtime-udfs "
1437  "specified. Please remove use of the enable-runtime-udfs flag, "
1438  "as it will be deprecated in the future."
1439  << std::endl;
1440  return 1;
1441  } else {
1442  enable_runtime_udfs = enable_runtime_udf;
1443  std::cerr << "The enable-runtime-udf flag has been deprecated and replaced "
1444  "with enable-runtime-udfs. Please remove use of this option "
1445  "as it will be disabled in the future."
1446  << std::endl;
1447  }
1448  }
1449  system_parameters.runtime_udf_registration_policy =
1450  construct_runtime_udf_registration_policy(enable_runtime_udfs,
1451  enable_udf_registration_for_all_users);
1452 
1453  if (should_init_logging) {
1454  init_logging();
1455  }
1456 
1457  if (!trim_and_check_file_exists(system_parameters.ssl_cert_file, "ssl cert file")) {
1458  return 1;
1459  }
1460  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
1461  return 1;
1462  }
1463  if (!trim_and_check_file_exists(system_parameters.ssl_trust_store,
1464  "ssl trust store")) {
1465  return 1;
1466  }
1467  if (!trim_and_check_file_exists(system_parameters.ssl_keystore, "ssl key store")) {
1468  return 1;
1469  }
1470  if (!trim_and_check_file_exists(system_parameters.ssl_key_file, "ssl key file")) {
1471  return 1;
1472  }
1473  if (!trim_and_check_file_exists(system_parameters.ssl_trust_ca_file, "ssl ca file")) {
1474  return 1;
1475  }
1476 
1477  g_enable_watchdog = enable_watchdog;
1479  watchdog_none_encoded_string_translation_limit;
1480  g_enable_dynamic_watchdog = enable_dynamic_watchdog;
1481  g_dynamic_watchdog_time_limit = dynamic_watchdog_time_limit;
1482  g_enable_runtime_query_interrupt = enable_runtime_query_interrupt;
1483  g_enable_non_kernel_time_query_interrupt = enable_non_kernel_time_query_interrupt;
1484  g_pending_query_interrupt_freq = pending_query_interrupt_freq;
1485  g_running_query_interrupt_freq = running_query_interrupt_freq;
1486  g_use_estimator_result_cache = use_estimator_result_cache;
1487  g_enable_data_recycler = enable_data_recycler;
1488  g_use_hashtable_cache = use_hashtable_cache;
1489  g_max_cacheable_hashtable_size_bytes = max_cacheable_hashtable_size_bytes;
1490  g_hashtable_cache_total_bytes = hashtable_cache_total_bytes;
1491 
1492  } catch (po::error& e) {
1493  std::cerr << "Usage Error: " << e.what() << std::endl;
1494  return 1;
1495  }
1496 
1497  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1498  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
1499  return 1;
1500  }
1501 
1502  if (!g_from_table_reordering) {
1503  LOG(INFO) << " From clause table reordering is disabled";
1504  }
1505 
1507  LOG(INFO) << " Filter push down for JOIN is enabled";
1508  }
1509 
1510  if (vm.count("udf")) {
1511  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
1512 
1513  if (!boost::filesystem::exists(udf_file_name)) {
1514  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
1515  return 1;
1516  }
1517 
1518  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
1519  }
1520 
1521  if (vm.count("udf-compiler-path")) {
1522  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
1523  }
1524 
1525  auto trim_string = [](std::string& s) {
1526  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
1527  };
1528 
1529  if (vm.count("udf-compiler-options")) {
1530  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1531  }
1532 
1533  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
1534  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
1535  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
1536  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1537  boost::is_any_of("\"'"));
1538 
1539  if (!system_parameters.ha_group_id.empty()) {
1540  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
1541  if (system_parameters.ha_unique_server_id.empty()) {
1542  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
1543  return 5;
1544  } else {
1545  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
1546  }
1547  if (system_parameters.ha_brokers.empty()) {
1548  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
1549  return 6;
1550  } else {
1551  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
1552  }
1553  if (system_parameters.ha_shared_data.empty()) {
1554  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
1555  return 7;
1556  } else {
1557  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
1558  }
1559  }
1560 
1561  boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of("\"'"));
1562  if (!system_parameters.master_address.empty()) {
1563  if (!read_only) {
1564  LOG(ERROR) << "The master-address setting is only allowed in read-only mode";
1565  return 9;
1566  }
1567  LOG(INFO) << " Master Address is " << system_parameters.master_address;
1568  LOG(INFO) << " Master Port is " << system_parameters.master_port;
1569  }
1570 
1571  if (g_max_import_threads < 1) {
1572  std::cerr << "max-import-threads must be >= 1 (was set to " << g_max_import_threads
1573  << ")." << std::endl;
1574  return 8;
1575  } else {
1576  LOG(INFO) << " Max import threads " << g_max_import_threads;
1577  }
1578 
1579  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
1580  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
1581  LOG(INFO) << " Min CPU buffer pool slab size " << system_parameters.min_cpu_slab_size;
1582  LOG(INFO) << " Max CPU buffer pool slab size " << system_parameters.max_cpu_slab_size;
1583  LOG(INFO) << " Min GPU buffer pool slab size " << system_parameters.min_gpu_slab_size;
1584  LOG(INFO) << " Max GPU buffer pool slab size " << system_parameters.max_gpu_slab_size;
1585  LOG(INFO) << " calcite JVM max memory " << system_parameters.calcite_max_mem;
1586  LOG(INFO) << " HeavyDB Server Port " << system_parameters.omnisci_server_port;
1587  LOG(INFO) << " HeavyDB Calcite Port " << system_parameters.calcite_port;
1588  LOG(INFO) << " Enable Calcite view optimize "
1589  << system_parameters.enable_calcite_view_optimize;
1590  LOG(INFO) << " Allow Local Auth Fallback: "
1591  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
1592  LOG(INFO) << " ParallelTop min threshold: " << g_parallel_top_min;
1593  LOG(INFO) << " ParallelTop watchdog max: " << g_parallel_top_max;
1594 
1595  LOG(INFO) << " Enable Data Recycler: "
1596  << (g_enable_data_recycler ? "enabled" : "disabled");
1597  if (g_enable_data_recycler) {
1598  LOG(INFO) << " \t Use hashtable cache: "
1599  << (g_use_hashtable_cache ? "enabled" : "disabled");
1600  if (g_use_hashtable_cache) {
1601  LOG(INFO) << " \t\t Total amount of bytes that hashtable cache keeps: "
1602  << g_hashtable_cache_total_bytes / (1024 * 1024) << " MB.";
1603  LOG(INFO) << " \t\t Per-hashtable size limit: "
1604  << g_max_cacheable_hashtable_size_bytes / (1024 * 1024) << " MB.";
1605  }
1606  LOG(INFO) << " \t Use query resultset cache: "
1607  << (g_use_query_resultset_cache ? "enabled" : "disabled");
1609  LOG(INFO) << " \t\t Total amount of bytes that query resultset cache keeps: "
1610  << g_query_resultset_cache_total_bytes / (1024 * 1024) << " MB.";
1611  LOG(INFO) << " \t\t Per-query resultset size limit: "
1612  << g_max_cacheable_query_resultset_size_bytes / (1024 * 1024) << " MB.";
1613  }
1614  LOG(INFO) << " \t\t Use auto query resultset caching: "
1615  << (g_allow_auto_resultset_caching ? "enabled" : "disabled");
1617  LOG(INFO) << " \t\t\t The maximum bytes of a query resultset which is "
1618  "automatically cached: "
1619  << g_auto_resultset_caching_threshold << " Bytes.";
1620  }
1621  LOG(INFO) << " \t\t Use query step skipping: "
1622  << (g_allow_query_step_skipping ? "enabled" : "disabled");
1623  LOG(INFO) << " \t Use chunk metadata cache: "
1624  << (g_use_chunk_metadata_cache ? "enabled" : "disabled");
1625  }
1626 
1627  const std::string udf_reg_policy_log_prefix{
1628  " \t\t Runtime UDF/UDTF Registration Policy: "};
1629  switch (system_parameters.runtime_udf_registration_policy) {
1631  LOG(INFO) << udf_reg_policy_log_prefix << " DISALLOWED";
1632  break;
1633  }
1635  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for superusers only";
1636  break;
1637  }
1639  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for all users";
1640  break;
1641  }
1642  default: {
1643  UNREACHABLE() << "Unrecognized option for Runtime UDF/UDTF registration policy.";
1644  }
1645  }
1646 
1647  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
1648  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
1649  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
1650  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
1651  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
1652 
1653  return boost::none;
1654 }
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
bool g_enable_parallel_window_partition_sort
unsigned connect_timeout
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:100
double g_running_query_interrupt_freq
Definition: Execute.cpp:129
bool g_enable_smem_group_by
size_t g_pmem_size
size_t g_parallel_top_max
Definition: ResultSet.cpp:48
int safe_open(const char *path, int flags, mode_t mode) noexcept
Definition: heavyai_fs.cpp:90
float g_filter_push_down_low_frac
Definition: Execute.cpp:96
const std::string kDataDirectoryName
bool g_use_query_resultset_cache
Definition: Execute.cpp:148
bool g_multi_instance
Definition: heavyai_locks.h:21
size_t g_cpu_sub_task_size
Definition: Execute.cpp:83
SystemParameters::RuntimeUdfRegistrationPolicy construct_runtime_udf_registration_policy(const bool enable_runtime_udfs, const bool enable_udf_registration_for_all_users)
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
bool g_strip_join_covered_quals
Definition: Execute.cpp:107
bool g_enable_logs_system_tables
Definition: Catalog.cpp:100
bool g_enable_direct_columnarization
Definition: Execute.cpp:122
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:826
bool g_enable_lazy_fetch
Definition: Execute.cpp:124
const std::string kDefaultDiskCacheDirName
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:81
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:128
#define LOG(tag)
Definition: Logger.h:216
bool g_allow_query_step_skipping
Definition: Execute.cpp:151
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
Definition: Logger.cpp:17
const std::string kDefaultLogDirName
#define UNREACHABLE()
Definition: Logger.h:266
const std::string kSystemCatalogName
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:98
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
size_t g_streaming_topn_max
Definition: ResultSet.cpp:49
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:80
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:152
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:89
bool g_enable_geo_ops_on_uncompressed_coords
Definition: Execute.cpp:114
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:126
int g_hll_precision_bits
bool g_enable_data_recycler
Definition: Execute.cpp:146
std::string to_string(char const *&&v)
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:91
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:149
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:153
bool g_enable_string_functions
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:138
size_t g_watchdog_none_encoded_string_translation_limit
Definition: Execute.cpp:81
bool g_null_div_by_zero
Definition: Execute.cpp:88
bool g_enable_interop
size_t g_parallel_top_min
Definition: ResultSet.cpp:47
bool g_enable_columnar_output
Definition: Execute.cpp:99
ssize_t safe_write(const int fd, const void *buffer, const size_t buffer_size) noexcept
Definition: heavyai_fs.cpp:144
bool g_enable_s3_fsi
Definition: Catalog.cpp:97
bool g_enable_idp_temporary_users
Definition: SysCatalog.cpp:63
bool g_from_table_reordering
Definition: Execute.cpp:90
size_t g_window_function_aggregation_tree_fanout
static void setDefaultImportPath(const std::string &base_path)
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_assign_render_groups
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:104
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
void init(LogOptions const &log_opts)
Definition: Logger.cpp:308
std::string g_logs_system_tables_refresh_interval
Definition: Catalog.cpp:102
bool g_enable_http_binary_server
std::string g_pmem_path
static bool migrationEnabled()
Definition: MigrationMgr.h:43
float g_filter_push_down_high_frac
Definition: Execute.cpp:97
bool g_enable_distance_rangejoin
Definition: Execute.cpp:103
bool g_bigint_count
bool g_enable_watchdog
int64_t g_bitmap_memory_limit
size_t g_max_memory_allocation_size
Definition: Execute.cpp:116
double g_overlaps_target_entries_per_bin
Definition: Execute.cpp:106
size_t g_approx_quantile_buffer
Definition: Execute.cpp:158
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:150
size_t g_max_log_length
Definition: Execute.cpp:163
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:105
bool g_enable_dev_table_functions
Definition: Execute.cpp:113
Global bool for controlling render group assignment, remove along with legacy poly rendering...
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:154
bool g_enable_window_functions
Definition: Execute.cpp:111
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:155
size_t g_min_memory_allocation_size
Definition: Execute.cpp:117
bool with_keepalive
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool g_read_only
Definition: File.cpp:40
bool g_enable_seconds_refresh
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:85
size_t g_estimator_failure_max_groupby_size
tuple line
Definition: parse_ast.py:10
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:135
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:161
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:84
bool g_cache_string_hash
const std::string kCatalogDirectoryName
float g_fraction_code_cache_to_evict
bool g_allow_system_dashboard_update
Definition: DBHandler.cpp:121
bool g_enable_filter_push_down
Definition: Execute.cpp:95
bool g_use_estimator_result_cache
Definition: Execute.cpp:127
const std::string kDefaultLicenseFileName
bool g_enable_bump_allocator
Definition: Execute.cpp:120
bool g_enable_parallel_window_partition_compute
bool g_enable_union
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:82
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:87
bool g_allow_cpu_retry
Definition: Execute.cpp:86
int32_t ftruncate(const int32_t fd, int64_t length)
Definition: heavyai_fs.cpp:86
size_t g_approx_quantile_centroids
Definition: Execute.cpp:159
const std::string kLockfilesDirectoryName
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:873
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:42
bool g_optimize_row_initialization
Definition: Execute.cpp:101
static bool run
int safe_fcntl(int fd, int cmd, struct flock *fl) noexcept
Definition: heavyai_fs.cpp:112
bool g_columnar_large_projections
int safe_close(int fd) noexcept
Definition: heavyai_fs.cpp:101
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:85
bool g_enable_fsi
Definition: Catalog.cpp:96
size_t g_columnar_large_projections_threshold
bool g_query_engine_cuda_streams
Definition: QueryEngine.h:9
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:289
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:87
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:125
size_t g_max_import_threads
Definition: Importer.cpp:106
bool g_use_hashtable_cache
Definition: Execute.cpp:147
#define VLOG(n)
Definition: Logger.h:316
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:156
size_t g_large_ndv_multiplier
bool g_enable_table_functions
Definition: Execute.cpp:112
std::string cluster_command_line_arg
size_t g_gpu_smem_threshold
Definition: Execute.cpp:130