19 #include <sys/types.h>
25 using namespace std::string_literals;
30 #include "MapDRelease.h"
81 namespace Catalog_Namespace {
96 log_options_.set_base_path(base_path);
100 po::options_description& desc = help_desc_;
102 desc.add_options()(
"help,h",
"Show available options.");
107 ->implicit_value(
true),
108 R
"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
109 desc.add_options()("allow-loop-joins",
110 po::value<bool>(&allow_loop_joins)
111 ->default_value(allow_loop_joins)
112 ->implicit_value(
true),
113 "Enable loop joins.");
114 desc.add_options()(
"bigint-count",
117 ->implicit_value(
true),
118 "Use 64-bit count.");
121 "enable-executor-resource-mgr",
124 ->implicit_value(
true),
125 "Enable executor resource manager to track execution resources and selectively "
126 "gate concurrency based on resource availability.");
132 "executor-cpu-result-mem-ratio",
135 "Set executor resource manager reserved memory for query result sets as a ratio "
136 "greater than 0, representing the fraction of the system memory not allocated for "
137 "the CPU buffer pool. Values of 1.0 are permitted to allow oversubscription when "
138 "warranted, but too high a value can cause out-of-memory errors. Requires "
139 "--executor-resource-mgr to be set");
142 "executor-cpu-result-mem-bytes",
145 "Set executor resource manager reserved memory for query result sets in bytes, "
146 "this overrides the default reservation of 80% the size of the system memory that "
147 "is not allocated for the CPU buffer pool. Use 0 for auto. Requires "
148 "--enable-executor-resource-mgr to be set.");
155 "executor-per-query-max-cpu-threads-ratio",
158 "Set max fraction of executor resource manager total CPU slots/threads that can be "
159 "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
165 "executor-per-query-max-cpu-result-mem-ratio",
168 "Set max fraction of executor resource manager total CPU result memory reservation "
170 "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
173 "allow-cpu-kernel-concurrency",
176 ->implicit_value(
true),
177 "Allow for multiple queries to run execution kernels concurrently on CPU. Requires "
178 "--enable-executor-resource-mgr to be set.");
181 "allow-cpu-gpu-kernel-concurrency",
184 ->implicit_value(
true),
185 "Allow multiple queries to run execution kernels concurrently on CPU while a "
186 "GPU query is executing. Requires --enable-executor-resource-mgr to be set.");
193 "allow-cpu-thread-oversubscription-concurrency",
198 ->implicit_value(
true),
199 "Allow for concurrent query kernel execution even if it results in "
200 "oversubscription of CPU threads. Caution should be used when turning this on as "
201 "it can lead to thread exhaustion. Requires --enable-executor-resource-mgr to be "
210 "allow-cpu-result-mem-oversubscription-concurrency",
215 ->implicit_value(
true),
216 "Allow for concurrent query kernel execution even if it results in "
217 "oversubscription of CPU memory. Caution should be used when turning this on as it "
218 "can lead to out-of-memory errors. Requires --enable-executor-resource-mgr to be "
222 "executor-max-available-resource-use-ratio",
225 "Set max proportion (0 < ratio <= 1.0) of available resources that should be "
226 "granted to a query. Requires --executor-resource-mgr to be set");
228 desc.add_options()(
"calcite-max-mem",
229 po::value<size_t>(&system_parameters.calcite_max_mem)
230 ->default_value(system_parameters.calcite_max_mem),
231 "Max memory available to calcite JVM.");
233 desc.add_options()(
"calcite-port",
234 po::value<int>(&system_parameters.calcite_port)
235 ->default_value(system_parameters.calcite_port),
236 "Calcite port number.");
238 desc.add_options()(
"config",
239 po::value<std::string>(&system_parameters.config_file),
240 "Path to server configuration file.");
241 desc.add_options()(
"cpu-buffer-mem-bytes",
242 po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
243 ->default_value(system_parameters.cpu_buffer_mem_bytes),
244 "Size of memory reserved for CPU buffers, in bytes.");
246 desc.add_options()(
"cpu-only",
247 po::value<bool>(&system_parameters.cpu_only)
248 ->default_value(system_parameters.cpu_only)
249 ->implicit_value(
true),
250 "Run on CPU only, even if GPUs are available.");
251 desc.add_options()(
"cuda-block-size",
252 po::value<size_t>(&system_parameters.cuda_block_size)
253 ->default_value(system_parameters.cuda_block_size),
254 "Size of block to use on NVIDIA GPU.");
255 desc.add_options()(
"cuda-grid-size",
256 po::value<size_t>(&system_parameters.cuda_grid_size)
257 ->default_value(system_parameters.cuda_grid_size),
258 "Size of grid to use on NVIDIA GPU.");
// Registers --optimize-cuda-block-and-grid-sizes as a bool bound to
// optimize_cuda_block_and_grid_sizes: the default is false, and the implicit value
// (used when the flag is given without an explicit value) is true.
// NOTE(review): unlike the neighbouring options, no description string is passed
// here, so this entry carries no explanatory help text — confirm that is intentional.
259 desc.add_options()(
"optimize-cuda-block-and-grid-sizes",
260 po::value<bool>(&optimize_cuda_block_and_grid_sizes)
261 ->default_value(
false)
262 ->implicit_value(
true));
267 po::value<std::string>(&base_path)->
required()->default_value(
"storage"),
268 "Directory path to HeavyDB data storage (catalogs, raw data, log files, etc).");
269 positional_options.add(
"data", 1);
271 desc.add_options()(
"db-query-list",
272 po::value<std::string>(&db_query_file),
273 "Path to file containing HeavyDB warmup queries.");
276 po::value<bool>(&exit_after_warmup)->default_value(
false)->implicit_value(
true),
277 "Exit after HeavyDB warmup queries.");
278 desc.add_options()(
"dynamic-watchdog-time-limit",
279 po::value<unsigned>(&dynamic_watchdog_time_limit)
280 ->default_value(dynamic_watchdog_time_limit)
281 ->implicit_value(10000),
282 "Dynamic watchdog time limit, in milliseconds.");
283 desc.add_options()(
"enable-data-recycler",
284 po::value<bool>(&enable_data_recycler)
285 ->default_value(enable_data_recycler)
286 ->implicit_value(
true),
287 "Use data recycler.");
288 desc.add_options()(
"use-hashtable-cache",
289 po::value<bool>(&use_hashtable_cache)
290 ->default_value(use_hashtable_cache)
291 ->implicit_value(
true),
292 "Use hashtable cache.");
293 desc.add_options()(
"use-query-resultset-cache",
296 ->implicit_value(
true),
297 "Use query resultset cache.");
298 desc.add_options()(
"use-chunk-metadata-cache",
301 ->implicit_value(
true),
302 "Use chunk metadata cache.");
304 "hashtable-cache-total-bytes",
305 po::value<size_t>(&hashtable_cache_total_bytes)
306 ->default_value(hashtable_cache_total_bytes)
307 ->implicit_value(4294967296),
308 "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
309 desc.add_options()(
"max-cacheable-hashtable-size-bytes",
310 po::value<size_t>(&max_cacheable_hashtable_size_bytes)
311 ->default_value(max_cacheable_hashtable_size_bytes)
312 ->implicit_value(2147483648),
313 "The maximum size of hashtable that is available to cache, in "
314 "bytes (default: 2GB).");
316 "query-resultset-cache-total-bytes",
319 "Size of total memory space for query resultset cache, in bytes (default: 4GB).");
320 desc.add_options()(
"max-query-resultset-size-bytes",
323 "The maximum size of query resultset that is available to cache, in "
324 "bytes (default: 2GB).");
325 desc.add_options()(
"allow-auto-query-resultset-caching",
328 ->implicit_value(
true),
329 "Allow automatic query resultset caching when the size of "
330 "query resultset is smaller or equal to the threshold defined "
331 "by `auto-resultset-caching-threshold-bytes`, in bytes (to "
332 "enable this, query resultset recycler "
333 "should be enabled, default: 1048576 bytes (or 1MB)).");
335 "auto-resultset-caching-threshold-bytes",
338 "A threshold that allows caching query resultset automatically if the size of "
339 "resultset is less than it, in bytes (default: 1MB).");
340 desc.add_options()(
"allow-query-step-skipping",
343 ->implicit_value(
true),
344 "Allow query step skipping when multi-step query has at least "
345 "one cached query resultset.");
346 desc.add_options()(
"enable-debug-timer",
349 ->implicit_value(
true),
350 "Enable debug timer logging.");
351 desc.add_options()(
"enable-dynamic-watchdog",
352 po::value<bool>(&enable_dynamic_watchdog)
353 ->default_value(enable_dynamic_watchdog)
354 ->implicit_value(
true),
355 "Enable dynamic watchdog.");
356 desc.add_options()(
"enable-filter-push-down",
359 ->implicit_value(
true),
360 "Enable filter push down through joins.");
362 "enable-bbox-intersect-hashjoin",
365 ->implicit_value(
true),
366 "Enable the bounding box intersect hash join framework to enable post-filtering of "
367 "pairs of geometries before actually comptuing geometry function.");
368 desc.add_options()(
"enable-hashjoin-many-to-many",
371 ->implicit_value(
true),
372 "Enable the bounding box intersect hash join framework to more "
373 "spatial join operators for pairs of geometry types corresponding "
374 "to many-to-many relationship.");
375 desc.add_options()(
"enable-distance-rangejoin",
378 ->implicit_value(
true),
379 "Enable accelerating point distance joins with a hash table. "
380 "This rewrites ST_Distance when using an upperbound (<= X).");
381 desc.add_options()(
"enable-runtime-query-interrupt",
382 po::value<bool>(&enable_runtime_query_interrupt)
383 ->default_value(enable_runtime_query_interrupt)
384 ->implicit_value(
true),
385 "Enable runtime query interrupt.");
386 desc.add_options()(
"enable-non-kernel-time-query-interrupt",
387 po::value<bool>(&enable_non_kernel_time_query_interrupt)
388 ->default_value(enable_non_kernel_time_query_interrupt)
389 ->implicit_value(
true),
390 "Enable non-kernel time query interrupt.");
391 desc.add_options()(
"pending-query-interrupt-freq",
392 po::value<unsigned>(&pending_query_interrupt_freq)
393 ->default_value(pending_query_interrupt_freq)
394 ->implicit_value(1000),
395 "A frequency of checking the request of pending query "
396 "interrupt from user (in millisecond).");
397 desc.add_options()(
"running-query-interrupt-freq",
398 po::value<double>(&running_query_interrupt_freq)
399 ->default_value(running_query_interrupt_freq)
400 ->implicit_value(0.5),
401 "A frequency of checking the request of running query "
402 "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
403 desc.add_options()(
"use-estimator-result-cache",
404 po::value<bool>(&use_estimator_result_cache)
405 ->default_value(use_estimator_result_cache)
406 ->implicit_value(
true),
407 "Use estimator result cache.");
410 "enable-string-dict-hash-cache",
413 ->implicit_value(
true),
414 "Cache string hash values in the string dictionary server during import.");
416 desc.add_options()(
"enable-thrift-logs",
419 ->implicit_value(
true),
420 "Enable writing messages directly from thrift to stdout/stderr.");
421 desc.add_options()(
"enable-watchdog",
422 po::value<bool>(&enable_watchdog)
423 ->default_value(enable_watchdog)
424 ->implicit_value(
true),
426 desc.add_options()(
"watchdog-max-projected-rows-per-device",
427 po::value<size_t>(&watchdog_max_projected_rows_per_device)
428 ->default_value(watchdog_max_projected_rows_per_device),
429 "Max number of rows allowed to be projected when running a query "
430 "with watchdog enabled.");
432 "preflight-count-query-threshold",
433 po::value<size_t>(&preflight_count_query_threshold)
434 ->default_value(preflight_count_query_threshold),
435 "Threshold to run pre-flight count query which computes # output rows accurately.");
436 desc.add_options()(
"watchdog-none-encoded-string-translation-limit",
437 po::value<size_t>(&watchdog_none_encoded_string_translation_limit)
438 ->default_value(watchdog_none_encoded_string_translation_limit),
439 "Max number of none-encoded strings allowed to be translated "
440 "to dictionary-encoded with watchdog enabled");
441 desc.add_options()(
"filter-push-down-low-frac",
445 "Lower threshold for selectivity of filters that are pushed down.");
446 desc.add_options()(
"filter-push-down-high-frac",
450 "Higher threshold for selectivity of filters that are pushed down.");
451 desc.add_options()(
"filter-push-down-passing-row-ubound",
455 "Upperbound on the number of rows that should pass the filter "
456 "if the selectivity is less than "
457 "the high fraction threshold.");
458 desc.add_options()(
"from-table-reordering",
461 ->implicit_value(
true),
462 "Enable automatic table reordering in FROM clause.");
463 desc.add_options()(
"gpu-buffer-mem-bytes",
464 po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
465 ->default_value(system_parameters.gpu_buffer_mem_bytes),
466 "Size of memory reserved for GPU buffers, in bytes, per GPU.");
467 desc.add_options()(
"gpu-input-mem-limit",
468 po::value<double>(&system_parameters.gpu_input_mem_limit)
469 ->default_value(system_parameters.gpu_input_mem_limit),
470 "Force query to CPU when input data memory usage exceeds this "
471 "percentage of available GPU memory.");
473 "hll-precision-bits",
477 "Number of bits used from the hash value used to specify the bucket number.");
479 desc.add_options()(
"http-port",
480 po::value<int>(&http_port)->default_value(http_port),
481 "HTTP port number.");
482 desc.add_options()(
"http-binary-port",
483 po::value<int>(&http_binary_port)->default_value(http_binary_port),
484 "HTTP binary port number.");
487 "idle-session-duration",
488 po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
489 "Maximum duration of idle session.");
490 desc.add_options()(
"inner-join-fragment-skipping",
493 ->implicit_value(
true),
494 "Enable/disable inner join fragment skipping. This feature is "
495 "considered stable and is enabled by default. This "
496 "parameter will be removed in a future release.");
498 "max-session-duration",
499 po::value<int>(&max_session_duration)->default_value(max_session_duration),
500 "Maximum duration of active session.");
501 desc.add_options()(
"num-sessions",
502 po::value<int>(&system_parameters.num_sessions)
503 ->default_value(system_parameters.num_sessions),
504 "Maximum number of active session.");
505 desc.add_options()(
"null-div-by-zero",
508 ->implicit_value(
true),
509 "Return null on division by zero instead of throwing an exception.");
511 "num-reader-threads",
512 po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
513 "Number of reader threads to use.");
515 "max-import-threads",
517 "Max number of default import threads to use (num hardware threads will be used "
518 "instead if lower). Can be overriden with copy statement threads option).");
520 "bbox-intersect-max-table-size-bytes",
523 "The maximum size in bytes of the hash table for bounding box intersect.");
524 desc.add_options()(
"bbox-intersect-target-entries-per-bin",
527 "The target number of entries per bin for bounding box intersect");
529 desc.add_options()(
"port,p",
530 po::value<int>(&system_parameters.omnisci_server_port)
531 ->default_value(system_parameters.omnisci_server_port),
534 desc.add_options()(
"num-gpus",
535 po::value<int>(&system_parameters.num_gpus)
536 ->default_value(system_parameters.num_gpus),
537 "Number of gpus to use.");
540 po::value<bool>(&read_only)->default_value(read_only)->implicit_value(
true),
541 "Enable read-only mode.");
545 po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
546 "Reduces GPU memory available to the HeavyDB allocator by this amount. Used for "
547 "compiled code cache and ancillary GPU functions and other processes that may also "
548 "be using the GPU concurrent with HeavyDB.");
550 desc.add_options()(
"start-gpu",
551 po::value<int>(&system_parameters.start_gpu)
552 ->default_value(system_parameters.start_gpu),
553 "First gpu to use.");
554 desc.add_options()(
"trivial-loop-join-threshold",
557 ->implicit_value(1000),
558 "The maximum number of rows in the inner table of a loop join "
559 "considered to be trivially small.");
561 "uniform-request-ids-per-thrift-call",
564 ->implicit_value(
true),
565 "If true (default) then assign the same request_id to thrift calls that were "
566 "initiated by the same external thrift call. If false then assign different "
567 "request_ids and log the parent/child relationships.");
568 desc.add_options()(
"verbose",
569 po::value<bool>(&verbose_logging)
570 ->default_value(verbose_logging)
571 ->implicit_value(
true),
572 "Write additional debug log messages to server logs.");
574 "enable-runtime-udf",
575 po::value<bool>(&enable_runtime_udf)
576 ->default_value(enable_runtime_udf)
577 ->implicit_value(
true),
578 "DEPRECATED. Please use `enable-runtime-udfs` instead as this flag will be removed "
579 "in the near future.");
581 "enable-runtime-udfs",
582 po::value<bool>(&enable_runtime_udfs)
583 ->default_value(enable_runtime_udfs)
584 ->implicit_value(
true),
585 "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
586 "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
587 "Compiler server, packaged separately.");
588 desc.add_options()(
"enable-udf-registration-for-all-users",
589 po::value<bool>(&enable_udf_registration_for_all_users)
590 ->default_value(enable_udf_registration_for_all_users)
591 ->implicit_value(
true),
592 "Allow all users, not just superusers, to register runtime "
593 "UDFs/UDTFs. Option only valid if "
594 "`--enable-runtime-udfs` is set to true.");
595 desc.add_options()(
"version,v",
"Print Version Number.");
596 desc.add_options()(
"enable-string-functions",
599 ->implicit_value(
true),
600 "Enable experimental string functions.");
601 desc.add_options()(
"enable-experimental-string-functions",
604 ->implicit_value(
true),
605 "DEPRECATED. String functions are now enabled by default, "
606 "but can still be controlled with --enable-string-functions.");
610 "Enable foreign storage interface.");
612 desc.add_options()(
"enable-legacy-delimited-import",
615 ->implicit_value(
true),
616 "Use legacy importer for delimited sources.");
617 #ifdef ENABLE_IMPORT_PARQUET
618 desc.add_options()(
"enable-legacy-parquet-import",
619 po::value<bool>(&g_enable_legacy_parquet_import)
620 ->default_value(g_enable_legacy_parquet_import)
621 ->implicit_value(
true),
622 "Use legacy importer for parquet sources.");
624 desc.add_options()(
"enable-fsi-regex-import",
627 ->implicit_value(
true),
628 "Use FSI importer for regex parsed sources.");
630 desc.add_options()(
"enable-add-metadata-columns",
633 ->implicit_value(
true),
634 "Enable add_metadata_columns COPY FROM WITH option (Beta).");
636 desc.add_options()(
"disk-cache-path",
637 po::value<std::string>(&disk_cache_config.path),
638 "Specify the path for the disk cache.");
642 po::value<std::string>(&(disk_cache_level))->default_value(
"foreign_tables"),
643 "Specify level of disk cache. Valid options are 'foreign_tables', "
644 "'local_tables', 'none', and 'all'.");
646 desc.add_options()(
"disk-cache-size",
647 po::value<size_t>(&(disk_cache_config.size_limit)),
648 "Specify a maximum size for the disk cache in bytes.");
651 "enable-interoperability",
654 ->implicit_value(
true),
655 "Enable offloading of query portions to an external execution engine.");
656 desc.add_options()(
"enable-union",
659 ->implicit_value(
true),
660 "DEPRECATED. UNION ALL is enabled by default. Please remove "
661 "use of this option, as it may be disabled in the future.");
663 "calcite-service-timeout",
664 po::value<size_t>(&system_parameters.calcite_timeout)
665 ->default_value(system_parameters.calcite_timeout),
666 "Calcite server timeout (milliseconds). Increase this on systems with frequent "
667 "schema changes or when running large numbers of parallel queries.");
// Registers --calcite-service-keepalive, bound to system_parameters.calcite_keepalive,
// with that field's current value as the default.
// NOTE(review): the option is typed as size_t while the implicit value is the bool
// literal `true`, which converts to 1 — confirm a numeric 0/1 switch is what is
// intended here rather than a po::value<bool>.
668 desc.add_options()(
"calcite-service-keepalive",
669 po::value<size_t>(&system_parameters.calcite_keepalive)
670 ->default_value(system_parameters.calcite_keepalive)
671 ->implicit_value(
true),
672 "Enable keepalive on Calcite connections.");
674 "stringdict-parallelizm",
677 ->implicit_value(
true),
678 "Allow StringDictionary to parallelize loads using multiple threads");
679 desc.add_options()(
"log-user-id",
682 ->implicit_value(
true),
683 "Log userId integer in place of the userName (when available).");
684 desc.add_options()(
"log-user-origin",
685 po::value<bool>(&log_user_origin)
686 ->default_value(log_user_origin)
687 ->implicit_value(
true),
688 "Lookup the origin of inbound connections by IP address/DNS "
689 "name, and print this information as part of stdlog.");
690 desc.add_options()(
"allowed-import-paths",
691 po::value<std::string>(&allowed_import_paths),
692 "List of allowed root paths that can be used in import operations.");
693 desc.add_options()(
"allowed-export-paths",
694 po::value<std::string>(&allowed_export_paths),
695 "List of allowed root paths that can be used in export operations.");
696 desc.add_options()(
"enable-system-tables",
699 ->implicit_value(
true),
700 "Enable use of system tables.");
701 desc.add_options()(
"enable-table-functions",
704 ->implicit_value(
true),
705 "Enable system table functions support.");
706 desc.add_options()(
"enable-ml-functions",
709 ->implicit_value(
true),
710 "Enable ML support.");
711 desc.add_options()(
"restrict-ml-model-metadata-to-superusers",
714 ->implicit_value(
true),
715 "RESTRICT SHOW MODEL and SHOW MODEL DETAILS to superusers only.");
716 desc.add_options()(
"enable-logs-system-tables",
719 ->implicit_value(
true),
720 "Enable use of logs system tables.");
722 "logs-system-tables-max-files-count",
725 "Maximum number of log files that will be processed by each logs system table.");
726 #ifdef ENABLE_MEMKIND
727 desc.add_options()(
"enable-tiered-cpu-mem",
728 po::value<bool>(&g_enable_tiered_cpu_mem)
729 ->default_value(g_enable_tiered_cpu_mem)
730 ->implicit_value(
true),
731 "Enable additional tiers of CPU memory (PMEM, etc...)");
// --pmem-size: size_t bound to g_pmem_size, default 0.
// NOTE(review): registered without a description string, so it has no help text.
732 desc.add_options()(
"pmem-size", po::value<size_t>(&
g_pmem_size)->default_value(0));
// --pmem-path: string bound to g_pmem_path; no default value is set.
// NOTE(review): registered without a description string, so it has no help text.
733 desc.add_options()(
"pmem-path", po::value<std::string>(&
g_pmem_path));
736 desc.add(log_options_.get_options());
740 po::options_description& desc = developer_desc_;
742 desc.add_options()(
"dev-options",
"Print internal developer options.");
744 "enable-calcite-view-optimize",
745 po::value<bool>(&system_parameters.enable_calcite_view_optimize)
746 ->default_value(system_parameters.enable_calcite_view_optimize)
747 ->implicit_value(
true),
748 "Enable additional calcite (query plan) optimizations when a view is part of the "
750 desc.add_options()(
"enable-columnar-output",
753 ->implicit_value(
true),
754 "Enable columnar output for intermediate/final query steps.");
755 desc.add_options()(
"enable-left-join-filter-hoisting",
758 ->implicit_value(
true),
759 "Enable hoisting left hand side filters through left joins.");
760 desc.add_options()(
"optimize-row-init",
763 ->implicit_value(
true),
764 "Optimize row initialization.");
765 desc.add_options()(
"enable-legacy-syntax",
766 po::value<bool>(&enable_legacy_syntax)
767 ->default_value(enable_legacy_syntax)
768 ->implicit_value(
true),
769 "Enable legacy syntax.");
772 po::value<bool>(&allow_multifrag)
773 ->default_value(allow_multifrag)
774 ->implicit_value(
true),
775 "Enable execution over multiple fragments in a single round-trip to GPU.");
776 desc.add_options()(
"enable-lazy-fetch",
779 ->implicit_value(
true),
780 "Enable lazy fetch columns in query results.");
781 desc.add_options()(
"enable-shared-mem-group-by",
784 ->implicit_value(
true),
785 "Enable using GPU shared memory for some GROUP BY queries.");
786 desc.add_options()(
"num-executors",
787 po::value<int>(&system_parameters.num_executors)
788 ->default_value(system_parameters.num_executors),
789 "Number of executors to run in parallel.");
791 "num-tuple-threshold-switch-to-baseline",
794 ->implicit_value(100000),
795 "Control a threshold to switch perfect hash join to baseline hash join by "
796 "comparing a hash entry range of the join column to the input table cardinality."
797 "This condition checks the following: |INPUT_TABLE| < {THIS_THRESHOLD}"
798 "We switch hash table layout when this condition and the condition related to "
799 "\'col-range-to-num-hash-entries-threshold-switch-to-baseline\' are satisfied "
802 "ratio-num-hash-entry-to-num-tuple-switch-to-baseline",
805 ->implicit_value(100),
806 "Control a threshold to switch perfect hash join to baseline hash join by "
807 "comparing a hash entry range of the join column to the input table cardinality."
808 "This condition checks the following: HASH_ENTRY_RANGE / |INPUT_TABLE| < "
810 "We switch hash table layout when this condition and the condition related to "
811 "\'num-tuple-threshold-switch-to-baseline\' are satisfied together.");
813 "gpu-shared-mem-threshold",
815 "GPU shared memory threshold (in bytes). If query requires larger buffers than "
816 "this threshold, we disable those optimizations. 0 (default) means no static cap.");
818 "enable-shared-mem-grouped-non-count-agg",
821 ->implicit_value(
true),
822 "Enable using GPU shared memory for grouped non-count aggregate queries.");
823 desc.add_options()(
"enable-shared-mem-non-grouped-agg",
826 ->implicit_value(
true),
827 "Enable using GPU shared memory for non-grouped aggregate queries.");
828 desc.add_options()(
"enable-direct-columnarization",
831 ->implicit_value(
true),
832 "Enables/disables a more optimized columnarization method "
833 "for intermediate steps in multi-step queries.");
835 "offset-device-by-table-id",
838 ->implicit_value(
true),
839 "Enables/disables offseting the chosen device ID by the table ID for a given "
840 "fragment. This improves balance of fragments across GPUs.");
841 desc.add_options()(
"enable-window-functions",
844 ->implicit_value(
true),
845 "Enable window function support.");
846 desc.add_options()(
"enable-parallel-window-partition-compute",
849 ->implicit_value(
true),
850 "Enable parallel window function partition computation.");
851 desc.add_options()(
"enable-parallel-window-partition-sort",
854 ->implicit_value(
true),
855 "Enable parallel window function partition sorting.");
857 "window-function-frame-aggregation-tree-fanout",
859 "A tree fanout for aggregation tree used to compute aggregation over "
861 desc.add_options()(
"enable-dev-table-functions",
864 ->implicit_value(
true),
865 "Enable dev (test or alpha) table functions. Also "
866 "requires --enable-table-functions to be turned on");
868 desc.add_options()(
"enable-geo-ops-on-uncompressed-coords",
871 ->implicit_value(
true),
872 "Enable faster geo operations on uncompressed coords");
875 po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(
true),
876 "Enable runtime debugger support for the JIT. Note that this flag is "
878 "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
879 "`/tmp/mapdquery`.");
882 po::value<bool>(&intel_jit_profile)
883 ->default_value(intel_jit_profile)
884 ->implicit_value(
true),
885 "Enable runtime support for the JIT code profiling using Intel VTune.");
887 "enable-cpu-sub-tasks",
890 ->implicit_value(
true),
891 "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
892 "load balance and decrease reduction overhead.");
896 "Set CPU sub-task size in rows.");
900 "Set max CPU concurrent threads. Values <= 0 will use default of 2X the number of "
901 "hardware threads.");
903 "skip-intermediate-count",
906 ->implicit_value(
true),
907 "Skip pre-flight counts for intermediate projections with no filters.");
908 desc.add_options()(
"strip-join-covered-quals",
911 ->implicit_value(
true),
912 "Remove quals from the filtered count if they are covered by a "
913 "join condition (currently only ST_Contains).");
915 desc.add_options()(
"min-cpu-slab-size",
916 po::value<size_t>(&system_parameters.min_cpu_slab_size)
917 ->default_value(system_parameters.min_cpu_slab_size),
918 "Min slab size (size of memory allocations) for CPU buffer pool.");
921 po::value<size_t>(&system_parameters.max_cpu_slab_size)
922 ->default_value(system_parameters.max_cpu_slab_size),
923 "Max CPU buffer pool slab size (size of memory allocations). Note if "
924 "there is not enough free memory to accomodate the target slab size, smaller "
925 "slabs will be allocated, down to the minimum size specified by "
926 "min-cpu-slab-size.");
927 desc.add_options()(
"min-gpu-slab-size",
928 po::value<size_t>(&system_parameters.min_gpu_slab_size)
929 ->default_value(system_parameters.min_gpu_slab_size),
930 "Min slab size (size of memory allocations) for GPU buffer pools.");
933 po::value<size_t>(&system_parameters.max_gpu_slab_size)
934 ->default_value(system_parameters.max_gpu_slab_size),
935 "Max GPU buffer pool slab size (size of memory allocations). Note if "
936 "there is not enough free memory to accomodate the target slab size, smaller "
937 "slabs will be allocated, down to the minimum size speified by "
938 "min-gpu-slab-size.");
941 "max-output-projection-allocation-bytes",
944 "Maximum allocation size for a fixed output buffer allocation for projection "
945 "queries with no pre-flight count. Default is the maximum slab size (sizes "
947 "than the maximum slab size have no affect). Requires bump allocator.");
949 "min-output-projection-allocation-bytes",
952 "Minimum allocation size for a fixed output buffer allocation for projection "
953 "queries with no pre-flight count. If an allocation of this size cannot be "
954 "obtained, the query will be retried with different execution parameters and/or "
956 "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
957 desc.add_options()(
"enable-bump-allocator",
960 ->implicit_value(
true),
961 "Enable the bump allocator for projection queries on "
962 "GPU. The bump allocator will "
963 "allocate a fixed size buffer for each query, track the "
964 "number of rows passing the "
965 "kernel during query execution, and copy back only the "
966 "rows that passed the kernel "
967 "to CPU after execution. When disabled, pre-flight "
968 "count queries are used to size "
969 "the output buffer for projection queries.");
971 "code-cache-eviction-percent",
974 "Percentage of the GPU code cache to evict if an out of memory error is "
975 "encountered while attempting to place generated code on the GPU.");
977 desc.add_options()(
"ssl-cert",
978 po::value<std::string>(&system_parameters.ssl_cert_file)
979 ->default_value(std::string(
"")),
980 "SSL Validated public certficate.");
983 "gpu-code-cache-max-size-in-bytes",
986 "The maximum size of cached compiled codes for the gpu code cache in bytes.");
988 desc.add_options()(
"ssl-private-key",
989 po::value<std::string>(&system_parameters.ssl_key_file)
990 ->default_value(std::string(
"")),
991 "SSL private key file.");
994 desc.add_options()(
"ssl-trust-store",
995 po::value<std::string>(&system_parameters.ssl_trust_store)
996 ->default_value(std::string(
"")),
997 "SSL public CA certifcates (java trust store) to validate "
998 "TLS connections (passed through to the Calcite server).");
1001 "ssl-trust-password",
1002 po::value<std::string>(&system_parameters.ssl_trust_password)
1003 ->default_value(std::string(
"")),
1004 "SSL password for java trust store provided via --ssl-trust-store parameter.");
1008 po::value<std::string>(&system_parameters.ssl_trust_ca_file)
1009 ->default_value(std::string(
"")),
1010 "SSL public CA certificates to validate TLS connection(as a client).");
1013 "ssl-trust-ca-server",
1014 po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string(
"")),
1015 "SSL public CA certificates to validate TLS connection(as a server).");
1017 desc.add_options()(
"ssl-keystore",
1018 po::value<std::string>(&system_parameters.ssl_keystore)
1019 ->default_value(std::string(
"")),
1020 "SSL server credentials as a java key store (passed "
1021 "through to the Calcite server).");
1023 desc.add_options()(
"ssl-keystore-password",
1024 po::value<std::string>(&system_parameters.ssl_keystore_password)
1025 ->default_value(std::string(
"")),
1026 "SSL password for java keystore, provide by via --ssl-keystore.");
1030 po::value<std::string>(&udf_file_name),
1031 "Load user defined extension functions from this file at startup. The file is "
1032 "expected to be a C/C++ file with extension .cpp.");
1034 desc.add_options()(
"udf-compiler-path",
1035 po::value<std::string>(&udf_compiler_path),
1036 "Provide absolute path to clang++ used in udf compilation.");
1038 desc.add_options()(
"udf-compiler-options",
1039 po::value<std::vector<std::string>>(&udf_compiler_options),
1040 "Specify compiler options to tailor udf compilation.");
1043 desc.add_options()(
"libgeos-so-filename",
1044 po::value<std::string>(&libgeos_so_filename),
1045 "Specify libgeos shared object filename to be used for "
1046 "geos-backed geo opertations.");
1049 "large-ndv-threshold",
1052 "large-ndv-multiplier",
1054 desc.add_options()(
"approx_quantile_buffer",
1057 desc.add_options()(
"approx_quantile_centroids",
1061 "bitmap-memory-limit",
1063 "Limit for count distinct bitmap memory use. The limit is computed by taking the "
1064 "size of the group by buffer (entry count in Query Memory Descriptor) and "
1065 "multiplying it by the number of count distinct expression and the size of bitmap "
1066 "required for each. For approx_count_distinct this is typically 8192 bytes.");
1068 "enable-filter-function",
1071 ->implicit_value(
true),
1072 "Enable the filter function protection feature for the SQL JIT compiler. "
1073 "Normally should be on but techs might want to disable for troubleshooting.");
1075 "enable-idp-temporary-users",
1078 ->implicit_value(
true),
1079 "Enable temporary users for SAML and LDAP logins on read-only servers. "
1080 "Normally should be on but techs might want to disable for troubleshooting.");
1081 desc.add_options()(
"enable-foreign-table-scheduled-refresh",
1084 ->implicit_value(
true),
1085 "Enable scheduled foreign table refresh.");
1087 "enable-seconds-refresh-interval",
1090 ->implicit_value(
true),
1091 "Enable foreign table seconds refresh interval for testing purposes.");
1092 desc.add_options()(
"enable-auto-metadata-update",
1095 ->implicit_value(
true),
1096 "Enable automatic metadata update.");
1100 "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
1101 "parallelTop() to sort.");
1105 "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
1108 "streaming-top-n-max",
1110 "The maximum number of rows allowing streaming top-N sorting.");
1111 desc.add_options()(
"vacuum-min-selectivity",
1114 "Minimum selectivity for automatic vacuuming. "
1115 "This specifies the percentage (with a value of 0 "
1116 "implying 0% and a value of 1 implying 100%) of "
1117 "deleted rows in a fragment at which to perform "
1118 "automatic vacuuming. A number greater than 1 can "
1119 "be used to disable automatic vacuuming.");
1120 desc.add_options()(
"enable-automatic-ir-metadata",
1123 ->implicit_value(
true),
1124 "Enable automatic IR metadata (debug builds only).");
1128 "The maximum number of characters that a log message can has. If the log message "
1129 "is longer than this, we only record \'g_max_log_message_length\' characters.");
1131 "estimator-failure-max-groupby-size",
1134 "Maximum size of the groupby buffer if the estimator fails. By default we use the "
1135 "number of tuples in the table up to this value.");
1136 desc.add_options()(
"columnar-large-projections",
1139 ->implicit_value(
true),
1140 "Prefer columnar output if projection size is >= "
1141 "threshold set by --columnar-large-projections-threshold "
1142 "(default 1,000,000 rows).");
1144 "columnar-large-projections-threshold",
1147 "Threshold (in minimum number of rows) to prefer columnar output for projections. "
1148 "Requires --columnar-large-projections to be set.");
1151 "allow-memory-status-log",
1154 "Allow CPU (and GPU if necessary) memory status before/after the query execution.");
1157 "allow-query-step-cpu-retry",
1160 ->implicit_value(
true),
1161 R
"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
1162 desc.add_options()("enable-http-binary-server",
1165 ->implicit_value(
true),
1166 "Enable binary over HTTP Thrift server");
1168 desc.add_options()(
"enable-query-engine-cuda-streams",
1171 ->implicit_value(
true),
1172 "Enable Query Engine CUDA streams");
1175 "allow-invalid-literal-buffer-reads",
1178 ->implicit_value(
true),
1179 "For backwards compatibility. Enabling may cause invalid query results.");
1181 #ifdef HAVE_TORCH_TFS
1182 desc.add_options()(
"torch-lib-path",
1183 po::value<std::string>(&torch_lib_path),
1184 "Absolute path to custom LibTorch shared library location to be "
1185 "loaded at runtime. (If not provided, the library will be searched "
1186 "for in the system's default library path.)");
1194 std::stringstream ss;
1196 while (std::getline(in, line)) {
1198 if (line ==
"[web]" || line ==
"[iq]") {
1206 if (!filename.empty()) {
1207 boost::algorithm::trim_if(filename, boost::is_any_of(
"\"'"));
1208 if (!boost::filesystem::exists(filename)) {
1209 std::cerr << desc <<
" " << filename <<
" does not exist." << std::endl;
1217 if (!filename.empty()) {
1225 boost::algorithm::trim_if(base_path, boost::is_any_of(
"\"'"));
1226 if (!boost::filesystem::exists(base_path)) {
1227 throw std::runtime_error(
"HeavyDB base directory does not exist at " + base_path);
1232 boost::algorithm::trim_if(base_path, boost::is_any_of(
"\"'"));
1234 if (!boost::filesystem::exists(
data_path)) {
1235 throw std::runtime_error(
"HeavyDB data directory does not exist at '" + base_path +
1246 auto exe_filename = boost::filesystem::path(exe_name).filename().string();
1247 const std::string lock_file =
1248 (boost::filesystem::path(base_path) / std::string(exe_filename +
"_pid.lck"))
1252 VLOG(1) <<
"taking [" << lock_file <<
"] read+write lock until process exit";
1254 VLOG(1) <<
"taking [" << lock_file <<
"] read-only lock until process exit";
1260 throw std::runtime_error(
"failed to open lockfile: " + lock_file +
": " +
1261 std::string(strerror(errno)) +
" (" +
1266 memset(&fl, 0,
sizeof(fl));
1268 fl.l_whence = SEEK_SET;
1277 if (ret == -1 && (errno == EACCES || errno == EAGAIN)) {
1279 throw std::runtime_error(
1280 "another HeavyDB server instance is already using data directory: " +
1282 }
else if (ret == -1) {
1283 auto errno0 = errno;
1285 throw std::runtime_error(
"failed to lock lockfile: " + lock_file +
": " +
1286 std::string(strerror(errno0)) +
" (" +
1292 auto errno0 = errno;
1294 throw std::runtime_error(
"failed to truncate lockfile: " + lock_file +
": " +
1295 std::string(strerror(errno0)) +
" (" +
1299 auto errno0 = errno;
1301 throw std::runtime_error(
"failed to write lockfile: " + lock_file +
": " +
1302 std::string(strerror(errno0)) +
" (" +
1311 boost::algorithm::trim_if(db_query_file, boost::is_any_of(
"\"'"));
1312 if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
1313 throw std::runtime_error(
"File containing DB queries " + db_query_file +
1314 " does not exist.");
1316 const auto db_file = boost::filesystem::path(base_path) /
1318 if (!boost::filesystem::exists(db_file)) {
1320 const auto db_file =
1322 if (!boost::filesystem::exists(db_file)) {
1324 " does not exist.");
1328 if (license_path.length() == 0) {
1333 LOG(
INFO) <<
"HeavyDB started with data directory at '" << base_path <<
"'";
1334 if (vm.count(
"license-path")) {
1335 LOG(
INFO) <<
"License key path set to '" << license_path <<
"'";
1338 LOG(
INFO) <<
" Server read-only mode is " << read_only <<
" (--read-only)";
1341 <<
" (--multi-instance)";
1344 throw std::runtime_error(
1345 "You may not use the --read-only and --multi-instance configuration flags "
1349 LOG(
WARNING) <<
" Allowing invalid reads from the literal buffer. May cause invalid "
1350 "query results! (--allow-invalid-literal-buffer-reads)";
1352 #if DISABLE_CONCURRENCY
1353 LOG(
INFO) <<
" Threading layer: serial";
1355 LOG(
INFO) <<
" Threading layer: TBB";
1357 LOG(
INFO) <<
" Threading layer: std";
1359 LOG(
INFO) <<
" Watchdog is set to " << enable_watchdog;
1360 LOG(
INFO) <<
" Dynamic Watchdog is set to " << enable_dynamic_watchdog;
1361 if (enable_dynamic_watchdog) {
1362 LOG(
INFO) <<
" Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
1364 LOG(
INFO) <<
" Runtime query interrupt is set to " << enable_runtime_query_interrupt;
1365 if (enable_runtime_query_interrupt) {
1366 LOG(
INFO) <<
" A frequency of checking pending query interrupt request is set to "
1367 << pending_query_interrupt_freq <<
" (in ms.)";
1368 LOG(
INFO) <<
" A frequency of checking running query interrupt request is set to "
1369 << running_query_interrupt_freq <<
" (0.0 ~ 1.0)";
1371 LOG(
INFO) <<
" Non-kernel time query interrupt is set to "
1372 << enable_non_kernel_time_query_interrupt;
1376 LOG(
INFO) <<
" Maximum idle session duration " << idle_session_duration;
1377 LOG(
INFO) <<
" Maximum active session duration " << max_session_duration;
1378 LOG(
INFO) <<
" Maximum number of sessions " << system_parameters.num_sessions;
1381 #ifdef ENABLE_IMPORT_PARQUET
1382 LOG(
INFO) <<
"Legacy parquet import is set to " << g_enable_legacy_parquet_import;
1386 LOG(
INFO) <<
"Allowed import paths is set to " << allowed_import_paths;
1387 LOG(
INFO) <<
"Allowed export paths is set to " << allowed_export_paths;
1389 base_path, allowed_import_paths, allowed_export_paths);
1403 #ifdef ENABLE_IMPORT_PARQUET
1404 !g_enable_legacy_parquet_import ||
1409 LOG(
INFO) <<
"FSI has been enabled as a side effect of enabling non-legacy import.";
1412 const bool executor_resource_mgr_cpu_result_mem_ratio_flag_set =
1413 vm[
"executor-cpu-result-mem-ratio"].defaulted() ?
false :
true;
1414 const bool executor_resource_mgr_cpu_result_mem_bytes_flag_set =
1415 vm[
"executor-cpu-result-mem-bytes"].defaulted() ?
false :
true;
1416 const bool executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set =
1417 vm[
"executor-per-query-max-cpu-threads-ratio"].defaulted() ?
false :
true;
1418 const bool executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set =
1419 vm[
"executor-per-query-max-cpu-result-mem-ratio"].defaulted() ?
false :
true;
1420 const bool executor_resource_mgr_cpu_kernel_concurrency_flag_set =
1421 vm[
"allow-cpu-kernel-concurrency"].defaulted() ?
false :
true;
1422 const bool executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set =
1423 vm[
"allow-cpu-gpu-kernel-concurrency"].defaulted() ?
false :
true;
1424 const bool executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set =
1425 vm[
"allow-cpu-thread-oversubscription-concurrency"].defaulted() ?
false :
true;
1426 const bool executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set =
1427 vm[
"allow-cpu-result-mem-oversubscription-concurrency"].defaulted() ?
false :
true;
1430 if (executor_resource_mgr_cpu_result_mem_bytes_flag_set) {
1431 throw std::runtime_error(
1432 "Cannot set executor-cpu-result-mem-bytes without enable-executor-resource-mgr "
1435 if (executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1436 throw std::runtime_error(
1437 "Cannot set executor-cpu-result-mem-ratio without enable-executor-resource-mgr "
1440 if (executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set) {
1441 throw std::runtime_error(
1442 "Cannot set executor-per-query-max-cpu-slots-ratio without "
1443 "enable-executor-resource-mgr option enabled");
1445 if (executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set) {
1446 throw std::runtime_error(
1447 "Cannot set executor-per-query-max-cpu-result-mem-ratio without "
1448 "enable-executor-resource-mgr option enabled");
1450 if (executor_resource_mgr_cpu_kernel_concurrency_flag_set) {
1451 throw std::runtime_error(
1452 "Cannot set allow-cpu-kernel-concurrency without "
1453 "enable-executor-resource-mgr option enabled");
1455 if (executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set) {
1456 throw std::runtime_error(
1457 "Cannot set allow-cpu-gpu-kernel-concurrency without "
1458 "enable-executor-resource-mgr option enabled");
1460 if (executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set) {
1461 throw std::runtime_error(
1462 "Cannot set allow-cpu-thread-oversubscription-concurrency without "
1463 "enable-executor-resource-mgr option enabled");
1465 if (executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set) {
1466 throw std::runtime_error(
1467 "Cannot set allow-cpu-thread-result-mem-concurrency without "
1468 "enable-executor-resource-mgr option enabled");
1471 if (executor_resource_mgr_cpu_result_mem_bytes_flag_set &&
1472 executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1473 throw std::runtime_error(
1474 "Setting both executor-cpu-result-mem-bytes and executor-cpu-result-mem-ratio is "
1475 "not allowed as the flags are mutually exclusive.");
1480 throw std::runtime_error(
1481 "allow-cpu-thread-oversubscription-concurrency cannot be set without at least "
1482 "one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency being "
1486 throw std::runtime_error(
1487 "allow-cpu-result-mem-oversubscription-concurrency cannot be set without at "
1488 "least one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency "
1494 throw std::runtime_error(
1495 "Invalid value for executor-cpu-result-mem-ratio, must be greater than 0.");
1498 throw std::runtime_error(
1499 "Invalid value for executor-per-query-max-cpu-slots-ratio, must be greater than "
1503 throw std::runtime_error(
1504 "Invalid value for executor-per-query-max-cpu-result-mem-ratio, must be greater "
1510 throw std::runtime_error(
1511 "Invalid value for executor-max-available-resource-use-ratio, must be greater "
1513 "0. and less than or equal to 1.0");
1516 #ifndef HAVE_SYSTEM_TFS
1519 LOG(
INFO) <<
"System table functions turned off due to HeavyDB being built without "
1520 "table function support.";
1522 #endif // HAVE_SYSTEM_TFS
1525 LOG(
INFO) <<
"ML functions turned off due to `--enable-table-functions` being set to "
1526 "false. Please enable table functions to use ML functionality.";
1529 if (disk_cache_level ==
"foreign_tables") {
1532 LOG(
INFO) <<
"Disk cache enabled for foreign tables only";
1534 LOG(
INFO) <<
"Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1535 "disk cache disabled";
1537 }
else if (disk_cache_level ==
"all") {
1539 LOG(
INFO) <<
"Disk cache enabled for all tables";
1540 }
else if (disk_cache_level ==
"local_tables") {
1542 LOG(
INFO) <<
"Disk cache enabled for non-FSI tables";
1543 }
else if (disk_cache_level ==
"none") {
1545 LOG(
INFO) <<
"Disk cache disabled";
1547 throw std::runtime_error{
1548 "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1549 ". Valid options are 'foreign_tables', "
1550 "'local_tables', 'none', and 'all'."};
1554 throw std::runtime_error{
"disk-cache-size must be at least " +
1558 if (disk_cache_config.path.empty()) {
1577 throw std::runtime_error{
"vacuum-min-selectivity cannot be less than 0."};
1587 LOG(
INFO) <<
"FSI has been enabled as a side effect of enabling system tables";
1594 throw std::runtime_error{
1595 "Invalid value provided for the \"logs-system-tables-max-files-count\" "
1596 "option. Value must be greater than 0."};
1598 LOG(
INFO) <<
"Maximum number of logs system table files set to "
1601 #ifdef ENABLE_MEMKIND
1602 if (g_enable_tiered_cpu_mem) {
1604 throw std::runtime_error{
"pmem-path must be set to use tiered cpu memory"};
1607 throw std::runtime_error{
"pmem-size must be set to use tiered cpu memory"};
1609 if (!std::filesystem::exists(
g_pmem_path.c_str())) {
1610 throw std::runtime_error{
"path to PMem directory (" +
g_pmem_path +
1611 ") does not exist."};
1618 const bool enable_runtime_udfs,
1619 const bool enable_udf_registration_for_all_users) {
1620 return enable_runtime_udfs
1621 ? (enable_udf_registration_for_all_users
1630 char const*
const* argv,
1631 const bool should_init_logging) {
1632 po::options_description all_desc(
"All options");
1633 all_desc.add(help_desc_).add(developer_desc_);
1636 po::store(po::command_line_parser(argc, argv)
1638 .positional(positional_options)
1643 if (vm.count(
"help")) {
1644 std::cerr <<
"Usage: heavydb <data directory path> [-p <port number>] "
1645 "[--http-port <http port number>] [--flush-log] [--version|-v]"
1648 std::cout << help_desc_ << std::endl;
1651 if (vm.count(
"dev-options")) {
1652 std::cout <<
"Usage: heavydb <data directory path> [-p <port number>] "
1653 "[--http-port <http port number>] [--flush-log] [--version|-v]"
1656 std::cout << developer_desc_ << std::endl;
1659 if (vm.count(
"version")) {
1660 std::cout <<
"HeavyDB Version: " <<
MAPD_RELEASE << std::endl;
1664 if (vm.count(
"config")) {
1665 std::ifstream settings_file(system_parameters.config_file);
1669 po::store(po::parse_config_file(sanitized_settings, all_desc,
false), vm);
1671 settings_file.close();
1676 <<
"The enable-union option is DEPRECATED and is now enabled by default. "
1677 "Please remove use of this option, as it may be disabled in the future."
1682 boost::algorithm::trim_if(base_path, boost::is_any_of(
"\"'"));
1683 if (!boost::filesystem::exists(base_path)) {
1684 std::cerr <<
"Storage folder (--data) not found: " << base_path << std::endl;
1685 std::cerr <<
"Need to run initheavy before heavydb." << std::endl;
1691 if (!boost::filesystem::exists(lockfiles_path)) {
1692 if (!boost::filesystem::create_directory(lockfiles_path)) {
1693 std::cerr <<
"Cannot create " + shared::kLockfilesDirectoryName +
1694 " subdirectory under "
1695 << base_path << std::endl;
1700 if (!boost::filesystem::exists(lockfiles_path2)) {
1701 if (!boost::filesystem::create_directory(lockfiles_path2)) {
1702 std::cerr <<
"Cannot create " + shared::kLockfilesDirectoryName +
"/" +
1703 shared::kCatalogDirectoryName +
" subdirectory under "
1704 << base_path << std::endl;
1709 if (!boost::filesystem::exists(lockfiles_path3)) {
1710 if (!boost::filesystem::create_directory(lockfiles_path3)) {
1711 std::cerr <<
"Cannot create " + shared::kLockfilesDirectoryName +
"/" +
1712 shared::kDataDirectoryName +
" subdirectory under "
1713 << base_path << std::endl;
1722 if (!vm[
"enable-runtime-udf"].defaulted()) {
1723 if (!vm[
"enable-runtime-udfs"].defaulted()) {
1724 std::cerr <<
"Usage Error: Both enable-runtime-udf and enable-runtime-udfs "
1725 "specified. Please remove use of the enable-runtime-udfs flag, "
1726 "as it will be deprecated in the future."
1730 enable_runtime_udfs = enable_runtime_udf;
1731 std::cerr <<
"The enable-runtime-udf flag has been deprecated and replaced "
1732 "with enable-runtime-udfs. Please remove use of this option "
1733 "as it will be disabled in the future."
1737 system_parameters.runtime_udf_registration_policy =
1739 enable_udf_registration_for_all_users);
1741 if (should_init_logging) {
1752 "ssl trust store")) {
1767 watchdog_none_encoded_string_translation_limit;
1796 }
catch (po::error& e) {
1797 std::cerr <<
"Usage Error: " << e.what() << std::endl;
1801 if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1802 std::cerr <<
"hll-precision-bits must be between 1 and 16." << std::endl;
1807 LOG(
INFO) <<
" From clause table reordering is disabled";
1811 LOG(
INFO) <<
" Filter push down for JOIN is enabled";
1814 if (vm.count(
"udf")) {
1815 boost::algorithm::trim_if(udf_file_name, boost::is_any_of(
"\"'"));
1817 if (!boost::filesystem::exists(udf_file_name)) {
1818 LOG(
ERROR) <<
" User defined function file " << udf_file_name <<
" does not exist.";
1822 LOG(
INFO) <<
" User provided extension functions loaded from " << udf_file_name;
1825 if (vm.count(
"udf-compiler-path")) {
1826 boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of(
"\"'"));
1829 #ifdef HAVE_TORCH_TFS
1830 if (vm.count(
"torch-lib-path")) {
1831 boost::algorithm::trim_if(torch_lib_path, boost::is_any_of(
"\"'"));
1835 auto trim_string = [](std::string& s) {
1836 boost::algorithm::trim_if(s, boost::is_any_of(
"\"'"));
1839 if (vm.count(
"udf-compiler-options")) {
1840 std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1843 boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of(
"\"'"));
1844 boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of(
"\"'"));
1845 boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of(
"\"'"));
1846 boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1847 boost::is_any_of(
"\"'"));
1849 if (!system_parameters.ha_group_id.empty()) {
1850 LOG(
INFO) <<
" HA group id " << system_parameters.ha_group_id;
1851 if (system_parameters.ha_unique_server_id.empty()) {
1852 LOG(
ERROR) <<
"Starting server in HA mode --ha-unique-server-id must be set ";
1855 LOG(
INFO) <<
" HA unique server id " << system_parameters.ha_unique_server_id;
1857 if (system_parameters.ha_brokers.empty()) {
1858 LOG(
ERROR) <<
"Starting server in HA mode --ha-brokers must be set ";
1861 LOG(
INFO) <<
" HA brokers " << system_parameters.ha_brokers;
1863 if (system_parameters.ha_shared_data.empty()) {
1864 LOG(
ERROR) <<
"Starting server in HA mode --ha-shared-data must be set ";
1867 LOG(
INFO) <<
" HA shared data is " << system_parameters.ha_shared_data;
1871 boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of(
"\"'"));
1872 if (!system_parameters.master_address.empty()) {
1874 LOG(
ERROR) <<
"The master-address setting is only allowed in read-only mode";
1877 LOG(
INFO) <<
" Master Address is " << system_parameters.master_address;
1878 LOG(
INFO) <<
" Master Port is " << system_parameters.master_port;
1883 <<
")." << std::endl;
1889 if (system_parameters.cuda_block_size) {
1890 LOG(
INFO) <<
" cuda block size " << system_parameters.cuda_block_size;
1892 if (system_parameters.cuda_grid_size) {
1893 LOG(
INFO) <<
" cuda grid size " << system_parameters.cuda_grid_size;
1895 LOG(
INFO) <<
" Min CPU buffer pool slab size (in bytes) "
1896 << system_parameters.min_cpu_slab_size;
1897 LOG(
INFO) <<
" Max CPU buffer pool slab size (in bytes) "
1898 << system_parameters.max_cpu_slab_size;
1899 LOG(
INFO) <<
" Min GPU buffer pool slab size (in bytes) "
1900 << system_parameters.min_gpu_slab_size;
1901 LOG(
INFO) <<
" Max GPU buffer pool slab size (in bytes) "
1902 << system_parameters.max_gpu_slab_size;
1903 LOG(
INFO) <<
" calcite JVM max memory (in MB) " << system_parameters.calcite_max_mem;
1904 LOG(
INFO) <<
" HeavyDB Server Port " << system_parameters.omnisci_server_port;
1905 LOG(
INFO) <<
" HeavyDB Calcite Port " << system_parameters.calcite_port;
1906 LOG(
INFO) <<
" Enable Calcite view optimize "
1907 << system_parameters.enable_calcite_view_optimize;
1908 LOG(
INFO) <<
" Allow Local Auth Fallback: "
1909 << (authMetadata.allowLocalAuthFallback ?
"enabled" :
"disabled");
1913 LOG(
INFO) <<
" Enable Data Recycler: "
1916 LOG(
INFO) <<
" \t Use hashtable cache: "
1919 LOG(
INFO) <<
" \t\t Total amount of bytes that hashtable cache keeps: "
1921 LOG(
INFO) <<
" \t\t Per-hashtable size limit: "
1924 LOG(
INFO) <<
" \t Use query resultset cache: "
1927 LOG(
INFO) <<
" \t\t Total amount of bytes that query resultset cache keeps: "
1929 LOG(
INFO) <<
" \t\t Per-query resultset size limit: "
1932 LOG(
INFO) <<
" \t\t Use auto query resultset caching: "
1935 LOG(
INFO) <<
" \t\t\t The maximum bytes of a query resultset which is "
1936 "automatically cached: "
1939 LOG(
INFO) <<
" \t\t Use query step skipping: "
1941 LOG(
INFO) <<
" \t Use chunk metadata cache: "
1944 LOG(
INFO) <<
"Executor Resource Manager: "
1947 LOG(
INFO) <<
"\tCPU kernel concurrency: "
1950 LOG(
INFO) <<
"\tCPU-GPU kernel concurrency: "
1954 LOG(
INFO) <<
"\tCPU result set reserved allocation: "
1957 LOG(
INFO) <<
"\tCPU result set reserved ratio of CPU buffer pool size: "
1960 LOG(
INFO) <<
"\tPer-query max CPU threads ratio: "
1962 LOG(
INFO) <<
"\tPer-query max CPU result memory ratio of allocated total: "
1964 LOG(
INFO) <<
"\tAllow concurrent CPU thread/slot oversubscription: "
1969 <<
"\tAllow concurrent CPU result memory oversubscription: "
1973 LOG(
INFO) <<
"\tPer-query Max available resource utilization ratio: "
1977 const std::string udf_reg_policy_log_prefix{
"Runtime UDF/UDTF Registration Policy: "};
1978 switch (system_parameters.runtime_udf_registration_policy) {
1980 LOG(
INFO) << udf_reg_policy_log_prefix <<
" DISALLOWED";
1984 LOG(
INFO) << udf_reg_policy_log_prefix <<
" ALLOWED for superusers only";
1988 LOG(
INFO) << udf_reg_policy_log_prefix <<
" ALLOWED for all users";
1992 UNREACHABLE() <<
"Unrecognized option for Runtime UDF/UDTF registration policy.";
1996 boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of(
"\"'"));
1997 boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of(
"\"'"));
1998 boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of(
"\"'"));
1999 boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of(
"\"'"));
2000 boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of(
"\"'"));
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
bool g_enable_parallel_window_partition_sort
bool g_enable_left_join_filter_hoisting
double g_running_query_interrupt_freq
bool g_enable_smem_group_by
size_t g_parallel_top_max
int safe_open(const char *path, int flags, mode_t mode) noexcept
float g_filter_push_down_low_frac
const std::string kDataDirectoryName
size_t g_num_tuple_threshold_switch_to_baseline
bool g_use_query_resultset_cache
size_t g_cpu_sub_task_size
static size_t getMinimumSize()
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
bool g_allow_memory_status_log
SystemParameters::RuntimeUdfRegistrationPolicy construct_runtime_udf_registration_policy(const bool enable_runtime_udfs, const bool enable_udf_registration_for_all_users)
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
bool g_strip_join_covered_quals
bool g_enable_logs_system_tables
size_t g_gpu_code_cache_max_size_in_bytes
bool g_enable_direct_columnarization
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
const std::string kDefaultDiskCacheDirName
bool g_enable_legacy_delimited_import
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
bool g_allow_query_step_skipping
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
const std::string kDefaultLogDirName
size_t g_preflight_count_query_threshold
const std::string kSystemCatalogName
double g_bbox_intersect_target_entries_per_bin
unsigned g_cpu_threads_override
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
void addOptionalFileToBlacklist(std::string &filename)
size_t g_streaming_topn_max
bool g_enable_dynamic_watchdog
size_t g_hashtable_cache_total_bytes
unsigned g_trivial_loop_join_threshold
bool g_enable_geo_ops_on_uncompressed_coords
bool g_enable_non_kernel_time_query_interrupt
void setMaxCacheItemSize(CacheItemType item_type, size_t new_max_cache_item_size)
void fillDeveloperOptions()
double g_executor_resource_mgr_cpu_result_mem_ratio
bool g_enable_data_recycler
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
bool g_inner_join_fragment_skipping
bool g_use_chunk_metadata_cache
size_t g_max_cacheable_hashtable_size_bytes
bool g_enable_string_functions
bool g_enable_smem_non_grouped_agg
size_t g_watchdog_none_encoded_string_translation_limit
bool g_enable_executor_resource_mgr
bool g_restrict_ml_model_metadata_to_superusers
size_t g_parallel_top_min
bool g_enable_columnar_output
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
ssize_t safe_write(const int fd, const void *buffer, const size_t buffer_size) noexcept
bool g_enable_idp_temporary_users
bool g_from_table_reordering
size_t g_window_function_aggregation_tree_fanout
static void setDefaultImportPath(const std::string &base_path)
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_hashjoin_many_to_many
bool g_enable_system_tables
void init(LogOptions const &log_opts)
static HashtableRecycler * getHashTableCache()
bool g_enable_http_binary_server
static bool migrationEnabled()
size_t g_watchdog_max_projected_rows_per_device
float g_filter_push_down_high_frac
bool g_enable_distance_rangejoin
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
int64_t g_bitmap_memory_limit
size_t g_max_memory_allocation_size
size_t g_approx_quantile_buffer
bool g_allow_auto_resultset_caching
bool g_enable_dev_table_functions
void setTotalCacheSize(CacheItemType item_type, size_t new_total_cache_size)
static const std::string cluster_command_line_arg
bool g_optimize_cuda_block_and_grid_sizes
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
size_t g_query_resultset_cache_total_bytes
bool g_enable_window_functions
size_t g_max_cacheable_query_resultset_size_bytes
size_t g_min_memory_allocation_size
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
bool g_enable_seconds_refresh
bool g_enable_fsi_regex_import
void validate_base_path()
size_t g_estimator_failure_max_groupby_size
bool g_enable_bbox_intersect_hashjoin
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
bool g_enable_foreign_table_scheduled_refresh
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
const std::string kCatalogDirectoryName
bool g_enable_ml_functions
float g_fraction_code_cache_to_evict
bool g_allow_invalid_literal_buffer_reads
bool g_allow_system_dashboard_update
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
bool g_uniform_request_ids_per_thrift_call
size_t g_executor_resource_mgr_cpu_result_mem_bytes
std::string filename(char const *path)
bool g_enable_filter_push_down
bool g_use_estimator_result_cache
const std::string kDefaultLicenseFileName
bool g_enable_bump_allocator
bool g_enable_parallel_window_partition_compute
static HashtableRecycler * getHashTableCache()
RuntimeUdfRegistrationPolicy
double g_executor_resource_mgr_max_available_resource_use_ratio
bool g_enable_cpu_sub_tasks
bool g_allow_query_step_cpu_retry
int32_t ftruncate(const int32_t fd, int64_t length)
size_t g_approx_quantile_centroids
const std::string kLockfilesDirectoryName
static void addToBlacklist(const std::string &path)
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
bool g_optimize_row_initialization
static HashtableRecycler * getHashTableCache()
int safe_fcntl(int fd, int cmd, struct flock *fl) noexcept
bool g_columnar_large_projections
int safe_close(int fd) noexcept
unsigned g_dynamic_watchdog_time_limit
size_t g_columnar_large_projections_threshold
bool g_query_engine_cuda_streams
bool g_enable_thrift_logs
bool g_enable_add_metadata_columns
bool g_enable_runtime_query_interrupt
size_t g_max_import_threads
bool g_use_hashtable_cache
size_t g_auto_resultset_caching_threshold
size_t g_bbox_intersect_max_table_size_bytes
size_t g_large_ndv_multiplier
bool g_enable_table_functions
size_t g_gpu_smem_threshold