OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions Class Reference

#include <CommandLineOptions.h>

+ Collaboration diagram for CommandLineOptions:

Public Member Functions

 CommandLineOptions (char const *argv0, bool dist_v5_=false)
 
void fillOptions ()
 
void fillDeveloperOptions ()
 
std::string getNodeIds ()
 
std::vector< std::string > getNodeIdsArray ()
 
boost::optional< int > parse_command_line (int argc, char const *const *argv, const bool should_init_logging=false)
 
void validate ()
 
void validate_base_path ()
 
void init_logging ()
 

Public Attributes

int http_port = 6278
 
int http_binary_port = 6276
 
size_t reserved_gpu_mem = 384 * 1024 * 1024
 
std::string base_path
 
File_Namespace::DiskCacheConfig disk_cache_config
 
std::string cluster_file = {"cluster.conf"}
 
std::string cluster_topology_file = {"cluster_topology.conf"}
 
std::string license_path = {""}
 
std::string encryption_key_store_path = {}
 
bool verbose_logging = false
 
bool jit_debug = false
 
bool intel_jit_profile = false
 
bool allow_multifrag = true
 
bool read_only = false
 
bool allow_loop_joins = false
 
bool enable_legacy_syntax = true
 
bool log_user_origin = true
 
AuthMetadata authMetadata
 
SystemParameters system_parameters
 
bool enable_rendering = false
 
bool enable_auto_clear_render_mem = false
 
int render_oom_retry_threshold = 0
 
size_t render_mem_bytes = 1000000000
 
size_t max_concurrent_render_sessions = 500
 
bool render_compositor_use_last_gpu = true
 
bool renderer_prefer_igpu = false
 
unsigned renderer_vulkan_timeout_ms = 60000
 
bool renderer_use_parallel_executors = true
 
bool renderer_enable_slab_allocation = false
 
bool enable_watchdog = true
 
bool enable_dynamic_watchdog = false
 
size_t watchdog_none_encoded_string_translation_limit = 1000000
 
size_t watchdog_max_projected_rows_per_device
 
size_t preflight_count_query_threshold = g_preflight_count_query_threshold
 
bool enable_runtime_query_interrupt = true
 
bool enable_non_kernel_time_query_interrupt = true
 
bool use_estimator_result_cache = true
 
double running_query_interrupt_freq = 0.1
 
unsigned pending_query_interrupt_freq = 1000
 
unsigned dynamic_watchdog_time_limit = 10000
 
std::string disk_cache_level = ""
 
bool enable_data_recycler = true
 
bool use_hashtable_cache = true
 
size_t hashtable_cache_total_bytes = 4294967296
 
size_t max_cacheable_hashtable_size_bytes = 2147483648
 
bool optimize_cuda_block_and_grid_sizes = false
 
size_t num_reader_threads = 0
 
std::string db_query_file = {""}
 
bool exit_after_warmup = false
 
int idle_session_duration = kMinsPerHour
 
int max_session_duration = kMinsPerMonth
 
std::string udf_file_name = {""}
 
std::string udf_compiler_path = {""}
 
std::vector< std::string > udf_compiler_options
 
std::string allowed_import_paths {}
 
std::string allowed_export_paths {}
 
std::string compressor = std::string(BLOSC_LZ4HC_COMPNAME)
 
po::options_description help_desc_
 
po::options_description developer_desc_
 
logger::LogOptions log_options_
 
std::string exe_name
 
po::positional_options_description positional_options
 
std::vector< LeafHostInfo > db_leaves
 
std::vector< LeafHostInfo > string_leaves
 
po::variables_map vm
 
std::string clusterIds_arg
 
const bool dist_v5_
 

Static Public Attributes

static const std::string nodeIds_token = {"node_id"}
 
static const std::string cluster_command_line_arg {"cluster_topology"}
 

Private Attributes

bool enable_runtime_udfs = true
 
bool enable_runtime_udf = true
 
bool enable_udf_registration_for_all_users = false
 

Detailed Description

Definition at line 43 of file CommandLineOptions.h.

Constructor & Destructor Documentation

CommandLineOptions::CommandLineOptions ( char const *  argv0,
bool  dist_v5_ = false 
)
inline

Definition at line 45 of file CommandLineOptions.h.

References fillDeveloperOptions(), and fillOptions().

46  : log_options_(argv0), exe_name(argv0), dist_v5_(dist_v5_) {
47  fillOptions();
48  fillDeveloperOptions();
49  }
logger::LogOptions log_options_

+ Here is the call graph for this function:

Member Function Documentation

void CommandLineOptions::fillDeveloperOptions ( )

Definition at line 739 of file CommandLineOptions.cpp.

References g_allow_invalid_literal_buffer_reads, g_allow_memory_status_log, g_allow_query_step_cpu_retry, g_approx_quantile_buffer, g_approx_quantile_centroids, g_bitmap_memory_limit, g_columnar_large_projections, g_columnar_large_projections_threshold, g_cpu_sub_task_size, g_cpu_threads_override, g_enable_auto_metadata_update, g_enable_automatic_ir_metadata, g_enable_bump_allocator, g_enable_columnar_output, g_enable_cpu_sub_tasks, g_enable_dev_table_functions, g_enable_direct_columnarization, g_enable_filter_function, g_enable_foreign_table_scheduled_refresh, g_enable_geo_ops_on_uncompressed_coords, g_enable_http_binary_server, g_enable_idp_temporary_users, g_enable_lazy_fetch, g_enable_left_join_filter_hoisting, g_enable_parallel_window_partition_compute, g_enable_parallel_window_partition_sort, g_enable_seconds_refresh, g_enable_smem_group_by, g_enable_smem_grouped_non_count_agg, g_enable_smem_non_grouped_agg, g_enable_window_functions, g_estimator_failure_max_groupby_size, g_fraction_code_cache_to_evict, g_gpu_code_cache_max_size_in_bytes, g_gpu_smem_threshold, g_large_ndv_multiplier, g_large_ndv_threshold, g_max_log_length, g_max_memory_allocation_size, g_min_memory_allocation_size, g_num_tuple_threshold_switch_to_baseline, g_optimize_row_initialization, g_parallel_top_max, g_parallel_top_min, g_query_engine_cuda_streams, g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline, g_skip_intermediate_count, g_streaming_topn_max, g_strip_join_covered_quals, g_use_table_device_offset, g_vacuum_min_selectivity, and g_window_function_aggregation_tree_fanout.

Referenced by CommandLineOptions().

739  {
740  po::options_description& desc = developer_desc_;
741 
742  desc.add_options()("dev-options", "Print internal developer options.");
743  desc.add_options()(
744  "enable-calcite-view-optimize",
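745  po::value<bool>(&system_parameters.enable_calcite_view_optimize)
746  ->default_value(system_parameters.enable_calcite_view_optimize)
747  ->implicit_value(true),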
748  "Enable additional calcite (query plan) optimizations when a view is part of the "
749  "query.");
750  desc.add_options()("enable-columnar-output",
751  po::value<bool>(&g_enable_columnar_output)
752  ->default_value(g_enable_columnar_output)
753  ->implicit_value(true),
754  "Enable columnar output for intermediate/final query steps.");
755  desc.add_options()("enable-left-join-filter-hoisting",
756  po::value<bool>(&g_enable_left_join_filter_hoisting)
757  ->default_value(g_enable_left_join_filter_hoisting)
758  ->implicit_value(true),
759  "Enable hoisting left hand side filters through left joins.");
760  desc.add_options()("optimize-row-init",
761  po::value<bool>(&g_optimize_row_initialization)
762  ->default_value(g_optimize_row_initialization)
763  ->implicit_value(true),
764  "Optimize row initialization.");
765  desc.add_options()("enable-legacy-syntax",
766  po::value<bool>(&enable_legacy_syntax)
767  ->default_value(enable_legacy_syntax)
768  ->implicit_value(true),
769  "Enable legacy syntax.");
770  desc.add_options()(
771  "enable-multifrag",
772  po::value<bool>(&allow_multifrag)
773  ->default_value(allow_multifrag)
774  ->implicit_value(true),
775  "Enable execution over multiple fragments in a single round-trip to GPU.");
776  desc.add_options()("enable-lazy-fetch",
777  po::value<bool>(&g_enable_lazy_fetch)
778  ->default_value(g_enable_lazy_fetch)
779  ->implicit_value(true),
780  "Enable lazy fetch columns in query results.");
781  desc.add_options()("enable-shared-mem-group-by",
782  po::value<bool>(&g_enable_smem_group_by)
783  ->default_value(g_enable_smem_group_by)
784  ->implicit_value(true),
785  "Enable using GPU shared memory for some GROUP BY queries.");
786  desc.add_options()("num-executors",
787  po::value<int>(&system_parameters.num_executors)
788  ->default_value(system_parameters.num_executors),
789  "Number of executors to run in parallel.");
790  desc.add_options()(
791  "num-tuple-threshold-switch-to-baseline",
792  po::value<size_t>(&g_num_tuple_threshold_switch_to_baseline)
793  ->default_value(g_num_tuple_threshold_switch_to_baseline)
794  ->implicit_value(100000),
795  "Control a threshold to switch perfect hash join to baseline hash join by "
796  "comparing a hash entry range of the join column to the input table cardinality."
797  "This condition checks the following: |INPUT_TABLE| < {THIS_THRESHOLD}"
798  "We switch hash table layout when this condition and the condition related to "
799  "\'col-range-to-num-hash-entries-threshold-switch-to-baseline\' are satisfied "
800  "together.");
801  desc.add_options()(
802  "ratio-num-hash-entry-to-num-tuple-switch-to-baseline",
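803  po::value<size_t>(&g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline)
804  ->default_value(g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline)
805  ->implicit_value(100),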
806  "Control a threshold to switch perfect hash join to baseline hash join by "
807  "comparing a hash entry range of the join column to the input table cardinality."
808  "This condition checks the following: HASH_ENTRY_RANGE / |INPUT_TABLE| < "
809  "{THIS_THRESHOLD}"
810  "We switch hash table layout when this condition and the condition related to "
811  "\'num-tuple-threshold-switch-to-baseline\' are satisfied together.");
812  desc.add_options()(
813  "gpu-shared-mem-threshold",
814  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
815  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
816  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
817  desc.add_options()(
818  "enable-shared-mem-grouped-non-count-agg",
819  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
820  ->default_value(g_enable_smem_grouped_non_count_agg)
821  ->implicit_value(true),
822  "Enable using GPU shared memory for grouped non-count aggregate queries.");
823  desc.add_options()("enable-shared-mem-non-grouped-agg",
824  po::value<bool>(&g_enable_smem_non_grouped_agg)
825  ->default_value(g_enable_smem_non_grouped_agg)
826  ->implicit_value(true),
827  "Enable using GPU shared memory for non-grouped aggregate queries.");
828  desc.add_options()("enable-direct-columnarization",
829  po::value<bool>(&g_enable_direct_columnarization)
830  ->default_value(g_enable_direct_columnarization)
831  ->implicit_value(true),
832  "Enables/disables a more optimized columnarization method "
833  "for intermediate steps in multi-step queries.");
834  desc.add_options()(
835  "offset-device-by-table-id",
836  po::value<bool>(&g_use_table_device_offset)
837  ->default_value(g_use_table_device_offset)
838  ->implicit_value(true),
839  "Enables/disables offseting the chosen device ID by the table ID for a given "
840  "fragment. This improves balance of fragments across GPUs.");
841  desc.add_options()("enable-window-functions",
842  po::value<bool>(&g_enable_window_functions)
843  ->default_value(g_enable_window_functions)
844  ->implicit_value(true),
845  "Enable window function support.");
846  desc.add_options()("enable-parallel-window-partition-compute",
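847  po::value<bool>(&g_enable_parallel_window_partition_compute)
848  ->default_value(g_enable_parallel_window_partition_compute)
849  ->implicit_value(true),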
850  "Enable parallel window function partition computation.");
851  desc.add_options()("enable-parallel-window-partition-sort",
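852  po::value<bool>(&g_enable_parallel_window_partition_sort)
853  ->default_value(g_enable_parallel_window_partition_sort)
854  ->implicit_value(true),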
855  "Enable parallel window function partition sorting.");
856  desc.add_options()(
857  "window-function-frame-aggregation-tree-fanout",
858  po::value<size_t>(&g_window_function_aggregation_tree_fanout)->default_value(8),
859  "A tree fanout for aggregation tree used to compute aggregation over "
860  "window frame");
861  desc.add_options()("enable-dev-table-functions",
862  po::value<bool>(&g_enable_dev_table_functions)
863  ->default_value(g_enable_dev_table_functions)
864  ->implicit_value(true),
865  "Enable dev (test or alpha) table functions. Also "
866  "requires --enable-table-functions to be turned on");
867 
868  desc.add_options()("enable-geo-ops-on-uncompressed-coords",
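869  po::value<bool>(&g_enable_geo_ops_on_uncompressed_coords)
870  ->default_value(g_enable_geo_ops_on_uncompressed_coords)
871  ->implicit_value(true),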
872  "Enable faster geo operations on uncompressed coords");
873  desc.add_options()(
874  "jit-debug-ir",
875  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
876  "Enable runtime debugger support for the JIT. Note that this flag is "
877  "incompatible "
878  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
879  "`/tmp/mapdquery`.");
880  desc.add_options()(
881  "intel-jit-profile",
882  po::value<bool>(&intel_jit_profile)
883  ->default_value(intel_jit_profile)
884  ->implicit_value(true),
885  "Enable runtime support for the JIT code profiling using Intel VTune.");
886  desc.add_options()(
887  "enable-cpu-sub-tasks",
888  po::value<bool>(&g_enable_cpu_sub_tasks)
889  ->default_value(g_enable_cpu_sub_tasks)
890  ->implicit_value(true),
891  "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
892  "load balance and decrease reduction overhead.");
893  desc.add_options()(
894  "cpu-sub-task-size",
895  po::value<size_t>(&g_cpu_sub_task_size)->default_value(g_cpu_sub_task_size),
896  "Set CPU sub-task size in rows.");
897  desc.add_options()(
898  "cpu-threads",
899  po::value<unsigned>(&g_cpu_threads_override)->default_value(g_cpu_threads_override),
900  "Set max CPU concurrent threads. Values <= 0 will use default of 2X the number of "
901  "hardware threads.");
902  desc.add_options()(
903  "skip-intermediate-count",
904  po::value<bool>(&g_skip_intermediate_count)
905  ->default_value(g_skip_intermediate_count)
906  ->implicit_value(true),
907  "Skip pre-flight counts for intermediate projections with no filters.");
908  desc.add_options()("strip-join-covered-quals",
909  po::value<bool>(&g_strip_join_covered_quals)
910  ->default_value(g_strip_join_covered_quals)
911  ->implicit_value(true),
912  "Remove quals from the filtered count if they are covered by a "
913  "join condition (currently only ST_Contains).");
914 
915  desc.add_options()("min-cpu-slab-size",
916  po::value<size_t>(&system_parameters.min_cpu_slab_size)
917  ->default_value(system_parameters.min_cpu_slab_size),
918  "Min slab size (size of memory allocations) for CPU buffer pool.");
919  desc.add_options()(
920  "max-cpu-slab-size",
921  po::value<size_t>(&system_parameters.max_cpu_slab_size)
922  ->default_value(system_parameters.max_cpu_slab_size),
923  "Max CPU buffer pool slab size (size of memory allocations). Note if "
924  "there is not enough free memory to accomodate the target slab size, smaller "
925  "slabs will be allocated, down to the minimum size specified by "
926  "min-cpu-slab-size.");
927  desc.add_options()("min-gpu-slab-size",
928  po::value<size_t>(&system_parameters.min_gpu_slab_size)
929  ->default_value(system_parameters.min_gpu_slab_size),
930  "Min slab size (size of memory allocations) for GPU buffer pools.");
931  desc.add_options()(
932  "max-gpu-slab-size",
933  po::value<size_t>(&system_parameters.max_gpu_slab_size)
934  ->default_value(system_parameters.max_gpu_slab_size),
935  "Max GPU buffer pool slab size (size of memory allocations). Note if "
936  "there is not enough free memory to accomodate the target slab size, smaller "
937  "slabs will be allocated, down to the minimum size speified by "
938  "min-gpu-slab-size.");
939 
940  desc.add_options()(
941  "max-output-projection-allocation-bytes",
942  po::value<size_t>(&g_max_memory_allocation_size)
943  ->default_value(g_max_memory_allocation_size),
944  "Maximum allocation size for a fixed output buffer allocation for projection "
945  "queries with no pre-flight count. Default is the maximum slab size (sizes "
946  "greater "
947  "than the maximum slab size have no affect). Requires bump allocator.");
948  desc.add_options()(
949  "min-output-projection-allocation-bytes",
950  po::value<size_t>(&g_min_memory_allocation_size)
951  ->default_value(g_min_memory_allocation_size),
952  "Minimum allocation size for a fixed output buffer allocation for projection "
953  "queries with no pre-flight count. If an allocation of this size cannot be "
954  "obtained, the query will be retried with different execution parameters and/or "
955  "on "
956  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
957  desc.add_options()("enable-bump-allocator",
958  po::value<bool>(&g_enable_bump_allocator)
959  ->default_value(g_enable_bump_allocator)
960  ->implicit_value(true),
961  "Enable the bump allocator for projection queries on "
962  "GPU. The bump allocator will "
963  "allocate a fixed size buffer for each query, track the "
964  "number of rows passing the "
965  "kernel during query execution, and copy back only the "
966  "rows that passed the kernel "
967  "to CPU after execution. When disabled, pre-flight "
968  "count queries are used to size "
969  "the output buffer for projection queries.");
970  desc.add_options()(
971  "code-cache-eviction-percent",
972  po::value<float>(&g_fraction_code_cache_to_evict)
973  ->default_value(g_fraction_code_cache_to_evict),
974  "Percentage of the GPU code cache to evict if an out of memory error is "
975  "encountered while attempting to place generated code on the GPU.");
976 
977  desc.add_options()("ssl-cert",
978  po::value<std::string>(&system_parameters.ssl_cert_file)
979  ->default_value(std::string("")),
980  "SSL Validated public certficate.");
981 
982  desc.add_options()(
983  "gpu-code-cache-max-size-in-bytes",
984  po::value<size_t>(&g_gpu_code_cache_max_size_in_bytes)
985  ->default_value(g_gpu_code_cache_max_size_in_bytes),
986  "The maximum size of cached compiled codes for the gpu code cache in bytes.");
987 
988  desc.add_options()("ssl-private-key",
989  po::value<std::string>(&system_parameters.ssl_key_file)
990  ->default_value(std::string("")),
991  "SSL private key file.");
992  // Note ssl_trust_store is passed through to Calcite via system_parameters
993  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
994  desc.add_options()("ssl-trust-store",
995  po::value<std::string>(&system_parameters.ssl_trust_store)
996  ->default_value(std::string("")),
997  "SSL public CA certifcates (java trust store) to validate "
998  "TLS connections (passed through to the Calcite server).");
999 
1000  desc.add_options()(
1001  "ssl-trust-password",
1002  po::value<std::string>(&system_parameters.ssl_trust_password)
1003  ->default_value(std::string("")),
1004  "SSL password for java trust store provided via --ssl-trust-store parameter.");
1005 
1006  desc.add_options()(
1007  "ssl-trust-ca",
1008  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
1009  ->default_value(std::string("")),
1010  "SSL public CA certificates to validate TLS connection(as a client).");
1011 
1012  desc.add_options()(
1013  "ssl-trust-ca-server",
1014  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
1015  "SSL public CA certificates to validate TLS connection(as a server).");
1016 
1017  desc.add_options()("ssl-keystore",
1018  po::value<std::string>(&system_parameters.ssl_keystore)
1019  ->default_value(std::string("")),
1020  "SSL server credentials as a java key store (passed "
1021  "through to the Calcite server).");
1022 
1023  desc.add_options()("ssl-keystore-password",
1024  po::value<std::string>(&system_parameters.ssl_keystore_password)
1025  ->default_value(std::string("")),
1026  "SSL password for java keystore, provide by via --ssl-keystore.");
1027 
1028  desc.add_options()(
1029  "udf",
1030  po::value<std::string>(&udf_file_name),
1031  "Load user defined extension functions from this file at startup. The file is "
1032  "expected to be a C/C++ file with extension .cpp.");
1033 
1034  desc.add_options()("udf-compiler-path",
1035  po::value<std::string>(&udf_compiler_path),
1036  "Provide absolute path to clang++ used in udf compilation.");
1037 
1038  desc.add_options()("udf-compiler-options",
1039  po::value<std::vector<std::string>>(&udf_compiler_options),
1040  "Specify compiler options to tailor udf compilation.");
1041 
1042 #ifdef ENABLE_GEOS
1043  desc.add_options()("libgeos-so-filename",
1044  po::value<std::string>(&libgeos_so_filename),
1045  "Specify libgeos shared object filename to be used for "
1046  "geos-backed geo opertations.");
1047 #endif
1048  desc.add_options()(
1049  "large-ndv-threshold",
1050  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
1051  desc.add_options()(
1052  "large-ndv-multiplier",
1053  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
1054  desc.add_options()("approx_quantile_buffer",
1055  po::value<size_t>(&g_approx_quantile_buffer)
1056  ->default_value(g_approx_quantile_buffer));
1057  desc.add_options()("approx_quantile_centroids",
1058  po::value<size_t>(&g_approx_quantile_centroids)
1059  ->default_value(g_approx_quantile_centroids));
1060  desc.add_options()(
1061  "bitmap-memory-limit",
1062  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
1063  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
1064  "size of the group by buffer (entry count in Query Memory Descriptor) and "
1065  "multiplying it by the number of count distinct expression and the size of bitmap "
1066  "required for each. For approx_count_distinct this is typically 8192 bytes.");
1067  desc.add_options()(
1068  "enable-filter-function",
1069  po::value<bool>(&g_enable_filter_function)
1070  ->default_value(g_enable_filter_function)
1071  ->implicit_value(true),
1072  "Enable the filter function protection feature for the SQL JIT compiler. "
1073  "Normally should be on but techs might want to disable for troubleshooting.");
1074  desc.add_options()(
1075  "enable-idp-temporary-users",
1076  po::value<bool>(&g_enable_idp_temporary_users)
1077  ->default_value(g_enable_idp_temporary_users)
1078  ->implicit_value(true),
1079  "Enable temporary users for SAML and LDAP logins on read-only servers. "
1080  "Normally should be on but techs might want to disable for troubleshooting.");
1081  desc.add_options()("enable-foreign-table-scheduled-refresh",
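1082  po::value<bool>(&g_enable_foreign_table_scheduled_refresh)
1083  ->default_value(g_enable_foreign_table_scheduled_refresh)
1084  ->implicit_value(true),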
1085  "Enable scheduled foreign table refresh.");
1086  desc.add_options()(
1087  "enable-seconds-refresh-interval",
1088  po::value<bool>(&g_enable_seconds_refresh)
1089  ->default_value(g_enable_seconds_refresh)
1090  ->implicit_value(true),
1091  "Enable foreign table seconds refresh interval for testing purposes.");
1092  desc.add_options()("enable-auto-metadata-update",
1093  po::value<bool>(&g_enable_auto_metadata_update)
1094  ->default_value(g_enable_auto_metadata_update)
1095  ->implicit_value(true),
1096  "Enable automatic metadata update.");
1097  desc.add_options()(
1098  "parallel-top-min",
1099  po::value<size_t>(&g_parallel_top_min)->default_value(g_parallel_top_min),
1100  "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
1101  "parallelTop() to sort.");
1102  desc.add_options()(
1103  "parallel-top-max",
1104  po::value<size_t>(&g_parallel_top_max)->default_value(g_parallel_top_max),
1105  "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
1106  "watchdog.");
1107  desc.add_options()(
1108  "streaming-top-n-max",
1109  po::value<size_t>(&g_streaming_topn_max)->default_value(g_streaming_topn_max),
1110  "The maximum number of rows allowing streaming top-N sorting.");
1111  desc.add_options()("vacuum-min-selectivity",
1112  po::value<float>(&g_vacuum_min_selectivity)
1113  ->default_value(g_vacuum_min_selectivity),
1114  "Minimum selectivity for automatic vacuuming. "
1115  "This specifies the percentage (with a value of 0 "
1116  "implying 0% and a value of 1 implying 100%) of "
1117  "deleted rows in a fragment at which to perform "
1118  "automatic vacuuming. A number greater than 1 can "
1119  "be used to disable automatic vacuuming.");
1120  desc.add_options()("enable-automatic-ir-metadata",
1121  po::value<bool>(&g_enable_automatic_ir_metadata)
1122  ->default_value(g_enable_automatic_ir_metadata)
1123  ->implicit_value(true),
1124  "Enable automatic IR metadata (debug builds only).");
1125  desc.add_options()(
1126  "max-log-length",
1127  po::value<size_t>(&g_max_log_length)->default_value(g_max_log_length),
1128  "The maximum number of characters that a log message can has. If the log message "
1129  "is longer than this, we only record \'g_max_log_message_length\' characters.");
1130  desc.add_options()(
1131  "estimator-failure-max-groupby-size",
1132  po::value<size_t>(&g_estimator_failure_max_groupby_size)
1133  ->default_value(g_estimator_failure_max_groupby_size),
1134  "Maximum size of the groupby buffer if the estimator fails. By default we use the "
1135  "number of tuples in the table up to this value.");
1136  desc.add_options()("columnar-large-projections",
1137  po::value<bool>(&g_columnar_large_projections)
1138  ->default_value(g_columnar_large_projections)
1139  ->implicit_value(true),
1140  "Prefer columnar output if projection size is >= "
1141  "threshold set by --columnar-large-projections-threshold "
1142  "(default 1,000,000 rows).");
1143  desc.add_options()(
1144  "columnar-large-projections-threshold",
1145  po::value<size_t>(&g_columnar_large_projections_threshold)
1146  ->default_value(g_columnar_large_projections_threshold),
1147  "Threshold (in minimum number of rows) to prefer columnar output for projections. "
1148  "Requires --columnar-large-projections to be set.");
1149 
1150  desc.add_options()(
1151  "allow-memory-status-log",
1152  po::value<bool>(&g_allow_memory_status_log)
1153  ->default_value(g_allow_memory_status_log),
1154  "Allow CPU (and GPU if necessary) memory status before/after the query execution.");
1155 
1156  desc.add_options()(
1157  "allow-query-step-cpu-retry",
1158  po::value<bool>(&g_allow_query_step_cpu_retry)
1159  ->default_value(g_allow_query_step_cpu_retry)
1160  ->implicit_value(true),
1161  R"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
1162  desc.add_options()("enable-http-binary-server",
1163  po::value<bool>(&g_enable_http_binary_server)
1164  ->default_value(g_enable_http_binary_server)
1165  ->implicit_value(true),
1166  "Enable binary over HTTP Thrift server");
1167 
1168  desc.add_options()("enable-query-engine-cuda-streams",
1169  po::value<bool>(&g_query_engine_cuda_streams)
1170  ->default_value(g_query_engine_cuda_streams)
1171  ->implicit_value(true),
1172  "Enable Query Engine CUDA streams");
1173 
1174  desc.add_options()(
1175  "allow-invalid-literal-buffer-reads",
1176  po::value<bool>(&g_allow_invalid_literal_buffer_reads)
1177  ->default_value(g_allow_invalid_literal_buffer_reads)
1178  ->implicit_value(true),
1179  "For backwards compatibility. Enabling may cause invalid query results.");
1180 
1181 #ifdef HAVE_TORCH_TFS
1182  desc.add_options()("torch-lib-path",
1183  po::value<std::string>(&torch_lib_path),
1184  "Absolute path to custom LibTorch shared library location to be "
1185  "loaded at runtime. (If not provided, the library will be searched "
1186  "for in the system's default library path.)");
1187 #endif
1188 }
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
bool g_enable_parallel_window_partition_sort
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:103
bool g_enable_smem_group_by
size_t g_parallel_top_max
Definition: ResultSet.cpp:50
size_t g_num_tuple_threshold_switch_to_baseline
Definition: Execute.cpp:106
size_t g_cpu_sub_task_size
Definition: Execute.cpp:86
bool g_allow_memory_status_log
Definition: Execute.cpp:123
bool g_strip_join_covered_quals
Definition: Execute.cpp:112
size_t g_gpu_code_cache_max_size_in_bytes
Definition: QueryEngine.h:12
bool g_enable_direct_columnarization
Definition: Execute.cpp:130
bool g_enable_lazy_fetch
Definition: Execute.cpp:132
std::string udf_compiler_path
bool g_skip_intermediate_count
bool enable_calcite_view_optimize
unsigned g_cpu_threads_override
bool g_enable_auto_metadata_update
size_t g_streaming_topn_max
Definition: ResultSet.cpp:51
bool g_enable_geo_ops_on_uncompressed_coords
Definition: Execute.cpp:121
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:146
size_t g_parallel_top_min
Definition: ResultSet.cpp:49
std::string ssl_trust_ca_file
bool g_enable_columnar_output
Definition: Execute.cpp:102
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
Definition: Execute.cpp:107
std::string ssl_trust_store
bool g_enable_idp_temporary_users
Definition: SysCatalog.cpp:63
size_t g_window_function_aggregation_tree_fanout
bool g_enable_http_binary_server
int64_t g_bitmap_memory_limit
size_t g_max_memory_allocation_size
Definition: Execute.cpp:124
size_t g_approx_quantile_buffer
Definition: Execute.cpp:167
size_t g_max_log_length
Definition: Execute.cpp:172
bool g_enable_dev_table_functions
Definition: Execute.cpp:120
std::string ca_file_name
Definition: AuthMetadata.h:31
std::string ssl_key_file
AuthMetadata authMetadata
bool g_enable_window_functions
Definition: Execute.cpp:116
size_t g_min_memory_allocation_size
Definition: Execute.cpp:125
bool g_enable_seconds_refresh
size_t g_estimator_failure_max_groupby_size
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:143
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:170
std::vector< std::string > udf_compiler_options
bool g_enable_foreign_table_scheduled_refresh
float g_vacuum_min_selectivity
bool g_enable_filter_function
Definition: Execute.cpp:87
float g_fraction_code_cache_to_evict
bool g_allow_invalid_literal_buffer_reads
Definition: ConstantIR.cpp:140
std::string ssl_keystore_password
std::string ssl_trust_password
bool g_enable_bump_allocator
Definition: Execute.cpp:128
bool g_enable_parallel_window_partition_compute
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:85
std::string ssl_keystore
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:90
size_t g_approx_quantile_centroids
Definition: Execute.cpp:168
bool g_optimize_row_initialization
Definition: Execute.cpp:104
bool g_columnar_large_projections
size_t g_columnar_large_projections_threshold
bool g_query_engine_cuda_streams
Definition: QueryEngine.h:10
po::options_description developer_desc_
size_t g_large_ndv_multiplier
SystemParameters system_parameters
size_t g_gpu_smem_threshold
Definition: Execute.cpp:138
std::string ssl_cert_file

+ Here is the caller graph for this function:

void CommandLineOptions::fillOptions ( )

Definition at line 99 of file CommandLineOptions.cpp.

References g_allow_auto_resultset_caching, g_allow_cpu_retry, g_allow_query_step_skipping, g_auto_resultset_caching_threshold, g_bbox_intersect_max_table_size_bytes, g_bbox_intersect_target_entries_per_bin, g_bigint_count, g_cache_string_hash, g_enable_add_metadata_columns, g_enable_bbox_intersect_hashjoin, g_enable_debug_timer, g_enable_distance_rangejoin, g_enable_executor_resource_mgr, g_enable_filter_push_down, g_enable_fsi, g_enable_fsi_regex_import, g_enable_hashjoin_many_to_many, g_enable_interop, g_enable_legacy_delimited_import, g_enable_logs_system_tables, g_enable_ml_functions, g_enable_string_functions, g_enable_stringdict_parallel, g_enable_system_tables, g_enable_table_functions, g_enable_thrift_logs, g_enable_union, g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency, g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency, g_executor_resource_mgr_cpu_result_mem_bytes, g_executor_resource_mgr_cpu_result_mem_ratio, g_executor_resource_mgr_max_available_resource_use_ratio, g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio, g_executor_resource_mgr_per_query_max_cpu_slots_ratio, g_filter_push_down_high_frac, g_filter_push_down_low_frac, g_filter_push_down_passing_row_ubound, g_from_table_reordering, g_hll_precision_bits, g_inner_join_fragment_skipping, Catalog_Namespace::g_log_user_id, g_logs_system_tables_max_files_count, g_max_cacheable_query_resultset_size_bytes, g_max_import_threads, g_null_div_by_zero, g_pmem_path, g_pmem_size, g_query_resultset_cache_total_bytes, g_restrict_ml_model_metadata_to_superusers, g_trivial_loop_join_threshold, g_uniform_request_ids_per_thrift_call, g_use_chunk_metadata_cache, g_use_query_resultset_cache, and run_benchmark_import::required.

Referenced by CommandLineOptions().

99  {
100  po::options_description& desc = help_desc_;
101 
102  desc.add_options()("help,h", "Show available options.");
103  desc.add_options()(
104  "allow-cpu-retry",
105  po::value<bool>(&g_allow_cpu_retry)
106  ->default_value(g_allow_cpu_retry)
107  ->implicit_value(true),
108  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
109  desc.add_options()("allow-loop-joins",
110  po::value<bool>(&allow_loop_joins)
111  ->default_value(allow_loop_joins)
112  ->implicit_value(true),
113  "Enable loop joins.");
114  desc.add_options()("bigint-count",
115  po::value<bool>(&g_bigint_count)
116  ->default_value(g_bigint_count)
117  ->implicit_value(true),
118  "Use 64-bit count.");
119 
120  desc.add_options()(
121  "enable-executor-resource-mgr",
122  po::value<bool>(&g_enable_executor_resource_mgr)
123  ->default_value(g_enable_executor_resource_mgr)
124  ->implicit_value(true),
125  "Enable executor resource manager to track execution resources and selectively "
126  "gate concurrency based on resource availability.");
127 
128  // Note we allow executor-cpu-result-mem-ratio to have values > 0 to allow
129  // oversubscription of memory when warranted, but user should be careful with this as
130  // too high a value can cause OOM errors.
131  desc.add_options()(
132  "executor-cpu-result-mem-ratio",
135  "Set executor resource manager reserved memory for query result sets as a ratio "
136  "greater than 0, representing the fraction of the system memory not allocated for "
137  "the CPU buffer pool. Values of 1.0 are permitted to allow oversubscription when "
138  "warranted, but too high a value can cause out-of-memory errors. Requires "
139  "--executor-resource-mgr to be set");
140 
141  desc.add_options()(
142  "executor-cpu-result-mem-bytes",
145  "Set executor resource manager reserved memory for query result sets in bytes, "
146  "this overrides the default reservation of 80% the size of the system memory that "
147  "is not allocated for the CPU buffer pool. Use 0 for auto. Requires "
148  "--enable-executor-resource-mgr to be set.");
149 
150  // Note we allow executor-per-query-max-cpu-threads-ratio to have values > 1 to allow
151  // oversubscription of threads when warranted, given we may be overly pessimistic about
152  // kernel core occupation for some classes of queries. Care should be taken however with
153  // setting this value too high as thrashing and thread starvation can result.
154  desc.add_options()(
155  "executor-per-query-max-cpu-threads-ratio",
158  "Set max fraction of executor resource manager total CPU slots/threads that can be "
159  "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
160 
161  // Note we allow executor-per-query-max-cpu-result-mem-ratio to have values > 0 to allow
162  // oversubscription of memory when warranted, but user should be careful with this as
163  // too high a value can cause OOM errors.
164  desc.add_options()(
165  "executor-per-query-max-cpu-result-mem-ratio",
168  "Set max fraction of executor resource manager total CPU result memory reservation "
169  "that can be "
170  "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
171 
172  desc.add_options()(
173  "allow-cpu-kernel-concurrency",
176  ->implicit_value(true),
177  "Allow for multiple queries to run execution kernels concurrently on CPU. Requires "
178  "--enable-executor-resource-mgr to be set.");
179 
180  desc.add_options()(
181  "allow-cpu-gpu-kernel-concurrency",
184  ->implicit_value(true),
185  "Allow multiple queries to run execution kernels concurrently on CPU while a "
186  "GPU query is executing. Requires --enable-executor-resource-mgr to be set.");
187 
188  // Below controls whether multiple concurrent queries in conjunction can oversubscribe
189  // CPU slots/threads Single query CPU slot oversubscription should be controlled with
190  // --executor-per-query-max-cpu-threads-ratio (i.e. by setting it to > 1.0)
191 
192  desc.add_options()(
193  "allow-cpu-thread-oversubscription-concurrency",
194  po::value<bool>(
196  ->default_value(
198  ->implicit_value(true),
199  "Allow for concurrent query kernel execution even if it results in "
200  "oversubscription of CPU threads. Caution should be used when turning this on as "
201  "it can lead to thread exhaustion. Requires --enable-executor-resource-mgr to be "
202  "set.");
203 
204  // Below controls whether multiple concurrent queries in conjunction can oversubscribe
205  // CPU result memory. Single query CPU result memory oversubscription should be
206  // controlled with
207  // --executor-per-query-cpu-result-mem-ratio (i.e. by setting it to > 1.0)
208 
209  desc.add_options()(
210  "allow-cpu-result-mem-oversubscription-concurrency",
211  po::value<bool>(
213  ->default_value(
215  ->implicit_value(true),
216  "Allow for concurrent query kernel execution even if it results in "
217  "oversubscription of CPU memory. Caution should be used when turning this on as it "
218  "can lead to out-of-memory errors. Requires --enable-executor-resource-mgr to be "
219  "set.");
220 
221  desc.add_options()(
222  "executor-max-available-resource-use-ratio",
225  "Set max proportion (0 < ratio <= 1.0) of available resources that should be "
226  "granted to a query. Requires --executor-resource-mgr to be set");
227 
228  desc.add_options()("calcite-max-mem",
229  po::value<size_t>(&system_parameters.calcite_max_mem)
230  ->default_value(system_parameters.calcite_max_mem),
231  "Max memory available to calcite JVM.");
232  if (!dist_v5_) {
233  desc.add_options()("calcite-port",
234  po::value<int>(&system_parameters.calcite_port)
235  ->default_value(system_parameters.calcite_port),
236  "Calcite port number.");
237  }
238  desc.add_options()("config",
239  po::value<std::string>(&system_parameters.config_file),
240  "Path to server configuration file.");
241  desc.add_options()("cpu-buffer-mem-bytes",
242  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
243  ->default_value(system_parameters.cpu_buffer_mem_bytes),
244  "Size of memory reserved for CPU buffers, in bytes.");
245 
246  desc.add_options()("cpu-only",
247  po::value<bool>(&system_parameters.cpu_only)
248  ->default_value(system_parameters.cpu_only)
249  ->implicit_value(true),
250  "Run on CPU only, even if GPUs are available.");
251  desc.add_options()("cuda-block-size",
252  po::value<size_t>(&system_parameters.cuda_block_size)
253  ->default_value(system_parameters.cuda_block_size),
254  "Size of block to use on NVIDIA GPU.");
255  desc.add_options()("cuda-grid-size",
256  po::value<size_t>(&system_parameters.cuda_grid_size)
257  ->default_value(system_parameters.cuda_grid_size),
258  "Size of grid to use on NVIDIA GPU.");
259  desc.add_options()("optimize-cuda-block-and-grid-sizes",
260  po::value<bool>(&optimize_cuda_block_and_grid_sizes)
261  ->default_value(false)
262  ->implicit_value(true));
263 
264  if (!dist_v5_) {
265  desc.add_options()(
266  "data",
267  po::value<std::string>(&base_path)->required()->default_value("storage"),
268  "Directory path to HeavyDB data storage (catalogs, raw data, log files, etc).");
269  positional_options.add("data", 1);
270  }
271  desc.add_options()("db-query-list",
272  po::value<std::string>(&db_query_file),
273  "Path to file containing HeavyDB warmup queries.");
274  desc.add_options()(
275  "exit-after-warmup",
276  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
277  "Exit after HeavyDB warmup queries.");
278  desc.add_options()("dynamic-watchdog-time-limit",
279  po::value<unsigned>(&dynamic_watchdog_time_limit)
280  ->default_value(dynamic_watchdog_time_limit)
281  ->implicit_value(10000),
282  "Dynamic watchdog time limit, in milliseconds.");
283  desc.add_options()("enable-data-recycler",
284  po::value<bool>(&enable_data_recycler)
285  ->default_value(enable_data_recycler)
286  ->implicit_value(true),
287  "Use data recycler.");
288  desc.add_options()("use-hashtable-cache",
289  po::value<bool>(&use_hashtable_cache)
290  ->default_value(use_hashtable_cache)
291  ->implicit_value(true),
292  "Use hashtable cache.");
293  desc.add_options()("use-query-resultset-cache",
294  po::value<bool>(&g_use_query_resultset_cache)
295  ->default_value(g_use_query_resultset_cache)
296  ->implicit_value(true),
297  "Use query resultset cache.");
298  desc.add_options()("use-chunk-metadata-cache",
299  po::value<bool>(&g_use_chunk_metadata_cache)
300  ->default_value(g_use_chunk_metadata_cache)
301  ->implicit_value(true),
302  "Use chunk metadata cache.");
303  desc.add_options()(
304  "hashtable-cache-total-bytes",
305  po::value<size_t>(&hashtable_cache_total_bytes)
306  ->default_value(hashtable_cache_total_bytes)
307  ->implicit_value(4294967296),
308  "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
309  desc.add_options()("max-cacheable-hashtable-size-bytes",
310  po::value<size_t>(&max_cacheable_hashtable_size_bytes)
311  ->default_value(max_cacheable_hashtable_size_bytes)
312  ->implicit_value(2147483648),
313  "The maximum size of hashtable that is available to cache, in "
314  "bytes (default: 2GB).");
315  desc.add_options()(
316  "query-resultset-cache-total-bytes",
317  po::value<size_t>(&g_query_resultset_cache_total_bytes)
318  ->default_value(g_query_resultset_cache_total_bytes),
319  "Size of total memory space for query resultset cache, in bytes (default: 4GB).");
320  desc.add_options()("max-query-resultset-size-bytes",
323  "The maximum size of query resultset that is available to cache, in "
324  "bytes (default: 2GB).");
325  desc.add_options()("allow-auto-query-resultset-caching",
326  po::value<bool>(&g_allow_auto_resultset_caching)
327  ->default_value(g_allow_auto_resultset_caching)
328  ->implicit_value(true),
329  "Allow automatic query resultset caching when the size of "
330  "query resultset is smaller or equal to the threshold defined "
331  "by `auto-resultset-caching-threshold-bytes`, in bytes (to "
332  "enable this, query resultset recycler "
333  "should be enabled, default: 1048576 bytes (or 1MB)).");
334  desc.add_options()(
335  "auto-resultset-caching-threshold-bytes",
336  po::value<size_t>(&g_auto_resultset_caching_threshold)
337  ->default_value(g_auto_resultset_caching_threshold),
338  "A threshold that allows caching query resultset automatically if the size of "
339  "resultset is less than it, in bytes (default: 1MB).");
340  desc.add_options()("allow-query-step-skipping",
341  po::value<bool>(&g_allow_query_step_skipping)
342  ->default_value(g_allow_query_step_skipping)
343  ->implicit_value(true),
344  "Allow query step skipping when multi-step query has at least "
345  "one cached query resultset.");
346  desc.add_options()("enable-debug-timer",
347  po::value<bool>(&g_enable_debug_timer)
348  ->default_value(g_enable_debug_timer)
349  ->implicit_value(true),
350  "Enable debug timer logging.");
351  desc.add_options()("enable-dynamic-watchdog",
352  po::value<bool>(&enable_dynamic_watchdog)
353  ->default_value(enable_dynamic_watchdog)
354  ->implicit_value(true),
355  "Enable dynamic watchdog.");
356  desc.add_options()("enable-filter-push-down",
357  po::value<bool>(&g_enable_filter_push_down)
358  ->default_value(g_enable_filter_push_down)
359  ->implicit_value(true),
360  "Enable filter push down through joins.");
361  desc.add_options()(
362  "enable-bbox-intersect-hashjoin",
363  po::value<bool>(&g_enable_bbox_intersect_hashjoin)
364  ->default_value(g_enable_bbox_intersect_hashjoin)
365  ->implicit_value(true),
366  "Enable the bounding box intersect hash join framework to enable post-filtering of "
367  "pairs of geometries before actually comptuing geometry function.");
368  desc.add_options()("enable-hashjoin-many-to-many",
369  po::value<bool>(&g_enable_hashjoin_many_to_many)
370  ->default_value(g_enable_hashjoin_many_to_many)
371  ->implicit_value(true),
372  "Enable the bounding box intersect hash join framework to more "
373  "spatial join operators for pairs of geometry types corresponding "
374  "to many-to-many relationship.");
375  desc.add_options()("enable-distance-rangejoin",
376  po::value<bool>(&g_enable_distance_rangejoin)
377  ->default_value(g_enable_distance_rangejoin)
378  ->implicit_value(true),
379  "Enable accelerating point distance joins with a hash table. "
380  "This rewrites ST_Distance when using an upperbound (<= X).");
381  desc.add_options()("enable-runtime-query-interrupt",
382  po::value<bool>(&enable_runtime_query_interrupt)
383  ->default_value(enable_runtime_query_interrupt)
384  ->implicit_value(true),
385  "Enable runtime query interrupt.");
386  desc.add_options()("enable-non-kernel-time-query-interrupt",
389  ->implicit_value(true),
390  "Enable non-kernel time query interrupt.");
391  desc.add_options()("pending-query-interrupt-freq",
392  po::value<unsigned>(&pending_query_interrupt_freq)
393  ->default_value(pending_query_interrupt_freq)
394  ->implicit_value(1000),
395  "A frequency of checking the request of pending query "
396  "interrupt from user (in millisecond).");
397  desc.add_options()("running-query-interrupt-freq",
398  po::value<double>(&running_query_interrupt_freq)
399  ->default_value(running_query_interrupt_freq)
400  ->implicit_value(0.5),
401  "A frequency of checking the request of running query "
402  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
403  desc.add_options()("use-estimator-result-cache",
404  po::value<bool>(&use_estimator_result_cache)
405  ->default_value(use_estimator_result_cache)
406  ->implicit_value(true),
407  "Use estimator result cache.");
408  if (!dist_v5_) {
409  desc.add_options()(
410  "enable-string-dict-hash-cache",
411  po::value<bool>(&g_cache_string_hash)
412  ->default_value(g_cache_string_hash)
413  ->implicit_value(true),
414  "Cache string hash values in the string dictionary server during import.");
415  }
416  desc.add_options()("enable-thrift-logs",
417  po::value<bool>(&g_enable_thrift_logs)
418  ->default_value(g_enable_thrift_logs)
419  ->implicit_value(true),
420  "Enable writing messages directly from thrift to stdout/stderr.");
421  desc.add_options()("enable-watchdog",
422  po::value<bool>(&enable_watchdog)
423  ->default_value(enable_watchdog)
424  ->implicit_value(true),
425  "Enable watchdog.");
426  desc.add_options()("watchdog-max-projected-rows-per-device",
427  po::value<size_t>(&watchdog_max_projected_rows_per_device)
429  "Max number of rows allowed to be projected when running a query "
430  "with watchdog enabled.");
431  desc.add_options()(
432  "preflight-count-query-threshold",
433  po::value<size_t>(&preflight_count_query_threshold)
434  ->default_value(preflight_count_query_threshold),
435  "Threshold to run pre-flight count query which computes # output rows accurately.");
436  desc.add_options()("watchdog-none-encoded-string-translation-limit",
439  "Max number of none-encoded strings allowed to be translated "
440  "to dictionary-encoded with watchdog enabled");
441  desc.add_options()("filter-push-down-low-frac",
442  po::value<float>(&g_filter_push_down_low_frac)
443  ->default_value(g_filter_push_down_low_frac)
444  ->implicit_value(g_filter_push_down_low_frac),
445  "Lower threshold for selectivity of filters that are pushed down.");
446  desc.add_options()("filter-push-down-high-frac",
447  po::value<float>(&g_filter_push_down_high_frac)
448  ->default_value(g_filter_push_down_high_frac)
449  ->implicit_value(g_filter_push_down_high_frac),
450  "Higher threshold for selectivity of filters that are pushed down.");
451  desc.add_options()("filter-push-down-passing-row-ubound",
452  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
454  ->implicit_value(g_filter_push_down_passing_row_ubound),
455  "Upperbound on the number of rows that should pass the filter "
456  "if the selectivity is less than "
457  "the high fraction threshold.");
458  desc.add_options()("from-table-reordering",
459  po::value<bool>(&g_from_table_reordering)
460  ->default_value(g_from_table_reordering)
461  ->implicit_value(true),
462  "Enable automatic table reordering in FROM clause.");
463  desc.add_options()("gpu-buffer-mem-bytes",
464  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
465  ->default_value(system_parameters.gpu_buffer_mem_bytes),
466  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
467  desc.add_options()("gpu-input-mem-limit",
468  po::value<double>(&system_parameters.gpu_input_mem_limit)
469  ->default_value(system_parameters.gpu_input_mem_limit),
470  "Force query to CPU when input data memory usage exceeds this "
471  "percentage of available GPU memory.");
472  desc.add_options()(
473  "hll-precision-bits",
474  po::value<int>(&g_hll_precision_bits)
475  ->default_value(g_hll_precision_bits)
476  ->implicit_value(g_hll_precision_bits),
477  "Number of bits used from the hash value used to specify the bucket number.");
478  if (!dist_v5_) {
479  desc.add_options()("http-port",
480  po::value<int>(&http_port)->default_value(http_port),
481  "HTTP port number.");
482  desc.add_options()("http-binary-port",
483  po::value<int>(&http_binary_port)->default_value(http_binary_port),
484  "HTTP binary port number.");
485  }
486  desc.add_options()(
487  "idle-session-duration",
488  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
489  "Maximum duration of idle session.");
490  desc.add_options()("inner-join-fragment-skipping",
491  po::value<bool>(&g_inner_join_fragment_skipping)
492  ->default_value(g_inner_join_fragment_skipping)
493  ->implicit_value(true),
494  "Enable/disable inner join fragment skipping. This feature is "
495  "considered stable and is enabled by default. This "
496  "parameter will be removed in a future release.");
497  desc.add_options()(
498  "max-session-duration",
499  po::value<int>(&max_session_duration)->default_value(max_session_duration),
500  "Maximum duration of active session.");
501  desc.add_options()("num-sessions",
502  po::value<int>(&system_parameters.num_sessions)
503  ->default_value(system_parameters.num_sessions),
504  "Maximum number of active session.");
505  desc.add_options()("null-div-by-zero",
506  po::value<bool>(&g_null_div_by_zero)
507  ->default_value(g_null_div_by_zero)
508  ->implicit_value(true),
509  "Return null on division by zero instead of throwing an exception.");
510  desc.add_options()(
511  "num-reader-threads",
512  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
513  "Number of reader threads to use.");
514  desc.add_options()(
515  "max-import-threads",
516  po::value<size_t>(&g_max_import_threads)->default_value(g_max_import_threads),
517  "Max number of default import threads to use (num hardware threads will be used "
518  "instead if lower). Can be overriden with copy statement threads option).");
519  desc.add_options()(
520  "bbox-intersect-max-table-size-bytes",
521  po::value<size_t>(&g_bbox_intersect_max_table_size_bytes)
522  ->default_value(g_bbox_intersect_max_table_size_bytes),
523  "The maximum size in bytes of the hash table for bounding box intersect.");
524  desc.add_options()("bbox-intersect-target-entries-per-bin",
525  po::value<double>(&g_bbox_intersect_target_entries_per_bin)
527  "The target number of entries per bin for bounding box intersect");
528  if (!dist_v5_) {
529  desc.add_options()("port,p",
530  po::value<int>(&system_parameters.omnisci_server_port)
531  ->default_value(system_parameters.omnisci_server_port),
532  "TCP Port number.");
533  }
534  desc.add_options()("num-gpus",
535  po::value<int>(&system_parameters.num_gpus)
536  ->default_value(system_parameters.num_gpus),
537  "Number of gpus to use.");
538  desc.add_options()(
539  "read-only",
540  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
541  "Enable read-only mode.");
542 
543  desc.add_options()(
544  "res-gpu-mem",
545  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
546  "Reduces GPU memory available to the HeavyDB allocator by this amount. Used for "
547  "compiled code cache and ancillary GPU functions and other processes that may also "
548  "be using the GPU concurrent with HeavyDB.");
549 
550  desc.add_options()("start-gpu",
551  po::value<int>(&system_parameters.start_gpu)
552  ->default_value(system_parameters.start_gpu),
553  "First gpu to use.");
554  desc.add_options()("trivial-loop-join-threshold",
555  po::value<unsigned>(&g_trivial_loop_join_threshold)
556  ->default_value(g_trivial_loop_join_threshold)
557  ->implicit_value(1000),
558  "The maximum number of rows in the inner table of a loop join "
559  "considered to be trivially small.");
560  desc.add_options()(
561  "uniform-request-ids-per-thrift-call",
562  po::value<bool>(&g_uniform_request_ids_per_thrift_call)
564  ->implicit_value(true),
565  "If true (default) then assign the same request_id to thrift calls that were "
566  "initiated by the same external thrift call. If false then assign different "
567  "request_ids and log the parent/child relationships.");
568  desc.add_options()("verbose",
569  po::value<bool>(&verbose_logging)
570  ->default_value(verbose_logging)
571  ->implicit_value(true),
572  "Write additional debug log messages to server logs.");
573  desc.add_options()(
574  "enable-runtime-udf",
575  po::value<bool>(&enable_runtime_udf)
576  ->default_value(enable_runtime_udf)
577  ->implicit_value(true),
578  "DEPRECATED. Please use `enable-runtime-udfs` instead as this flag will be removed "
579  "in the near future.");
580  desc.add_options()(
581  "enable-runtime-udfs",
582  po::value<bool>(&enable_runtime_udfs)
583  ->default_value(enable_runtime_udfs)
584  ->implicit_value(true),
585  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
586  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
587  "Compiler server, packaged separately.");
588  desc.add_options()("enable-udf-registration-for-all-users",
589  po::value<bool>(&enable_udf_registration_for_all_users)
591  ->implicit_value(true),
592  "Allow all users, not just superusers, to register runtime "
593  "UDFs/UDTFs. Option only valid if "
594  "`--enable-runtime-udfs` is set to true.");
595  desc.add_options()("version,v", "Print Version Number.");
596  desc.add_options()("enable-string-functions",
597  po::value<bool>(&g_enable_string_functions)
598  ->default_value(g_enable_string_functions)
599  ->implicit_value(true),
600  "Enable experimental string functions.");
601  desc.add_options()("enable-experimental-string-functions",
602  po::value<bool>(&g_enable_string_functions)
603  ->default_value(g_enable_string_functions)
604  ->implicit_value(true),
605  "DEPRECATED. String functions are now enabled by default, "
606  "but can still be controlled with --enable-string-functions.");
607  desc.add_options()(
608  "enable-fsi",
609  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
610  "Enable foreign storage interface.");
611 
612  desc.add_options()("enable-legacy-delimited-import",
613  po::value<bool>(&g_enable_legacy_delimited_import)
614  ->default_value(g_enable_legacy_delimited_import)
615  ->implicit_value(true),
616  "Use legacy importer for delimited sources.");
617 #ifdef ENABLE_IMPORT_PARQUET
618  desc.add_options()("enable-legacy-parquet-import",
619  po::value<bool>(&g_enable_legacy_parquet_import)
620  ->default_value(g_enable_legacy_parquet_import)
621  ->implicit_value(true),
622  "Use legacy importer for parquet sources.");
623 #endif
624  desc.add_options()("enable-fsi-regex-import",
625  po::value<bool>(&g_enable_fsi_regex_import)
626  ->default_value(g_enable_fsi_regex_import)
627  ->implicit_value(true),
628  "Use FSI importer for regex parsed sources.");
629 
630  desc.add_options()("enable-add-metadata-columns",
631  po::value<bool>(&g_enable_add_metadata_columns)
632  ->default_value(g_enable_add_metadata_columns)
633  ->implicit_value(true),
634  "Enable add_metadata_columns COPY FROM WITH option (Beta).");
635 
636  desc.add_options()("disk-cache-path",
637  po::value<std::string>(&disk_cache_config.path),
638  "Specify the path for the disk cache.");
639 
640  desc.add_options()(
641  "disk-cache-level",
642  po::value<std::string>(&(disk_cache_level))->default_value("foreign_tables"),
643  "Specify level of disk cache. Valid options are 'foreign_tables', "
644  "'local_tables', 'none', and 'all'.");
645 
646  desc.add_options()("disk-cache-size",
647  po::value<size_t>(&(disk_cache_config.size_limit)),
648  "Specify a maximum size for the disk cache in bytes.");
649 
650  desc.add_options()(
651  "enable-interoperability",
652  po::value<bool>(&g_enable_interop)
653  ->default_value(g_enable_interop)
654  ->implicit_value(true),
655  "Enable offloading of query portions to an external execution engine.");
656  desc.add_options()("enable-union",
657  po::value<bool>(&g_enable_union)
658  ->default_value(g_enable_union)
659  ->implicit_value(true),
660  "DEPRECATED. UNION ALL is enabled by default. Please remove "
661  "use of this option, as it may be disabled in the future.");
662  desc.add_options()(
663  "calcite-service-timeout",
664  po::value<size_t>(&system_parameters.calcite_timeout)
665  ->default_value(system_parameters.calcite_timeout),
666  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
667  "schema changes or when running large numbers of parallel queries.");
668  desc.add_options()("calcite-service-keepalive",
669  po::value<size_t>(&system_parameters.calcite_keepalive)
670  ->default_value(system_parameters.calcite_keepalive)
671  ->implicit_value(true),
672  "Enable keepalive on Calcite connections.");
673  desc.add_options()(
674  "stringdict-parallelizm",
675  po::value<bool>(&g_enable_stringdict_parallel)
676  ->default_value(g_enable_stringdict_parallel)
677  ->implicit_value(true),
678  "Allow StringDictionary to parallelize loads using multiple threads");
679  desc.add_options()("log-user-id",
680  po::value<bool>(&Catalog_Namespace::g_log_user_id)
681  ->default_value(Catalog_Namespace::g_log_user_id)
682  ->implicit_value(true),
683  "Log userId integer in place of the userName (when available).");
684  desc.add_options()("log-user-origin",
685  po::value<bool>(&log_user_origin)
686  ->default_value(log_user_origin)
687  ->implicit_value(true),
688  "Lookup the origin of inbound connections by IP address/DNS "
689  "name, and print this information as part of stdlog.");
690  desc.add_options()("allowed-import-paths",
691  po::value<std::string>(&allowed_import_paths),
692  "List of allowed root paths that can be used in import operations.");
693  desc.add_options()("allowed-export-paths",
694  po::value<std::string>(&allowed_export_paths),
695  "List of allowed root paths that can be used in export operations.");
696  desc.add_options()("enable-system-tables",
697  po::value<bool>(&g_enable_system_tables)
698  ->default_value(g_enable_system_tables)
699  ->implicit_value(true),
700  "Enable use of system tables.");
701  desc.add_options()("enable-table-functions",
702  po::value<bool>(&g_enable_table_functions)
703  ->default_value(g_enable_table_functions)
704  ->implicit_value(true),
705  "Enable system table functions support.");
706  desc.add_options()("enable-ml-functions",
707  po::value<bool>(&g_enable_ml_functions)
708  ->default_value(g_enable_ml_functions)
709  ->implicit_value(true),
710  "Enable ML support.");
711  desc.add_options()("restrict-ml-model-metadata-to-superusers",
714  ->implicit_value(true),
715  "RESTRICT SHOW MODEL and SHOW MODEL DETAILS to superusers only.");
716  desc.add_options()("enable-logs-system-tables",
717  po::value<bool>(&g_enable_logs_system_tables)
718  ->default_value(g_enable_logs_system_tables)
719  ->implicit_value(true),
720  "Enable use of logs system tables.");
721  desc.add_options()(
722  "logs-system-tables-max-files-count",
723  po::value<size_t>(&g_logs_system_tables_max_files_count)
724  ->default_value(g_logs_system_tables_max_files_count),
725  "Maximum number of log files that will be processed by each logs system table.");
726 #ifdef ENABLE_MEMKIND
727  desc.add_options()("enable-tiered-cpu-mem",
728  po::value<bool>(&g_enable_tiered_cpu_mem)
729  ->default_value(g_enable_tiered_cpu_mem)
730  ->implicit_value(true),
731  "Enable additional tiers of CPU memory (PMEM, etc...)");
732  desc.add_options()("pmem-size", po::value<size_t>(&g_pmem_size)->default_value(0));
733  desc.add_options()("pmem-path", po::value<std::string>(&g_pmem_path));
734 #endif
735 
736  desc.add(log_options_.get_options());
737 }
size_t g_pmem_size
float g_filter_push_down_low_frac
Definition: Execute.cpp:99
bool g_use_query_resultset_cache
Definition: Execute.cpp:156
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:178
logger::LogOptions log_options_
bool g_enable_logs_system_tables
Definition: Catalog.cpp:100
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:83
bool g_allow_query_step_skipping
Definition: Execute.cpp:159
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
Definition: Logger.cpp:17
size_t preflight_count_query_threshold
double g_bbox_intersect_target_entries_per_bin
Definition: Execute.cpp:111
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:101
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:92
bool enable_non_kernel_time_query_interrupt
int g_hll_precision_bits
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:176
po::options_description help_desc_
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:184
std::string config_file
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:94
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:157
bool g_enable_string_functions
bool g_null_div_by_zero
Definition: Execute.cpp:91
bool g_enable_interop
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:174
bool g_restrict_ml_model_metadata_to_superusers
Definition: Execute.cpp:119
bool g_from_table_reordering
Definition: Execute.cpp:93
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:109
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
std::string g_pmem_path
float g_filter_push_down_high_frac
Definition: Execute.cpp:100
bool g_enable_distance_rangejoin
Definition: Execute.cpp:108
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:190
bool g_bigint_count
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:158
std::string allowed_export_paths
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:187
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:162
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:163
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:183
unsigned pending_query_interrupt_freq
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:87
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:105
bool g_cache_string_hash
bool g_enable_ml_functions
Definition: Execute.cpp:118
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:179
bool g_uniform_request_ids_per_thrift_call
Definition: DBHandler.cpp:125
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:177
bool g_enable_filter_push_down
Definition: Execute.cpp:98
boost::program_options::options_description const & get_options() const
po::positional_options_description positional_options
size_t watchdog_max_projected_rows_per_device
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:191
bool g_enable_union
bool g_allow_cpu_retry
Definition: Execute.cpp:89
File_Namespace::DiskCacheConfig disk_cache_config
bool enable_udf_registration_for_all_users
std::string allowed_import_paths
bool g_enable_stringdict_parallel
std::string disk_cache_level
size_t max_cacheable_hashtable_size_bytes
size_t watchdog_none_encoded_string_translation_limit
bool g_enable_fsi
Definition: Catalog.cpp:96
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:293
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:89
size_t g_max_import_threads
Definition: Importer.cpp:105
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:164
size_t g_bbox_intersect_max_table_size_bytes
Definition: Execute.cpp:110
bool g_enable_table_functions
Definition: Execute.cpp:117
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters

+ Here is the caller graph for this function:

std::string CommandLineOptions::getNodeIds ( )
std::vector<std::string> CommandLineOptions::getNodeIdsArray ( )
void CommandLineOptions::init_logging ( )

Definition at line 91 of file CommandLineOptions.cpp.

References logger::DEBUG1, and logger::init().

Referenced by EmbeddedDatabase::DBEngineImpl::init().

91  {
94  }
98 }
logger::LogOptions log_options_
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
Severity severity_
Definition: Logger.h:214
void set_base_path(std::string const &base_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

boost::optional< int > CommandLineOptions::parse_command_line ( int  argc,
char const *const *  argv,
const bool  should_init_logging = false 
)

Definition at line 1628 of file CommandLineOptions.cpp.

References SystemParameters::ALLOWED_ALL_USERS, SystemParameters::ALLOWED_SUPERUSERS_ONLY, BASELINE_HT, BBOX_INTERSECT_HT, construct_runtime_udf_registration_policy(), SystemParameters::DISALLOWED, logger::ERROR, migrations::MigrationMgr::executeRebrandMigration(), g_allow_auto_resultset_caching, g_allow_query_step_skipping, g_auto_resultset_caching_threshold, g_dynamic_watchdog_time_limit, g_enable_data_recycler, g_enable_dynamic_watchdog, g_enable_executor_resource_mgr, g_enable_filter_push_down, g_enable_non_kernel_time_query_interrupt, g_enable_runtime_query_interrupt, g_enable_union, g_enable_watchdog, g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency, g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency, g_executor_resource_mgr_cpu_result_mem_bytes, g_executor_resource_mgr_cpu_result_mem_ratio, g_executor_resource_mgr_max_available_resource_use_ratio, g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio, g_executor_resource_mgr_per_query_max_cpu_slots_ratio, g_from_table_reordering, g_hashtable_cache_total_bytes, g_max_cacheable_hashtable_size_bytes, g_max_cacheable_query_resultset_size_bytes, g_max_import_threads, g_optimize_cuda_block_and_grid_sizes, g_parallel_top_max, g_parallel_top_min, g_pending_query_interrupt_freq, g_preflight_count_query_threshold, g_query_resultset_cache_total_bytes, g_running_query_interrupt_freq, g_use_chunk_metadata_cache, g_use_estimator_result_cache, g_use_hashtable_cache, g_use_query_resultset_cache, g_watchdog_max_projected_rows_per_device, g_watchdog_none_encoded_string_translation_limit, BoundingBoxIntersectJoinHashTable::getHashTableCache(), PerfectJoinHashTable::getHashTableCache(), BaselineJoinHashTable::getHashTableCache(), logger::INFO, shared::kCatalogDirectoryName, shared::kDataDirectoryName, shared::kLockfilesDirectoryName, LOG, MAPD_RELEASE, 
migrations::MigrationMgr::migrationEnabled(), PERFECT_HT, run, anonymous_namespace{CommandLineOptions.cpp}::sanitize_config_file(), DataRecycler< CACHED_ITEM_TYPE, META_INFO_TYPE >::setMaxCacheItemSize(), DataRecycler< CACHED_ITEM_TYPE, META_INFO_TYPE >::setTotalCacheSize(), migrations::MigrationMgr::takeMigrationLock(), anonymous_namespace{CommandLineOptions.cpp}::trim_and_check_file_exists(), and UNREACHABLE.

Referenced by EmbeddedDatabase::DBEngineImpl::init(), and main().

1631  {
1632  po::options_description all_desc("All options");
1633  all_desc.add(help_desc_).add(developer_desc_);
1634 
1635  try {
1636  po::store(po::command_line_parser(argc, argv)
1637  .options(all_desc)
1638  .positional(positional_options)
1639  .run(),
1640  vm);
1641  po::notify(vm);
1642 
1643  if (vm.count("help")) {
1644  std::cerr << "Usage: heavydb <data directory path> [-p <port number>] "
1645  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1646  << std::endl
1647  << std::endl;
1648  std::cout << help_desc_ << std::endl;
1649  return 0;
1650  }
1651  if (vm.count("dev-options")) {
1652  std::cout << "Usage: heavydb <data directory path> [-p <port number>] "
1653  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1654  << std::endl
1655  << std::endl;
1656  std::cout << developer_desc_ << std::endl;
1657  return 0;
1658  }
1659  if (vm.count("version")) {
1660  std::cout << "HeavyDB Version: " << MAPD_RELEASE << std::endl;
1661  return 0;
1662  }
1663 
1664  if (vm.count("config")) {
1665  std::ifstream settings_file(system_parameters.config_file);
1666 
1667  auto sanitized_settings = sanitize_config_file(settings_file);
1668 
1669  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
1670  po::notify(vm);
1671  settings_file.close();
1672  }
1673 
1674  if (!g_enable_union) {
1675  std::cerr
1676  << "The enable-union option is DEPRECATED and is now enabled by default. "
1677  "Please remove use of this option, as it may be disabled in the future."
1678  << std::endl;
1679  }
1680 
1681  // Trim base path before executing migration
1682  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1683  if (!boost::filesystem::exists(base_path)) {
1684  std::cerr << "Storage folder (--data) not found: " << base_path << std::endl;
1685  std::cerr << "Need to run initheavy before heavydb." << std::endl;
1686  return 1;
1687  }
1688 
1689  // Execute rebrand migration before accessing any system files.
1690  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
1691  if (!boost::filesystem::exists(lockfiles_path)) {
1692  if (!boost::filesystem::create_directory(lockfiles_path)) {
1693  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
1694  " subdirectory under "
1695  << base_path << std::endl;
1696  return 1;
1697  }
1698  }
1699  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
1700  if (!boost::filesystem::exists(lockfiles_path2)) {
1701  if (!boost::filesystem::create_directory(lockfiles_path2)) {
1702  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1703  shared::kCatalogDirectoryName + " subdirectory under "
1704  << base_path << std::endl;
1705  return 1;
1706  }
1707  }
1708  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
1709  if (!boost::filesystem::exists(lockfiles_path3)) {
1710  if (!boost::filesystem::create_directory(lockfiles_path3)) {
1711  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1712  shared::kDataDirectoryName + " subdirectory under "
1713  << base_path << std::endl;
1714  return 1;
1715  }
1716  }
1720  }
1721 
1722  if (!vm["enable-runtime-udf"].defaulted()) {
1723  if (!vm["enable-runtime-udfs"].defaulted()) {
1724  std::cerr << "Usage Error: Both enable-runtime-udf and enable-runtime-udfs "
1725  "specified. Please remove use of the enable-runtime-udfs flag, "
1726  "as it will be deprecated in the future."
1727  << std::endl;
1728  return 1;
1729  } else {
1731  std::cerr << "The enable-runtime-udf flag has been deprecated and replaced "
1732  "with enable-runtime-udfs. Please remove use of this option "
1733  "as it will be disabled in the future."
1734  << std::endl;
1735  }
1736  }
1740 
1741  if (should_init_logging) {
1742  init_logging();
1743  }
1744 
1746  return 1;
1747  }
1748  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
1749  return 1;
1750  }
1752  "ssl trust store")) {
1753  return 1;
1754  }
1756  return 1;
1757  }
1759  return 1;
1760  }
1762  return 1;
1763  }
1764 
1781  if (g_use_hashtable_cache) {
1794  }
1796  } catch (po::error& e) {
1797  std::cerr << "Usage Error: " << e.what() << std::endl;
1798  return 1;
1799  }
1800 
1801  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1802  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
1803  return 1;
1804  }
1805 
1806  if (!g_from_table_reordering) {
1807  LOG(INFO) << " From clause table reordering is disabled";
1808  }
1809 
1811  LOG(INFO) << " Filter push down for JOIN is enabled";
1812  }
1813 
1814  if (vm.count("udf")) {
1815  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
1816 
1817  if (!boost::filesystem::exists(udf_file_name)) {
1818  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
1819  return 1;
1820  }
1821 
1822  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
1823  }
1824 
1825  if (vm.count("udf-compiler-path")) {
1826  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
1827  }
1828 
1829 #ifdef HAVE_TORCH_TFS
1830  if (vm.count("torch-lib-path")) {
1831  boost::algorithm::trim_if(torch_lib_path, boost::is_any_of("\"'"));
1832  }
1833 #endif
1834 
1835  auto trim_string = [](std::string& s) {
1836  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
1837  };
1838 
1839  if (vm.count("udf-compiler-options")) {
1840  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1841  }
1842 
1843  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
1844  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
1845  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
1846  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1847  boost::is_any_of("\"'"));
1848 
1849  if (!system_parameters.ha_group_id.empty()) {
1850  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
1851  if (system_parameters.ha_unique_server_id.empty()) {
1852  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
1853  return 5;
1854  } else {
1855  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
1856  }
1857  if (system_parameters.ha_brokers.empty()) {
1858  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
1859  return 6;
1860  } else {
1861  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
1862  }
1863  if (system_parameters.ha_shared_data.empty()) {
1864  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
1865  return 7;
1866  } else {
1867  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
1868  }
1869  }
1870 
1871  boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of("\"'"));
1872  if (!system_parameters.master_address.empty()) {
1873  if (!read_only) {
1874  LOG(ERROR) << "The master-address setting is only allowed in read-only mode";
1875  return 9;
1876  }
1877  LOG(INFO) << " Master Address is " << system_parameters.master_address;
1878  LOG(INFO) << " Master Port is " << system_parameters.master_port;
1879  }
1880 
1881  if (g_max_import_threads < 1) {
1882  std::cerr << "max-import-threads must be >= 1 (was set to " << g_max_import_threads
1883  << ")." << std::endl;
1884  return 8;
1885  } else {
1886  LOG(INFO) << " Max import threads " << g_max_import_threads;
1887  }
1888 
1890  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
1891  }
1893  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
1894  }
1895  LOG(INFO) << " Min CPU buffer pool slab size (in bytes) "
1897  LOG(INFO) << " Max CPU buffer pool slab size (in bytes) "
1899  LOG(INFO) << " Min GPU buffer pool slab size (in bytes) "
1901  LOG(INFO) << " Max GPU buffer pool slab size (in bytes) "
1903  LOG(INFO) << " calcite JVM max memory (in MB) " << system_parameters.calcite_max_mem;
1904  LOG(INFO) << " HeavyDB Server Port " << system_parameters.omnisci_server_port;
1905  LOG(INFO) << " HeavyDB Calcite Port " << system_parameters.calcite_port;
1906  LOG(INFO) << " Enable Calcite view optimize "
1908  LOG(INFO) << " Allow Local Auth Fallback: "
1909  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
1910  LOG(INFO) << " ParallelTop min threshold: " << g_parallel_top_min;
1911  LOG(INFO) << " ParallelTop watchdog max: " << g_parallel_top_max;
1912 
1913  LOG(INFO) << " Enable Data Recycler: "
1914  << (g_enable_data_recycler ? "enabled" : "disabled");
1915  if (g_enable_data_recycler) {
1916  LOG(INFO) << " \t Use hashtable cache: "
1917  << (g_use_hashtable_cache ? "enabled" : "disabled");
1918  if (g_use_hashtable_cache) {
1919  LOG(INFO) << " \t\t Total amount of bytes that hashtable cache keeps: "
1920  << g_hashtable_cache_total_bytes / (1024 * 1024) << " MB.";
1921  LOG(INFO) << " \t\t Per-hashtable size limit: "
1922  << g_max_cacheable_hashtable_size_bytes / (1024 * 1024) << " MB.";
1923  }
1924  LOG(INFO) << " \t Use query resultset cache: "
1925  << (g_use_query_resultset_cache ? "enabled" : "disabled");
1927  LOG(INFO) << " \t\t Total amount of bytes that query resultset cache keeps: "
1928  << g_query_resultset_cache_total_bytes / (1024 * 1024) << " MB.";
1929  LOG(INFO) << " \t\t Per-query resultset size limit: "
1930  << g_max_cacheable_query_resultset_size_bytes / (1024 * 1024) << " MB.";
1931  }
1932  LOG(INFO) << " \t\t Use auto query resultset caching: "
1933  << (g_allow_auto_resultset_caching ? "enabled" : "disabled");
1935  LOG(INFO) << " \t\t\t The maximum bytes of a query resultset which is "
1936  "automatically cached: "
1937  << g_auto_resultset_caching_threshold << " Bytes.";
1938  }
1939  LOG(INFO) << " \t\t Use query step skipping: "
1940  << (g_allow_query_step_skipping ? "enabled" : "disabled");
1941  LOG(INFO) << " \t Use chunk metadata cache: "
1942  << (g_use_chunk_metadata_cache ? "enabled" : "disabled");
1943  }
1944  LOG(INFO) << "Executor Resource Manager: "
1945  << (g_enable_executor_resource_mgr ? "enabled" : "disabled");
1947  LOG(INFO) << "\tCPU kernel concurrency: "
1949  : "disabled");
1950  LOG(INFO) << "\tCPU-GPU kernel concurrency: "
1952  : "disabled");
1954  LOG(INFO) << "\tCPU result set reserved allocation: "
1955  << g_executor_resource_mgr_cpu_result_mem_bytes / (1024 * 1024) << " MB";
1956  } else {
1957  LOG(INFO) << "\tCPU result set reserved ratio of CPU buffer pool size: "
1959  }
1960  LOG(INFO) << "\tPer-query max CPU threads ratio: "
1962  LOG(INFO) << "\tPer-query max CPU result memory ratio of allocated total: "
1964  LOG(INFO) << "\tAllow concurrent CPU thread/slot oversubscription: "
1966  ? "enabled"
1967  : "disabled");
1968  LOG(INFO)
1969  << "\tAllow concurrent CPU result memory oversubscription: "
1971  ? "enabled"
1972  : "disabled");
1973  LOG(INFO) << "\tPer-query Max available resource utilization ratio: "
1975  }
1976 
1977  const std::string udf_reg_policy_log_prefix{"Runtime UDF/UDTF Registration Policy: "};
1980  LOG(INFO) << udf_reg_policy_log_prefix << " DISALLOWED";
1981  break;
1982  }
1984  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for superusers only";
1985  break;
1986  }
1988  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for all users";
1989  break;
1990  }
1991  default: {
1992  UNREACHABLE() << "Unrecognized option for Runtime UDF/UDTF registration policy.";
1993  }
1994  }
1995 
1996  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
1997  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
1998  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
1999  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
2000  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
2001 
2002  return boost::none;
2003 }
std::string distinguishedName
Definition: AuthMetadata.h:25
double g_running_query_interrupt_freq
Definition: Execute.cpp:137
size_t g_parallel_top_max
Definition: ResultSet.cpp:50
const std::string kDataDirectoryName
std::string ldapQueryUrl
Definition: AuthMetadata.h:26
bool g_use_query_resultset_cache
Definition: Execute.cpp:156
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:178
SystemParameters::RuntimeUdfRegistrationPolicy construct_runtime_udf_registration_policy(const bool enable_runtime_udfs, const bool enable_udf_registration_for_all_users)
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
std::string ha_shared_data
std::string udf_compiler_path
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:136
#define LOG(tag)
Definition: Logger.h:285
bool enable_calcite_view_optimize
bool g_allow_query_step_skipping
Definition: Execute.cpp:159
size_t preflight_count_query_threshold
std::string ldapRoleRegex
Definition: AuthMetadata.h:27
#define UNREACHABLE()
Definition: Logger.h:338
size_t g_preflight_count_query_threshold
Definition: Execute.cpp:84
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:160
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:134
void setMaxCacheItemSize(CacheItemType item_type, size_t new_max_cache_item_size)
Definition: DataRecycler.h:613
bool enable_non_kernel_time_query_interrupt
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:176
bool g_enable_data_recycler
Definition: Execute.cpp:154
po::options_description help_desc_
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:184
std::string config_file
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:157
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:161
size_t g_watchdog_none_encoded_string_translation_limit
Definition: Execute.cpp:82
std::string ha_brokers
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:174
size_t g_parallel_top_min
Definition: ResultSet.cpp:49
std::string ssl_trust_ca_file
std::string ssl_trust_store
bool g_from_table_reordering
Definition: Execute.cpp:93
static HashtableRecycler * getHashTableCache()
static bool migrationEnabled()
Definition: MigrationMgr.h:47
size_t g_watchdog_max_projected_rows_per_device
Definition: Execute.cpp:83
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:190
std::string uri
Definition: AuthMetadata.h:24
bool g_enable_watchdog
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:158
std::string ha_unique_server_id
std::string ca_file_name
Definition: AuthMetadata.h:31
void setTotalCacheSize(CacheItemType item_type, size_t new_total_cache_size)
Definition: DataRecycler.h:606
std::string ssl_key_file
AuthMetadata authMetadata
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:165
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:187
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:162
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:163
RuntimeUdfRegistrationPolicy runtime_udf_registration_policy
std::string ldapSuperUserRole
Definition: AuthMetadata.h:28
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:183
unsigned pending_query_interrupt_freq
std::stringstream sanitize_config_file(std::ifstream &in)
std::vector< std::string > udf_compiler_options
const std::string kCatalogDirectoryName
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:179
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:177
bool g_enable_filter_push_down
Definition: Execute.cpp:98
bool g_use_estimator_result_cache
Definition: Execute.cpp:135
bool allowLocalAuthFallback
Definition: AuthMetadata.h:32
po::positional_options_description positional_options
size_t watchdog_max_projected_rows_per_device
static HashtableRecycler * getHashTableCache()
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:191
bool g_enable_union
std::string ssl_keystore
const std::string kLockfilesDirectoryName
bool enable_udf_registration_for_all_users
static const std::string MAPD_RELEASE
Definition: release.h:42
static bool run
po::variables_map vm
size_t max_cacheable_hashtable_size_bytes
size_t watchdog_none_encoded_string_translation_limit
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:88
std::string ha_group_id
po::options_description developer_desc_
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:133
size_t g_max_import_threads
Definition: Importer.cpp:105
bool g_use_hashtable_cache
Definition: Execute.cpp:155
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:164
std::string master_address
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
std::string ssl_cert_file

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CommandLineOptions::validate ( )

Definition at line 1231 of file CommandLineOptions.cpp.

References anonymous_namespace{CommandLineOptions.cpp}::addOptionalFileToBlacklist(), ddl_utils::FilePathBlacklist::addToBlacklist(), File_Namespace::all, test_exceptions::data_path, File_Namespace::fsi, heavyai::ftruncate(), g_allow_invalid_literal_buffer_reads, g_enable_debug_timer, g_enable_executor_resource_mgr, g_enable_fsi, g_enable_fsi_regex_import, g_enable_legacy_delimited_import, g_enable_logs_system_tables, g_enable_ml_functions, g_enable_s3_fsi, g_enable_system_tables, g_enable_table_functions, g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_kernel_concurrency, g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency, g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency, g_executor_resource_mgr_cpu_result_mem_ratio, g_executor_resource_mgr_max_available_resource_use_ratio, g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio, g_executor_resource_mgr_per_query_max_cpu_slots_ratio, Catalog_Namespace::g_log_user_id, g_logs_system_tables_max_files_count, g_multi_instance, g_pmem_path, g_pmem_size, g_read_only, g_vacuum_min_selectivity, File_Namespace::CachingFileMgr::getMinimumSize(), logger::INFO, ddl_utils::FilePathWhitelist::initialize(), shared::kCatalogDirectoryName, shared::kDataDirectoryName, shared::kDefaultDiskCacheDirName, shared::kDefaultLicenseFileName, shared::kDefaultLogDirName, shared::kSystemCatalogName, LOG, File_Namespace::non_fsi, File_Namespace::none, heavyai::safe_close(), heavyai::safe_fcntl(), heavyai::safe_open(), heavyai::safe_write(), import_export::ForeignDataImporter::setDefaultImportPath(), to_string(), VLOG, and logger::WARNING.

Referenced by main().

1231  {
1232  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1233  const auto data_path = boost::filesystem::path(base_path) / shared::kDataDirectoryName;
1234  if (!boost::filesystem::exists(data_path)) {
1235  throw std::runtime_error("HeavyDB data directory does not exist at '" + base_path +
1236  "'");
1237  }
1238 
1239 // TODO: support lock on Windows
1240 #ifndef _WIN32
1241  {
1242  // If we aren't sharing the data directory, take and hold a write lock on
1243  // heavydb_pid.lck to prevent other processes from trying to share our dir.
1244  // TODO(sy): Probably need to get rid of this PID file because it doesn't make much
1245  // sense to store only one server's PID when we have the --multi-instance option.
1246  auto exe_filename = boost::filesystem::path(exe_name).filename().string();
1247  const std::string lock_file =
1248  (boost::filesystem::path(base_path) / std::string(exe_filename + "_pid.lck"))
1249  .string();
1250  auto pid = std::to_string(getpid());
1251  if (!g_multi_instance) {
1252  VLOG(1) << "taking [" << lock_file << "] read+write lock until process exit";
1253  } else {
1254  VLOG(1) << "taking [" << lock_file << "] read-only lock until process exit";
1255  }
1256 
1257  int fd;
1258  fd = heavyai::safe_open(lock_file.c_str(), O_RDWR | O_CREAT, 0664);
1259  if (fd == -1) {
1260  throw std::runtime_error("failed to open lockfile: " + lock_file + ": " +
1261  std::string(strerror(errno)) + " (" +
1262  std::to_string(errno) + ")");
1263  }
1264 
1265  struct flock fl;
1266  memset(&fl, 0, sizeof(fl));
1267  fl.l_type = !g_multi_instance ? F_WRLCK : F_RDLCK;
1268  fl.l_whence = SEEK_SET;
1269  int cmd;
1270 #ifdef __linux__
1271  // cmd = F_OFD_SETLK; // TODO(sy): broken on centos
1272  cmd = F_SETLK;
1273 #else
1274  cmd = F_SETLK;
1275 #endif // __linux__
1276  int ret = heavyai::safe_fcntl(fd, cmd, &fl);
1277  if (ret == -1 && (errno == EACCES || errno == EAGAIN)) { // locked by someone else
1278  heavyai::safe_close(fd);
1279  throw std::runtime_error(
1280  "another HeavyDB server instance is already using data directory: " +
1281  base_path);
1282  } else if (ret == -1) {
1283  auto errno0 = errno;
1284  heavyai::safe_close(fd);
1285  throw std::runtime_error("failed to lock lockfile: " + lock_file + ": " +
1286  std::string(strerror(errno0)) + " (" +
1287  std::to_string(errno0) + ")");
1288  }
1289 
1290  if (!g_multi_instance) {
1291  if (heavyai::ftruncate(fd, 0) == -1) {
1292  auto errno0 = errno;
1293  heavyai::safe_close(fd);
1294  throw std::runtime_error("failed to truncate lockfile: " + lock_file + ": " +
1295  std::string(strerror(errno0)) + " (" +
1296  std::to_string(errno0) + ")");
1297  }
1298  if (heavyai::safe_write(fd, pid.c_str(), pid.length()) == -1) {
1299  auto errno0 = errno;
1300  heavyai::safe_close(fd);
1301  throw std::runtime_error("failed to write lockfile: " + lock_file + ": " +
1302  std::string(strerror(errno0)) + " (" +
1303  std::to_string(errno0) + ")");
1304  }
1305  }
1306 
1307  // Intentionally leak the file descriptor. Lock will be held until process exit.
1308  }
1309 #endif // _WIN32
1310 
1311  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
1312  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
1313  throw std::runtime_error("File containing DB queries " + db_query_file +
1314  " does not exist.");
1315  }
1316  const auto db_file = boost::filesystem::path(base_path) /
1318  if (!boost::filesystem::exists(db_file)) {
1319  { // check old system catalog existence
1320  const auto db_file =
1321  boost::filesystem::path(base_path) / shared::kCatalogDirectoryName / "mapd";
1322  if (!boost::filesystem::exists(db_file)) {
1323  throw std::runtime_error("System catalog " + shared::kSystemCatalogName +
1324  " does not exist.");
1325  }
1326  }
1327  }
1328  if (license_path.length() == 0) {
1330  }
1331 
1332  // add all parameters to be displayed on startup
1333  LOG(INFO) << "HeavyDB started with data directory at '" << base_path << "'";
1334  if (vm.count("license-path")) {
1335  LOG(INFO) << "License key path set to '" << license_path << "'";
1336  }
1338  LOG(INFO) << " Server read-only mode is " << read_only << " (--read-only)";
1339  if (g_multi_instance) {
1340  LOG(INFO) << " Multiple servers per --data directory is " << g_multi_instance
1341  << " (--multi-instance)";
1342  }
1343  if (g_read_only && g_multi_instance) {
1344  throw std::runtime_error(
1345  "You may not use the --read-only and --multi-instance configuration flags "
1346  "simultaneously.");
1347  }
1349  LOG(WARNING) << " Allowing invalid reads from the literal buffer. May cause invalid "
1350  "query results! (--allow-invalid-literal-buffer-reads)";
1351  }
1352 #if DISABLE_CONCURRENCY
1353  LOG(INFO) << " Threading layer: serial";
1354 #elif ENABLE_TBB
1355  LOG(INFO) << " Threading layer: TBB";
1356 #else
1357  LOG(INFO) << " Threading layer: std";
1358 #endif
1359  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
1360  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
1362  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
1363  }
1364  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
1366  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
1367  << pending_query_interrupt_freq << " (in ms.)";
1368  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
1369  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
1370  }
1371  LOG(INFO) << " Non-kernel time query interrupt is set to "
1373 
1374  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
1375  LOG(INFO) << " LogUserId is set to " << Catalog_Namespace::g_log_user_id;
1376  LOG(INFO) << " Maximum idle session duration " << idle_session_duration;
1377  LOG(INFO) << " Maximum active session duration " << max_session_duration;
1378  LOG(INFO) << " Maximum number of sessions " << system_parameters.num_sessions;
1379 
1380  LOG(INFO) << "Legacy delimited import is set to " << g_enable_legacy_delimited_import;
1381 #ifdef ENABLE_IMPORT_PARQUET
1382  LOG(INFO) << "Legacy parquet import is set to " << g_enable_legacy_parquet_import;
1383 #endif
1384  LOG(INFO) << "FSI regex parsed import is set to " << g_enable_fsi_regex_import;
1385 
1386  LOG(INFO) << "Allowed import paths is set to " << allowed_import_paths;
1387  LOG(INFO) << "Allowed export paths is set to " << allowed_export_paths;
1390 
1400  g_enable_s3_fsi = false;
1401 
1403 #ifdef ENABLE_IMPORT_PARQUET
1404  !g_enable_legacy_parquet_import ||
1405 #endif
1407  g_enable_fsi =
1408  true; // a requirement for FSI import code-paths is for FSI to be enabled
1409  LOG(INFO) << "FSI has been enabled as a side effect of enabling non-legacy import.";
1410  }
1411 
1412  const bool executor_resource_mgr_cpu_result_mem_ratio_flag_set =
1413  vm["executor-cpu-result-mem-ratio"].defaulted() ? false : true;
1414  const bool executor_resource_mgr_cpu_result_mem_bytes_flag_set =
1415  vm["executor-cpu-result-mem-bytes"].defaulted() ? false : true;
1416  const bool executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set =
1417  vm["executor-per-query-max-cpu-threads-ratio"].defaulted() ? false : true;
1418  const bool executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set =
1419  vm["executor-per-query-max-cpu-result-mem-ratio"].defaulted() ? false : true;
1420  const bool executor_resource_mgr_cpu_kernel_concurrency_flag_set =
1421  vm["allow-cpu-kernel-concurrency"].defaulted() ? false : true;
1422  const bool executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set =
1423  vm["allow-cpu-gpu-kernel-concurrency"].defaulted() ? false : true;
1424  const bool executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set =
1425  vm["allow-cpu-thread-oversubscription-concurrency"].defaulted() ? false : true;
1426  const bool executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set =
1427  vm["allow-cpu-result-mem-oversubscription-concurrency"].defaulted() ? false : true;
1428 
1430  if (executor_resource_mgr_cpu_result_mem_bytes_flag_set) {
1431  throw std::runtime_error(
1432  "Cannot set executor-cpu-result-mem-bytes without enable-executor-resource-mgr "
1433  "option enabled");
1434  }
1435  if (executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1436  throw std::runtime_error(
1437  "Cannot set executor-cpu-result-mem-ratio without enable-executor-resource-mgr "
1438  "option enabled");
1439  }
1440  if (executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set) {
1441  throw std::runtime_error(
1442  "Cannot set executor-per-query-max-cpu-slots-ratio without "
1443  "enable-executor-resource-mgr option enabled");
1444  }
1445  if (executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set) {
1446  throw std::runtime_error(
1447  "Cannot set executor-per-query-max-cpu-result-mem-ratio without "
1448  "enable-executor-resource-mgr option enabled");
1449  }
1450  if (executor_resource_mgr_cpu_kernel_concurrency_flag_set) {
1451  throw std::runtime_error(
1452  "Cannot set allow-cpu-kernel-concurrency without "
1453  "enable-executor-resource-mgr option enabled");
1454  }
1455  if (executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set) {
1456  throw std::runtime_error(
1457  "Cannot set allow-cpu-gpu-kernel-concurrency without "
1458  "enable-executor-resource-mgr option enabled");
1459  }
1460  if (executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set) {
1461  throw std::runtime_error(
1462  "Cannot set allow-cpu-thread-oversubscription-concurrency without "
1463  "enable-executor-resource-mgr option enabled");
1464  }
1465  if (executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set) {
1466  throw std::runtime_error(
1467  "Cannot set allow-cpu-thread-result-mem-concurrency without "
1468  "enable-executor-resource-mgr option enabled");
1469  }
1470  }
1471  if (executor_resource_mgr_cpu_result_mem_bytes_flag_set &&
1472  executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1473  throw std::runtime_error(
1474  "Setting both executor-cpu-result-mem-bytes and executor-cpu-result-mem-ratio is "
1475  "not allowed as the flags are mutually exclusive.");
1476  }
1480  throw std::runtime_error(
1481  "allow-cpu-thread-oversubscription-concurrency cannot be set without at least "
1482  "one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency being "
1483  "set.");
1484  }
1486  throw std::runtime_error(
1487  "allow-cpu-result-mem-oversubscription-concurrency cannot be set without at "
1488  "least one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency "
1489  "being set.");
1490  }
1491  }
1492 
1494  throw std::runtime_error(
1495  "Invalid value for executor-cpu-result-mem-ratio, must be greater than 0.");
1496  }
1498  throw std::runtime_error(
1499  "Invalid value for executor-per-query-max-cpu-slots-ratio, must be greater than "
1500  "0.");
1501  }
1503  throw std::runtime_error(
1504  "Invalid value for executor-per-query-max-cpu-result-mem-ratio, must be greater "
1505  "than "
1506  "0.");
1507  }
1510  throw std::runtime_error(
1511  "Invalid value for executor-max-available-resource-use-ratio, must be greater "
1512  "than "
1513  "0. and less than or equal to 1.0");
1514  }
1515 
1516 #ifndef HAVE_SYSTEM_TFS
1518  g_enable_table_functions = false;
1519  LOG(INFO) << "System table functions turned off due to HeavyDB being built without "
1520  "table function support.";
1521  }
1522 #endif // HAVE_SYSTEM_TFS
1524  g_enable_ml_functions = false;
1525  LOG(INFO) << "ML functions turned off due to `--enable-table-functions` being set to "
1526  "false. Please enable table functions to use ML functionality.";
1527  }
1528 
1529  if (disk_cache_level == "foreign_tables") {
1530  if (g_enable_fsi) {
1532  LOG(INFO) << "Disk cache enabled for foreign tables only";
1533  } else {
1534  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1535  "disk cache disabled";
1536  }
1537  } else if (disk_cache_level == "all") {
1539  LOG(INFO) << "Disk cache enabled for all tables";
1540  } else if (disk_cache_level == "local_tables") {
1542  LOG(INFO) << "Disk cache enabled for non-FSI tables";
1543  } else if (disk_cache_level == "none") {
1545  LOG(INFO) << "Disk cache disabled";
1546  } else {
1547  throw std::runtime_error{
1548  "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1549  ". Valid options are 'foreign_tables', "
1550  "'local_tables', 'none', and 'all'."};
1551  }
1552 
1554  throw std::runtime_error{"disk-cache-size must be at least " +
1556  }
1557 
1558  if (disk_cache_config.path.empty()) {
1560  }
1562 
1565 
1566  // If passed in, blacklist all security config files
1575 
1576  if (g_vacuum_min_selectivity < 0) {
1577  throw std::runtime_error{"vacuum-min-selectivity cannot be less than 0."};
1578  }
1579  LOG(INFO) << "Vacuum Min Selectivity: " << g_vacuum_min_selectivity;
1580 
1581  LOG(INFO) << "Enable system tables is set to " << g_enable_system_tables;
1582  if (g_enable_system_tables) {
1583  // System tables currently reuse FSI infrastructure and therefore, require FSI to be
1584  // enabled
1585  if (!g_enable_fsi) {
1586  g_enable_fsi = true;
1587  LOG(INFO) << "FSI has been enabled as a side effect of enabling system tables";
1588  }
1589  }
1590  LOG(INFO) << "Enable FSI is set to " << g_enable_fsi;
1591  LOG(INFO) << "Enable logs system tables set to " << g_enable_logs_system_tables;
1592 
1594  throw std::runtime_error{
1595  "Invalid value provided for the \"logs-system-tables-max-files-count\" "
1596  "option. Value must be greater than 0."};
1597  }
1598  LOG(INFO) << "Maximum number of logs system table files set to "
1600 
1601 #ifdef ENABLE_MEMKIND
1602  if (g_enable_tiered_cpu_mem) {
1603  if (g_pmem_path == "") {
1604  throw std::runtime_error{"pmem-path must be set to use tiered cpu memory"};
1605  }
1606  if (g_pmem_size == 0) {
1607  throw std::runtime_error{"pmem-size must be set to use tiered cpu memory"};
1608  }
1609  if (!std::filesystem::exists(g_pmem_path.c_str())) {
1610  throw std::runtime_error{"path to PMem directory (" + g_pmem_path +
1611  ") does not exist."};
1612  }
1613  }
1614 #endif
1615 }
size_t g_pmem_size
int safe_open(const char *path, int flags, mode_t mode) noexcept
Definition: heavyai_fs.cpp:90
const std::string kDataDirectoryName
bool g_multi_instance
Definition: heavyai_locks.h:22
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:178
bool g_enable_logs_system_tables
Definition: Catalog.cpp:100
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:878
const std::string kDefaultDiskCacheDirName
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:83
#define LOG(tag)
Definition: Logger.h:285
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
Definition: Logger.cpp:17
const std::string kDefaultLogDirName
const std::string kSystemCatalogName
void addOptionalFileToBlacklist(std::string &filename)
bool enable_non_kernel_time_query_interrupt
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:176
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:184
std::string to_string(char const *&&v)
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:174
std::string ssl_trust_ca_file
ssize_t safe_write(const int fd, const void *buffer, const size_t buffer_size) noexcept
Definition: heavyai_fs.cpp:144
std::string ssl_trust_store
bool g_enable_s3_fsi
Definition: Catalog.cpp:97
static void setDefaultImportPath(const std::string &base_path)
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
std::string g_pmem_path
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:190
std::string allowed_export_paths
std::string ca_file_name
Definition: AuthMetadata.h:31
std::string ssl_key_file
AuthMetadata authMetadata
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:187
bool g_read_only
Definition: heavyai_locks.h:21
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:183
unsigned pending_query_interrupt_freq
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:87
float g_vacuum_min_selectivity
const std::string kCatalogDirectoryName
bool g_enable_ml_functions
Definition: Execute.cpp:118
bool g_allow_invalid_literal_buffer_reads
Definition: ConstantIR.cpp:140
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:179
const std::string kDefaultLicenseFileName
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:191
std::string ssl_keystore
File_Namespace::DiskCacheConfig disk_cache_config
int32_t ftruncate(const int32_t fd, int64_t length)
Definition: heavyai_fs.cpp:86
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:925
std::string allowed_import_paths
std::string disk_cache_level
po::variables_map vm
int safe_fcntl(int fd, int cmd, struct flock *fl) noexcept
Definition: heavyai_fs.cpp:112
int safe_close(int fd) noexcept
Definition: heavyai_fs.cpp:101
bool g_enable_fsi
Definition: Catalog.cpp:96
#define VLOG(n)
Definition: Logger.h:388
bool g_enable_table_functions
Definition: Execute.cpp:117
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
std::string ssl_cert_file

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CommandLineOptions::validate_base_path ( )

Definition at line 1224 of file CommandLineOptions.cpp.

Referenced by main().

1224  {
1225  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1226  if (!boost::filesystem::exists(base_path)) {
1227  throw std::runtime_error("HeavyDB base directory does not exist at " + base_path);
1228  }
1229 }

+ Here is the caller graph for this function:

Member Data Documentation

bool CommandLineOptions::allow_loop_joins = false
bool CommandLineOptions::allow_multifrag = true
std::string CommandLineOptions::allowed_export_paths {}

Definition at line 126 of file CommandLineOptions.h.

std::string CommandLineOptions::allowed_import_paths {}

Definition at line 125 of file CommandLineOptions.h.

AuthMetadata CommandLineOptions::authMetadata
std::string CommandLineOptions::base_path
const std::string CommandLineOptions::cluster_command_line_arg {"cluster_topology"}
static

Definition at line 155 of file CommandLineOptions.h.

std::string CommandLineOptions::cluster_file = {"cluster.conf"}

Definition at line 55 of file CommandLineOptions.h.

std::string CommandLineOptions::cluster_topology_file = {"cluster_topology.conf"}

Definition at line 56 of file CommandLineOptions.h.

std::string CommandLineOptions::clusterIds_arg

Definition at line 151 of file CommandLineOptions.h.

std::string CommandLineOptions::compressor = std::string(BLOSC_LZ4HC_COMPNAME)

Definition at line 139 of file CommandLineOptions.h.

std::vector<LeafHostInfo> CommandLineOptions::db_leaves
std::string CommandLineOptions::db_query_file = {""}

Path to the file containing the list of warmup queries.

Definition at line 107 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

po::options_description CommandLineOptions::developer_desc_

Definition at line 142 of file CommandLineOptions.h.

File_Namespace::DiskCacheConfig CommandLineOptions::disk_cache_config
std::string CommandLineOptions::disk_cache_level = ""

Definition at line 92 of file CommandLineOptions.h.

const bool CommandLineOptions::dist_v5_

Definition at line 163 of file CommandLineOptions.h.

unsigned CommandLineOptions::dynamic_watchdog_time_limit = 10000

Definition at line 91 of file CommandLineOptions.h.

bool CommandLineOptions::enable_auto_clear_render_mem = false
bool CommandLineOptions::enable_data_recycler = true

Definition at line 94 of file CommandLineOptions.h.

bool CommandLineOptions::enable_dynamic_watchdog = false

Definition at line 81 of file CommandLineOptions.h.

bool CommandLineOptions::enable_legacy_syntax = true
bool CommandLineOptions::enable_non_kernel_time_query_interrupt = true

Definition at line 87 of file CommandLineOptions.h.

bool CommandLineOptions::enable_rendering = false
bool CommandLineOptions::enable_runtime_query_interrupt = true

Definition at line 86 of file CommandLineOptions.h.

bool CommandLineOptions::enable_runtime_udf = true
private

Definition at line 170 of file CommandLineOptions.h.

bool CommandLineOptions::enable_runtime_udfs = true
private

Definition at line 166 of file CommandLineOptions.h.

bool CommandLineOptions::enable_udf_registration_for_all_users = false
private

Definition at line 171 of file CommandLineOptions.h.

bool CommandLineOptions::enable_watchdog = true

Definition at line 80 of file CommandLineOptions.h.

std::string CommandLineOptions::encryption_key_store_path = {}

Definition at line 58 of file CommandLineOptions.h.

std::string CommandLineOptions::exe_name

Definition at line 144 of file CommandLineOptions.h.

bool CommandLineOptions::exit_after_warmup = false

exit after warmup

Definition at line 111 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

size_t CommandLineOptions::hashtable_cache_total_bytes = 4294967296

Definition at line 96 of file CommandLineOptions.h.

po::options_description CommandLineOptions::help_desc_

Definition at line 141 of file CommandLineOptions.h.

int CommandLineOptions::http_binary_port = 6276

Definition at line 51 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

int CommandLineOptions::http_port = 6278

Definition at line 50 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

int CommandLineOptions::idle_session_duration = kMinsPerHour

Inactive session tolerance, in minutes (default: 60 minutes).

Definition at line 115 of file CommandLineOptions.h.

Referenced by EmbeddedDatabase::DBEngineImpl::init(), and startHeavyDBServer().

bool CommandLineOptions::intel_jit_profile = false
bool CommandLineOptions::jit_debug = false
std::string CommandLineOptions::license_path = {""}

Definition at line 57 of file CommandLineOptions.h.

logger::LogOptions CommandLineOptions::log_options_

Definition at line 143 of file CommandLineOptions.h.

bool CommandLineOptions::log_user_origin = true

Definition at line 66 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

size_t CommandLineOptions::max_cacheable_hashtable_size_bytes = 2147483648

Definition at line 97 of file CommandLineOptions.h.

size_t CommandLineOptions::max_concurrent_render_sessions = 500
int CommandLineOptions::max_session_duration = kMinsPerMonth

Maximum session lifetime, in minutes (43,200 minutes == 30 days). See https://pages.nist.gov/800-63-3/sp800-63b.html#aal3reauth

Definition at line 120 of file CommandLineOptions.h.

Referenced by EmbeddedDatabase::DBEngineImpl::init(), and startHeavyDBServer().

const std::string CommandLineOptions::nodeIds_token = {"node_id"}
static

Definition at line 155 of file CommandLineOptions.h.

size_t CommandLineOptions::num_reader_threads = 0

Number of threads used when loading data

Definition at line 103 of file CommandLineOptions.h.

Referenced by EmbeddedDatabase::DBEngineImpl::init(), and startHeavyDBServer().

bool CommandLineOptions::optimize_cuda_block_and_grid_sizes = false

Definition at line 98 of file CommandLineOptions.h.

unsigned CommandLineOptions::pending_query_interrupt_freq = 1000

Definition at line 90 of file CommandLineOptions.h.

po::positional_options_description CommandLineOptions::positional_options

Definition at line 145 of file CommandLineOptions.h.

size_t CommandLineOptions::preflight_count_query_threshold = g_preflight_count_query_threshold

Definition at line 85 of file CommandLineOptions.h.

bool CommandLineOptions::read_only = false
bool CommandLineOptions::render_compositor_use_last_gpu = true
size_t CommandLineOptions::render_mem_bytes = 1000000000
int CommandLineOptions::render_oom_retry_threshold = 0
bool CommandLineOptions::renderer_enable_slab_allocation = false

Definition at line 79 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

bool CommandLineOptions::renderer_prefer_igpu = false
bool CommandLineOptions::renderer_use_parallel_executors = true

Definition at line 78 of file CommandLineOptions.h.

Referenced by startHeavyDBServer().

unsigned CommandLineOptions::renderer_vulkan_timeout_ms = 60000
size_t CommandLineOptions::reserved_gpu_mem = 384 * 1024 * 1024
double CommandLineOptions::running_query_interrupt_freq = 0.1

Definition at line 89 of file CommandLineOptions.h.

std::vector<LeafHostInfo> CommandLineOptions::string_leaves
SystemParameters CommandLineOptions::system_parameters
std::vector<std::string> CommandLineOptions::udf_compiler_options
std::string CommandLineOptions::udf_compiler_path = {""}
std::string CommandLineOptions::udf_file_name = {""}
bool CommandLineOptions::use_estimator_result_cache = true

Definition at line 88 of file CommandLineOptions.h.

bool CommandLineOptions::use_hashtable_cache = true

Definition at line 95 of file CommandLineOptions.h.

bool CommandLineOptions::verbose_logging = false

Definition at line 59 of file CommandLineOptions.h.

po::variables_map CommandLineOptions::vm

Definition at line 150 of file CommandLineOptions.h.

size_t CommandLineOptions::watchdog_max_projected_rows_per_device
Initial value: g_watchdog_max_projected_rows_per_device

Definition at line 83 of file CommandLineOptions.h.

size_t CommandLineOptions::watchdog_none_encoded_string_translation_limit = 1000000

Definition at line 82 of file CommandLineOptions.h.


The documentation for this class was generated from the following files: