24 #ifndef QUERYENGINE_HASHJOINRUNTIME_H
25 #define QUERYENGINE_HASHJOINRUNTIME_H
30 #include "../../../Shared/SqlTypesLayout.h"
31 #include "../../../Shared/sqltypes.h"
34 #include "../../DecodersImpl.h"
36 #include "../../RuntimeFunctions.h"
38 #include "../../../Shared/funcannotations.h"
53 return entry_count + 1;
64 const int64_t entry_count,
65 const int32_t invalid_slot_val,
66 const int32_t cpu_thread_idx,
67 const int32_t cpu_thread_count);
72 void init_hash_join_buff_tbb(int32_t* buff,
73 const int64_t entry_count,
74 const int32_t invalid_slot_val);
76 #endif // #ifdef HAVE_TBB
77 #endif // #ifndef __CUDACC__
80 const int64_t entry_count,
81 const int32_t invalid_slot_val);
84 const int64_t entry_count,
85 const size_t key_component_count,
86 const bool with_val_slot,
87 const int32_t invalid_slot_val,
88 const int32_t cpu_thread_idx,
89 const int32_t cpu_thread_count);
92 const int64_t entry_count,
93 const size_t key_component_count,
94 const bool with_val_slot,
95 const int32_t invalid_slot_val,
96 const int32_t cpu_thread_idx,
97 const int32_t cpu_thread_count);
102 void init_baseline_hash_join_buff_tbb_32(int8_t* hash_join_buff,
103 const int64_t entry_count,
104 const size_t key_component_count,
105 const bool with_val_slot,
106 const int32_t invalid_slot_val);
108 void init_baseline_hash_join_buff_tbb_64(int8_t* hash_join_buff,
109 const int64_t entry_count,
110 const size_t key_component_count,
111 const bool with_val_slot,
112 const int32_t invalid_slot_val);
114 #endif // #ifdef HAVE_TBB
115 #endif // #ifndef __CUDACC__
118 const int64_t entry_count,
119 const size_t key_component_count,
120 const bool with_val_slot,
121 const int32_t invalid_slot_val);
124 const int64_t entry_count,
125 const size_t key_component_count,
126 const bool with_val_slot,
127 const int32_t invalid_slot_val);
170 const int32_t invalid_slot_val,
171 const bool for_semi_join,
174 const int32_t* sd_inner_to_outer_translation_map,
175 const int32_t min_inner_elem,
176 const int32_t cpu_thread_idx,
177 const int32_t cpu_thread_count,
178 const int64_t bucket_normalization);
181 const int32_t invalid_slot_val,
182 const bool for_semi_join,
185 const int32_t* sd_inner_to_outer_translation_map,
186 const int32_t min_inner_elem,
187 const int32_t cpu_thread_idx,
188 const int32_t cpu_thread_count);
191 const int32_t invalid_slot_val,
192 const bool for_semi_join,
198 const int32_t invalid_slot_val,
199 const bool for_semi_join,
203 const int64_t bucket_normalization);
213 const int32_t invalid_slot_val,
214 const bool for_semi_join,
221 const int32_t invalid_slot_val,
222 const bool for_semi_join,
227 const int64_t bucket_normalization);
233 const int32_t* sd_inner_to_outer_translation_map,
234 const int32_t min_inner_elem,
235 const unsigned cpu_thread_count);
242 const int32_t* sd_inner_to_outer_translation_map,
243 const int32_t min_inner_elem,
244 const unsigned cpu_thread_count);
249 const int32_t invalid_slot_val,
253 const int32_t* sd_inner_to_outer_translation_map,
254 const int32_t min_inner_elem,
255 const unsigned cpu_thread_count);
275 const int64_t entry_count,
276 const int32_t invalid_slot_val,
277 const bool for_semi_join,
278 const size_t key_component_count,
279 const bool with_val_slot,
281 const int64_t num_elems,
282 const int32_t cpu_thread_idx,
283 const int32_t cpu_thread_count);
286 const int64_t entry_count,
287 const int32_t invalid_slot_val,
288 const size_t key_component_count,
289 const bool with_val_slot,
291 const int64_t num_elems,
292 const int32_t cpu_thread_idx,
293 const int32_t cpu_thread_count);
296 const size_t entry_count,
297 const int32_t invalid_slot_val,
298 const size_t key_component_count,
299 const bool with_val_slot,
301 const size_t num_elems,
302 const int32_t cpu_thread_idx,
303 const int32_t cpu_thread_count);
306 const int64_t entry_count,
307 const int32_t invalid_slot_val,
308 const bool for_semi_join,
309 const size_t key_component_count,
310 const bool with_val_slot,
312 const int64_t num_elems,
313 const int32_t cpu_thread_idx,
314 const int32_t cpu_thread_count);
317 const int64_t entry_count,
318 const int32_t invalid_slot_val,
319 const size_t key_component_count,
320 const bool with_val_slot,
322 const int64_t num_elems,
323 const int32_t cpu_thread_idx,
324 const int32_t cpu_thread_count);
327 const size_t entry_count,
328 const int32_t invalid_slot_val,
329 const size_t key_component_count,
330 const bool with_val_slot,
332 const size_t num_elems,
333 const int32_t cpu_thread_idx,
334 const int32_t cpu_thread_count);
337 const int64_t entry_count,
338 const int32_t invalid_slot_val,
339 const bool for_semi_join,
340 const size_t key_component_count,
341 const bool with_val_slot,
344 const int64_t num_elems);
347 const int64_t entry_count,
348 const int32_t invalid_slot_val,
349 const bool for_semi_join,
350 const size_t key_component_count,
351 const bool with_val_slot,
354 const int64_t num_elems);
358 const int64_t entry_count,
359 const int32_t invalid_slot_val,
360 const size_t key_component_count,
361 const bool with_val_slot,
364 const int64_t num_elems);
367 const int64_t entry_count,
368 const int32_t invalid_slot_val,
369 const size_t key_component_count,
370 const bool with_val_slot,
373 const size_t num_elems);
377 const int32_t* composite_key_dict,
378 const int64_t hash_entry_count,
379 const size_t key_component_count,
380 const std::vector<JoinColumn>& join_column_per_key,
381 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
382 const std::vector<JoinBucketInfo>& join_bucket_info,
383 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
384 const std::vector<int32_t>& sd_min_inner_elems,
385 const int32_t cpu_thread_count,
386 const bool is_range_join =
false,
387 const bool is_geo_compressed =
false);
391 const int64_t* composite_key_dict,
392 const int64_t hash_entry_count,
393 const size_t key_component_count,
394 const std::vector<JoinColumn>& join_column_per_key,
395 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
396 const std::vector<JoinBucketInfo>& join_bucket_info,
397 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
398 const std::vector<int32_t>& sd_min_inner_elems,
399 const int32_t cpu_thread_count,
400 const bool is_range_join =
false,
401 const bool is_geo_compressed =
false);
405 const int32_t* composite_key_dict,
406 const int64_t hash_entry_count,
407 const size_t key_component_count,
409 const int64_t num_elems);
413 const int64_t* composite_key_dict,
414 const int64_t hash_entry_count,
416 const int64_t num_elems);
420 const int64_t* composite_key_dict,
421 const int64_t hash_entry_count,
423 const int64_t num_elems);
427 const int64_t* composite_key_dict,
428 const size_t hash_entry_count,
430 const size_t num_elems);
434 const size_t padded_size_bytes,
435 const std::vector<JoinColumn>& join_column_per_key,
436 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
437 const int thread_count);
440 uint8_t* hll_buffer_all_cpus,
441 std::vector<int32_t>& row_counts,
443 const size_t padded_size_bytes,
444 const std::vector<JoinColumn>& join_column_per_key,
445 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
446 const std::vector<JoinBucketInfo>& join_buckets_per_key,
447 const int thread_count);
450 uint8_t* hll_buffer_all_cpus,
451 std::vector<int32_t>& row_counts,
453 const size_t padded_size_bytes,
454 const std::vector<JoinColumn>& join_column_per_key,
455 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
456 const std::vector<JoinBucketInfo>& join_buckets_per_key,
457 const bool is_compressed,
458 const int thread_count);
463 const int64_t num_elems);
467 int32_t* row_counts_buffer,
469 const int64_t num_elems);
474 const std::vector<double>& bucket_size_thresholds,
475 const int thread_count);
479 int32_t* row_counts_buffer,
481 const size_t num_elems,
482 const size_t block_size_x,
483 const size_t grid_size_x);
488 const double* bucket_size_thresholds);
490 #endif // QUERYENGINE_HASHJOINRUNTIME_H
const ColumnType column_type
void fill_hash_join_buff_on_device_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
DEVICE int SUFFIX() fill_hash_join_buff_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_hash_table_on_device(int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_hash_join_buff_on_device_sharded(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::vector< double > inverse_bucket_sizes_for_dimension
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
const size_t entry_count_per_shard
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
const int64_t translated_null_val
void compute_bucket_sizes_on_device(double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_size_thresholds)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
void approximate_distinct_tuples_on_device_range(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const RangeKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
size_t col_chunks_buff_sz
void fill_one_to_many_hash_table_sharded_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
void fill_one_to_many_hash_table_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
void compute_bucket_sizes_on_cpu(std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const std::vector< double > &bucket_size_thresholds, const int thread_count)
int64_t bucket_normalization
void fill_one_to_many_hash_table_on_device_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const int8_t * col_chunks_buff
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
void fill_one_to_many_hash_table_on_device_sharded(int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
DEVICE int SUFFIX() fill_hash_join_buff(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void approximate_distinct_tuples_overlaps(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
bool is_date_in_days() const
int overlaps_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_hash_table(int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int overlaps_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t getNormalizedHashEntryCount() const
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_range(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const bool is_compressed, const int thread_count)
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
void fill_hash_join_buff_on_device(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const size_t g_maximum_conditions_to_coalesce
void approximate_distinct_tuples_on_device_overlaps(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_hash_join_buff_on_device_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
bool is_unsigned_type(const SQLTypeInfo &ti)