23 #ifndef QUERYENGINE_HASHJOINRUNTIME_H
24 #define QUERYENGINE_HASHJOINRUNTIME_H
29 #include "../../../Shared/SqlTypesLayout.h"
30 #include "../../../Shared/sqltypes.h"
33 #include "../../DecodersImpl.h"
35 #include "../../RuntimeFunctions.h"
37 #include "../../../Shared/funcannotations.h"
55 const int64_t entry_count,
56 const int32_t invalid_slot_val,
57 const int32_t cpu_thread_idx,
58 const int32_t cpu_thread_count);
// Prototype for init_hash_join_buff_tbb (declaration only; no body is visible
// in this header).
//
// Parameters:
//   buff             - mutable int32_t buffer passed in by the caller.
//   entry_count      - 64-bit signed count of entries.
//   invalid_slot_val - 32-bit value named as the "invalid slot" marker.
//
// The #endif markers that follow close "#ifdef HAVE_TBB" and
// "#ifndef __CUDACC__" regions, so this prototype is only visible when TBB is
// available and the file is not being compiled by the CUDA compiler.
// Unlike init_hash_join_buff (whose signature is listed later in this file),
// this variant takes no cpu_thread_idx / cpu_thread_count arguments.
// NOTE(review): presumably writes invalid_slot_val into each of the
// entry_count slots of buff using TBB for parallelism - confirm against the
// implementation.
63 void init_hash_join_buff_tbb(int32_t* buff,
64 const int64_t entry_count,
65 const int32_t invalid_slot_val);
67 #endif // #ifdef HAVE_TBB
68 #endif // #ifndef __CUDACC__
71 const int64_t entry_count,
72 const int32_t invalid_slot_val);
75 const int64_t entry_count,
76 const size_t key_component_count,
77 const bool with_val_slot,
78 const int32_t invalid_slot_val,
79 const int32_t cpu_thread_idx,
80 const int32_t cpu_thread_count);
83 const int64_t entry_count,
84 const size_t key_component_count,
85 const bool with_val_slot,
86 const int32_t invalid_slot_val,
87 const int32_t cpu_thread_idx,
88 const int32_t cpu_thread_count);
// Prototype for init_baseline_hash_join_buff_tbb_32 (declaration only; no body
// is visible in this header).
//
// Parameters:
//   hash_join_buff      - mutable byte (int8_t) buffer passed in by the caller.
//   entry_count         - 64-bit signed count of entries.
//   key_component_count - number of key components.
//   with_val_slot       - flag; the name suggests each entry carries an extra
//                         value slot (TODO confirm in the implementation).
//   invalid_slot_val    - 32-bit value named as the "invalid slot" marker.
//
// The parameter list matches init_baseline_hash_join_buff_32 (signature listed
// later in this file) minus its cpu_thread_idx / cpu_thread_count arguments.
// The following #endif markers show this prototype sits inside the
// "#ifdef HAVE_TBB" / "#ifndef __CUDACC__" conditional regions.
// NOTE(review): the "_32" suffix presumably denotes 32-bit key components -
// confirm against the implementation.
93 void init_baseline_hash_join_buff_tbb_32(int8_t* hash_join_buff,
94 const int64_t entry_count,
95 const size_t key_component_count,
96 const bool with_val_slot,
97 const int32_t invalid_slot_val);
// Prototype for init_baseline_hash_join_buff_tbb_64 (declaration only; no body
// is visible in this header).
//
// Parameters:
//   hash_join_buff      - mutable byte (int8_t) buffer passed in by the caller.
//   entry_count         - 64-bit signed count of entries.
//   key_component_count - number of key components.
//   with_val_slot       - flag; the name suggests each entry carries an extra
//                         value slot (TODO confirm in the implementation).
//   invalid_slot_val    - 32-bit value named as the "invalid slot" marker.
//
// The parameter list matches init_baseline_hash_join_buff_64 (signature listed
// later in this file) minus its cpu_thread_idx / cpu_thread_count arguments.
// The following #endif markers show this prototype sits inside the
// "#ifdef HAVE_TBB" / "#ifndef __CUDACC__" conditional regions.
// NOTE(review): the "_64" suffix presumably denotes 64-bit key components -
// confirm against the implementation.
99 void init_baseline_hash_join_buff_tbb_64(int8_t* hash_join_buff,
100 const int64_t entry_count,
101 const size_t key_component_count,
102 const bool with_val_slot,
103 const int32_t invalid_slot_val);
105 #endif // #ifdef HAVE_TBB
106 #endif // #ifndef __CUDACC__
109 const int64_t entry_count,
110 const size_t key_component_count,
111 const bool with_val_slot,
112 const int32_t invalid_slot_val);
115 const int64_t entry_count,
116 const size_t key_component_count,
117 const bool with_val_slot,
118 const int32_t invalid_slot_val);
161 const int32_t invalid_slot_val,
162 const bool for_semi_join,
165 const int32_t* sd_inner_to_outer_translation_map,
166 const int32_t min_inner_elem,
167 const int32_t cpu_thread_idx,
168 const int32_t cpu_thread_count,
169 const int64_t bucket_normalization);
172 const int32_t invalid_slot_val,
173 const bool for_semi_join,
176 const int32_t* sd_inner_to_outer_translation_map,
177 const int32_t min_inner_elem,
178 const int32_t cpu_thread_idx,
179 const int32_t cpu_thread_count);
182 const int32_t invalid_slot_val,
183 const bool for_semi_join,
189 const int32_t invalid_slot_val,
190 const bool for_semi_join,
194 const int64_t bucket_normalization);
204 const int32_t invalid_slot_val,
205 const bool for_semi_join,
212 const int32_t invalid_slot_val,
213 const bool for_semi_join,
218 const int64_t bucket_normalization);
224 const int32_t* sd_inner_to_outer_translation_map,
225 const int32_t min_inner_elem,
226 const unsigned cpu_thread_count,
227 const bool for_window_framing);
234 const int32_t* sd_inner_to_outer_translation_map,
235 const int32_t min_inner_elem,
236 const unsigned cpu_thread_count);
241 const int32_t invalid_slot_val,
245 const int32_t* sd_inner_to_outer_translation_map,
246 const int32_t min_inner_elem,
247 const unsigned cpu_thread_count);
253 const bool for_window_framing);
269 const int64_t entry_count,
270 const int32_t invalid_slot_val,
271 const bool for_semi_join,
272 const size_t key_component_count,
273 const bool with_val_slot,
275 const int64_t num_elems,
276 const int32_t cpu_thread_idx,
277 const int32_t cpu_thread_count);
280 const int64_t entry_count,
281 const int32_t invalid_slot_val,
282 const size_t key_component_count,
283 const bool with_val_slot,
285 const int64_t num_elems,
286 const int32_t cpu_thread_idx,
287 const int32_t cpu_thread_count);
290 const size_t entry_count,
291 const int32_t invalid_slot_val,
292 const size_t key_component_count,
293 const bool with_val_slot,
295 const size_t num_elems,
296 const int32_t cpu_thread_idx,
297 const int32_t cpu_thread_count);
300 const int64_t entry_count,
301 const int32_t invalid_slot_val,
302 const bool for_semi_join,
303 const size_t key_component_count,
304 const bool with_val_slot,
306 const int64_t num_elems,
307 const int32_t cpu_thread_idx,
308 const int32_t cpu_thread_count);
311 const int64_t entry_count,
312 const int32_t invalid_slot_val,
313 const size_t key_component_count,
314 const bool with_val_slot,
316 const int64_t num_elems,
317 const int32_t cpu_thread_idx,
318 const int32_t cpu_thread_count);
321 const size_t entry_count,
322 const int32_t invalid_slot_val,
323 const size_t key_component_count,
324 const bool with_val_slot,
326 const size_t num_elems,
327 const int32_t cpu_thread_idx,
328 const int32_t cpu_thread_count);
331 const int64_t entry_count,
332 const int32_t invalid_slot_val,
333 const bool for_semi_join,
334 const size_t key_component_count,
335 const bool with_val_slot,
338 const int64_t num_elems);
341 const int64_t entry_count,
342 const int32_t invalid_slot_val,
343 const bool for_semi_join,
344 const size_t key_component_count,
345 const bool with_val_slot,
348 const int64_t num_elems);
352 const int64_t entry_count,
353 const int32_t invalid_slot_val,
354 const size_t key_component_count,
355 const bool with_val_slot,
358 const int64_t num_elems);
361 const int64_t entry_count,
362 const int32_t invalid_slot_val,
363 const size_t key_component_count,
364 const bool with_val_slot,
367 const size_t num_elems);
371 const int32_t* composite_key_dict,
372 const int64_t hash_entry_count,
373 const size_t key_component_count,
374 const std::vector<JoinColumn>& join_column_per_key,
375 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
376 const std::vector<JoinBucketInfo>& join_bucket_info,
377 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
378 const std::vector<int32_t>& sd_min_inner_elems,
379 const int32_t cpu_thread_count,
380 const bool is_range_join =
false,
381 const bool is_geo_compressed =
false,
382 const bool for_window_framing =
false);
386 const int64_t* composite_key_dict,
387 const int64_t hash_entry_count,
388 const size_t key_component_count,
389 const std::vector<JoinColumn>& join_column_per_key,
390 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
391 const std::vector<JoinBucketInfo>& join_bucket_info,
392 const std::vector<const int32_t*>& sd_inner_to_outer_translation_maps,
393 const std::vector<int32_t>& sd_min_inner_elems,
394 const int32_t cpu_thread_count,
395 const bool is_range_join =
false,
396 const bool is_geo_compressed =
false,
397 const bool for_window_framing =
false);
401 const int32_t* composite_key_dict,
402 const int64_t hash_entry_count,
403 const size_t key_component_count,
405 const int64_t num_elems,
406 const bool for_window_framing);
410 const int64_t* composite_key_dict,
411 const int64_t hash_entry_count,
413 const int64_t num_elems,
414 const bool for_window_framing);
418 const int64_t* composite_key_dict,
419 const int64_t hash_entry_count,
421 const int64_t num_elems);
425 const int64_t* composite_key_dict,
426 const size_t hash_entry_count,
428 const size_t num_elems);
432 const size_t padded_size_bytes,
433 const std::vector<JoinColumn>& join_column_per_key,
434 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
435 const int thread_count);
438 uint8_t* hll_buffer_all_cpus,
439 std::vector<int32_t>& row_counts,
441 const size_t padded_size_bytes,
442 const std::vector<JoinColumn>& join_column_per_key,
443 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
444 const std::vector<JoinBucketInfo>& join_buckets_per_key,
445 const int thread_count);
448 uint8_t* hll_buffer_all_cpus,
449 std::vector<int32_t>& row_counts,
451 const size_t padded_size_bytes,
452 const std::vector<JoinColumn>& join_column_per_key,
453 const std::vector<JoinColumnTypeInfo>& type_info_per_key,
454 const std::vector<JoinBucketInfo>& join_buckets_per_key,
455 const bool is_compressed,
456 const int thread_count);
461 const int64_t num_elems);
465 int32_t* row_counts_buffer,
467 const int64_t num_elems);
472 const std::vector<double>& bucket_size_thresholds,
473 const int thread_count);
477 int32_t* row_counts_buffer,
479 const size_t num_elems,
480 const size_t block_size_x,
481 const size_t grid_size_x);
486 const double* bucket_size_thresholds);
488 #endif // QUERYENGINE_HASHJOINRUNTIME_H
const ColumnType column_type
void fill_hash_join_buff_on_device_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
DEVICE int SUFFIX() fill_hash_join_buff_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
void fill_one_to_many_hash_table(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count, const bool for_window_framing)
void fill_one_to_many_hash_table_on_device_sharded(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_hash_join_buff_on_device_sharded(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::vector< double > inverse_bucket_sizes_for_dimension
void fill_one_to_many_hash_table_on_device_bucketized(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
void fill_one_to_many_hash_table_bucketized(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
const size_t entry_count_per_shard
const int64_t translated_null_val
void compute_bucket_sizes_on_device(double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_size_thresholds)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
void approximate_distinct_tuples_on_device_range(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const RangeKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
size_t col_chunks_buff_sz
int64_t bucket_normalization
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
void compute_bucket_sizes_on_cpu(std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const std::vector< double > &bucket_size_thresholds, const int thread_count)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const int8_t * col_chunks_buff
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
void fill_one_to_many_hash_table_on_device(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const bool for_window_framing)
DEVICE int SUFFIX() fill_hash_join_buff(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void approximate_distinct_tuples_overlaps(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
bool is_date_in_days() const
int overlaps_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t getNormalizedHashEntryCount() const
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int overlaps_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_range(uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const bool is_compressed, const int thread_count)
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
void fill_one_to_many_hash_table_sharded_bucketized(int32_t *buff, const BucketizedHashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_hash_join_buff_on_device(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const size_t g_maximum_conditions_to_coalesce
void approximate_distinct_tuples_on_device_overlaps(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_hash_join_buff_on_device_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
size_t bucketized_hash_entry_count
bool is_unsigned_type(const SQLTypeInfo &ti)