OmniSciDB  04ee39c94c
HashJoinKeyHandlers.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_HASHJOINKEYHANDLERS_H
18 #define QUERYENGINE_HASHJOINKEYHANDLERS_H
19 
20 #include "../Shared/SqlTypesLayout.h"
21 #include "HashJoinRuntime.h"
22 
23 #ifdef __CUDACC__
24 #include "DecodersImpl.h"
25 #else
26 #include "../StringDictionary/StringDictionary.h"
27 #include "../StringDictionary/StringDictionaryProxy.h"
28 #include "RuntimeFunctions.h"
29 #include "Shared/Logger.h"
30 #endif
31 
32 #include <cmath>
33 
34 #include "../Shared/funcannotations.h"
35 
37  const JoinColumn& join_column,
38  const size_t i) {
39  switch (type_info.column_type) {
40  case SmallDate:
42  join_column.col_buff,
43  type_info.elem_sz,
44  type_info.elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
45  type_info.elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
46  i);
47  case Signed:
49  join_column.col_buff, type_info.elem_sz, i);
50  case Unsigned:
52  join_column.col_buff, type_info.elem_sz, i);
53  default:
54 #ifndef __CUDACC__
55  CHECK(false);
56 #else
57  assert(0);
58 #endif
59  return 0;
60  }
61 }
62 
64  GenericKeyHandler(const size_t key_component_count,
65  const bool should_skip_entries,
66  const JoinColumn* join_column_per_key,
67  const JoinColumnTypeInfo* type_info_per_key
68 #ifndef __CUDACC__
69  ,
70  const void* const* sd_inner_proxy_per_key,
71  const void* const* sd_outer_proxy_per_key
72 #endif
73  )
74  : key_component_count_(key_component_count)
75  , should_skip_entries_(should_skip_entries)
76  , join_column_per_key_(join_column_per_key)
77  , type_info_per_key_(type_info_per_key) {
78 #ifndef __CUDACC__
79  if (sd_inner_proxy_per_key) {
80  CHECK(sd_outer_proxy_per_key);
81  sd_inner_proxy_per_key_ = sd_inner_proxy_per_key;
82  sd_outer_proxy_per_key_ = sd_outer_proxy_per_key;
83  } else
84 #endif
85  {
86  sd_inner_proxy_per_key_ = nullptr;
87  sd_outer_proxy_per_key_ = nullptr;
88  }
89  }
90 
91  template <typename T, typename KEY_BUFF_HANDLER>
92  DEVICE int operator()(const size_t i, T* key_scratch_buff, KEY_BUFF_HANDLER f) const {
93  bool skip_entry = false;
94  for (size_t key_component_index = 0; key_component_index < key_component_count_;
95  ++key_component_index) {
96  const auto& join_column = join_column_per_key_[key_component_index];
97  const auto& type_info = type_info_per_key_[key_component_index];
98  int64_t elem = get_join_column_element_value(type_info, join_column, i);
99  if (should_skip_entries_ && elem == type_info.null_val && !type_info.uses_bw_eq) {
100  skip_entry = true;
101  break;
102  }
103 #ifndef __CUDACC__
104  const auto sd_inner_proxy = sd_inner_proxy_per_key_
105  ? sd_inner_proxy_per_key_[key_component_index]
106  : nullptr;
107  const auto sd_outer_proxy = sd_outer_proxy_per_key_
108  ? sd_outer_proxy_per_key_[key_component_index]
109  : nullptr;
110  if (sd_inner_proxy && elem != type_info.null_val) {
111  CHECK(sd_outer_proxy);
112  const auto sd_inner_dict_proxy =
113  static_cast<const StringDictionaryProxy*>(sd_inner_proxy);
114  const auto sd_outer_dict_proxy =
115  static_cast<const StringDictionaryProxy*>(sd_outer_proxy);
116  const auto elem_str = sd_inner_dict_proxy->getString(elem);
117  const auto outer_id = sd_outer_dict_proxy->getIdOfString(elem_str);
118  if (outer_id == StringDictionary::INVALID_STR_ID) {
119  skip_entry = true;
120  break;
121  }
122  elem = outer_id;
123  }
124 #endif
125  key_scratch_buff[key_component_index] = elem;
126  }
127 
128  if (!skip_entry) {
129  return f(i, key_scratch_buff, key_component_count_);
130  }
131 
132  return 0;
133  }
134 
135  const size_t key_component_count_;
139  const void* const* sd_inner_proxy_per_key_;
140  const void* const* sd_outer_proxy_per_key_;
141 };
142 
144  OverlapsKeyHandler(const size_t key_dims_count,
145  const JoinColumn* join_column,
146  const double* bucket_sizes_for_dimension)
147  : key_dims_count_(key_dims_count)
148  , join_column_(join_column)
149  , bucket_sizes_for_dimension_(bucket_sizes_for_dimension) {}
150 
151  template <typename T, typename KEY_BUFF_HANDLER>
152  DEVICE int operator()(const size_t i, T* key_scratch_buff, KEY_BUFF_HANDLER f) const {
153  // TODO(adb): hard-coding the 2D case w/ bounds for now. Should support n-dims with a
154  // check to ensure we are not exceeding maximum number of dims for coalesced keys
155  double bounds[4];
156  for (size_t j = 0; j < 2 * key_dims_count_; j++) {
157  bounds[j] = SUFFIX(fixed_width_double_decode_noinline)(join_column_[0].col_buff,
158  2 * key_dims_count_ * i + j);
159  }
160 
161  const auto x_bucket_sz = bucket_sizes_for_dimension_[0];
162  const auto y_bucket_sz = bucket_sizes_for_dimension_[1];
163 
164  for (int64_t x = floor(bounds[0] * x_bucket_sz); x <= floor(bounds[2] * x_bucket_sz);
165  x++) {
166  for (int64_t y = floor(bounds[1] * y_bucket_sz);
167  y <= floor(bounds[3] * y_bucket_sz);
168  y++) {
169  key_scratch_buff[0] = x;
170  key_scratch_buff[1] = y;
171 
172  const auto err = f(i, key_scratch_buff, key_dims_count_);
173  if (err) {
174  return err;
175  }
176  }
177  }
178  return 0;
179  }
180 
181  const size_t key_dims_count_;
184 };
185 
186 #endif // QUERYENGINE_HASHJOINKEYHANDLERS_H
const ColumnType column_type
const JoinColumn * join_column_per_key_
OverlapsKeyHandler(const size_t key_dims_count, const JoinColumn *join_column, const double *bucket_sizes_for_dimension)
const int8_t * col_buff
DEVICE int operator()(const size_t i, T *key_scratch_buff, KEY_BUFF_HANDLER f) const
const size_t key_component_count_
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_int_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:83
const size_t elem_sz
const void *const * sd_inner_proxy_per_key_
#define SUFFIX(name)
const double * bucket_sizes_for_dimension_
const void *const * sd_outer_proxy_per_key_
#define DEVICE
static constexpr int32_t INVALID_STR_ID
std::string getString(int32_t string_id) const
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_unsigned_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:90
#define NULL_INT
Definition: sqltypes.h:173
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_small_date_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int32_t null_val, const int64_t ret_null_val, const int64_t pos)
Definition: DecodersImpl.h:141
const JoinColumnTypeInfo * type_info_per_key_
DEVICE int operator()(const size_t i, T *key_scratch_buff, KEY_BUFF_HANDLER f) const
#define NULL_SMALLINT
Definition: sqltypes.h:172
#define CHECK(condition)
Definition: Logger.h:187
DEVICE int64_t get_join_column_element_value(const JoinColumnTypeInfo &type_info, const JoinColumn &join_column, const size_t i)
DEVICE NEVER_INLINE double SUFFIX() fixed_width_double_decode_noinline(const int8_t *byte_stream, const int64_t pos)
Definition: DecodersImpl.h:126
GenericKeyHandler(const size_t key_component_count, const bool should_skip_entries, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key, const void *const *sd_inner_proxy_per_key, const void *const *sd_outer_proxy_per_key)
const size_t key_dims_count_
const JoinColumn * join_column_
const bool should_skip_entries_