OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ArrayTestTableFunctions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TableFunctionsTesting.h"
18 
19 /*
20  This file contains array-related compile-time UDTFs used for testing.
21 
22  NOTE: This file currently contains no GPU UDTFs. If any GPU UDTFs are
23  added, this file should be added to CUDA_TABLE_FUNCTION_FILES in CMakeLists.txt.
24  */
25 
26 #ifndef __CUDACC__
27 
// Reduces each input array to a single output value, one output row per input row.
// NOTE(review): the declarator line (listing line 29) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   sum_along_row__cpu_template(const Column<Array<T>>& input, Column<T>& output)
//
// Per row i:
//  - a null array produces a null output row;
//  - T = TextEncodingDict: the non-null elements are resolved to strings with
//    mgr->getString, concatenated, and the id returned by mgr->getOrAddTransient
//    for the output column's dictionary is stored;
//  - T = bool: all elements are OR-ed together (no per-element null check);
//  - any other T: the non-null elements are added up, starting from T{0}.
// Returns the number of input rows.
28 template <typename T>
30  Column<T>& output) {
31  int size = input.size();
32  for (int i = 0; i < size; i++) {
33  const Array<T> arr = input[i];
34  if (arr.isNull()) {
35  output.setNull(i);
36  } else {
37  if constexpr (std::is_same<T, TextEncodingDict>::value) {
// NOTE(review): `mgr` is declared on listing line 38, which is missing from this
// extract; presumably obtained via TableFunctionManager::get_singleton() - confirm.
39  std::string acc = "";
40  for (size_t j = 0; j < arr.size(); j++) {
41  if (!arr.isNull(j)) {
42  acc += mgr->getString(input.getDictDbId(), input.getDictId(), arr[j]);
43  }
44  }
45  int32_t out_string_id =
46  mgr->getOrAddTransient(output.getDictDbId(), output.getDictId(), acc);
47  output[i] = out_string_id;
48  } else {
49  T acc{0};
50  for (size_t j = 0; j < arr.size(); j++) {
51  if constexpr (std::is_same_v<T, bool>) {
52  // todo: arr.isNull(j) returns arr[j] because bool does not
53  // have a null value; we should introduce an 8-bit boolean type
54  // for Arrays
55  acc |= arr[j];
56  } else {
57  if (!arr.isNull(j)) {
58  acc += arr[j];
59  }
60  }
61  }
62  output[i] = acc;
63  }
64  }
65  }
66  return size;
67 }
68 
69 // explicit instantiations
70 template NEVER_INLINE HOST int32_t
72 template NEVER_INLINE HOST int32_t
74 template NEVER_INLINE HOST int32_t
76 template NEVER_INLINE HOST int32_t
78 template NEVER_INLINE HOST int32_t
80 template NEVER_INLINE HOST int32_t
82 template NEVER_INLINE HOST int32_t
84 template NEVER_INLINE HOST int32_t
86  Column<TextEncodingDict>& output);
87 
// Copies every array of the input column into the same row of the output column.
// NOTE(review): the declarator line (listing line 89) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   array_copier__cpu_template(TableFunctionManager& mgr,
//                              const Column<Array<T>>& input,
//                              Column<Array<T>>& output)
// Returns the number of input rows.
88 template <typename T>
90  const Column<Array<T>>& input,
91  Column<Array<T>>& output) {
92  int size = input.size();
93 
94  // count the number of items in all input arrays:
95  int output_values_size = 0;
96  for (int i = 0; i < size; i++) {
97  output_values_size += input[i].size();
98  }
99 
100  // set the size and allocate the output column buffers:
// NOTE(review): listing line 101 (the call that takes the two arguments below) is
// missing from this extract; presumably mgr.set_output_array_values_total_number( - confirm.
102  /*output column index=*/0,
103  /*upper bound to the number of items in all output arrays=*/output_values_size);
104  mgr.set_output_row_size(size);
105 
106  // set the items of output columns:
107  for (int i = 0; i < size; i++) {
108  output.setItem(i, input[i]);
109  }
110 
111  return size;
112 }
113 
114 // explicit instantiations
115 template NEVER_INLINE HOST int32_t
117  const Column<Array<float>>& input,
118  Column<Array<float>>& output);
119 template NEVER_INLINE HOST int32_t
121  const Column<Array<double>>& input,
122  Column<Array<double>>& output);
123 template NEVER_INLINE HOST int32_t
125  const Column<Array<int8_t>>& input,
126  Column<Array<int8_t>>& output);
127 template NEVER_INLINE HOST int32_t
129  const Column<Array<int16_t>>& input,
130  Column<Array<int16_t>>& output);
131 template NEVER_INLINE HOST int32_t
133  const Column<Array<int32_t>>& input,
134  Column<Array<int32_t>>& output);
135 template NEVER_INLINE HOST int32_t
137  const Column<Array<int64_t>>& input,
138  Column<Array<int64_t>>& output);
139 template NEVER_INLINE HOST int32_t
141  const Column<Array<bool>>& input,
142  Column<Array<bool>>& output);
143 template NEVER_INLINE HOST int32_t
145  const Column<Array<TextEncodingDict>>& input,
147 
// Concatenates, row by row, the arrays of all columns in `inputs` into one output array.
// NOTE(review): the declarator line (listing line 149) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   array_concat__cpu_template(TableFunctionManager& mgr,
//                              const ColumnList<Array<T>>& inputs,
//                              Column<Array<T>>& output)
// Returns inputs.size(), which is also used as the output row count.
148 template <typename T>
150  const ColumnList<Array<T>>& inputs,
151  Column<Array<T>>& output) {
152  int size = inputs.size();
153 
154  int output_values_size = 0;  // total item count over every column and row
155  for (int j = 0; j < inputs.numCols(); j++) {
156  for (int i = 0; i < size; i++) {
157  output_values_size += inputs[j][i].size();
158  }
159  }
// NOTE(review): listing line 160 (the call that takes the two arguments below) is
// missing from this extract; presumably mgr.set_output_array_values_total_number( - confirm.
161  /*output column index=*/0,
162  /*upper bound to the number of items in all output arrays=*/output_values_size);
163 
164  mgr.set_output_row_size(size);
165 
// Rows are the outer loop so that all appends to row i happen before row i + 1 is
// touched (see the concatItem remark below).
166  for (int i = 0; i < size; i++) {
167  for (int j = 0; j < inputs.numCols(); j++) {
168  Column<Array<T>> col = inputs[j];
169  output.concatItem(i,
170  col[i]); // works only if i is the last row set, otherwise throws
171  }
172  }
173  return size;
174 }
175 
// Appends the single array `input2` to every array of the column `input1`.
// NOTE(review): the declarator line (listing line 177) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   array_append__cpu_template(TableFunctionManager& mgr,
//                              const Column<Array<T>>& input1,
//                              const Array<T>& input2,
//                              Column<Array<T>>& output)
// Returns the number of rows in input1.
176 template <typename T>
178  const Column<Array<T>>& input1,
179  const Array<T>& input2,
180  Column<Array<T>>& output) {
181  int size = input1.size();
182  int output_values_size = input2.size() * size;  // input2 is repeated once per row
183  for (int i = 0; i < size; i++) {
184  output_values_size += input1[i].size();
185  }
// NOTE(review): listing line 186 (the call that takes the two arguments below) is
// missing from this extract; presumably mgr.set_output_array_values_total_number( - confirm.
187  /*output column index=*/0,
188  /*upper bound to the number of items in all output arrays=*/output_values_size);
189 
190  mgr.set_output_row_size(size);
191 
192  for (int i = 0; i < size; i++) {
193  output.concatItem(i, input1[i]);  // row i starts with the input1 array ...
194  output.concatItem(i, input2);  // ... followed by input2
195  }
196  return size;
197 }
198 
199 // explicit instantiations
200 template NEVER_INLINE HOST int32_t
202  const ColumnList<Array<float>>& inputs,
203  Column<Array<float>>& output);
204 template NEVER_INLINE HOST int32_t
206  const ColumnList<Array<double>>& inputs,
207  Column<Array<double>>& output);
208 template NEVER_INLINE HOST int32_t
210  const ColumnList<Array<int8_t>>& inputs,
211  Column<Array<int8_t>>& output);
212 template NEVER_INLINE HOST int32_t
214  const ColumnList<Array<int16_t>>& inputs,
215  Column<Array<int16_t>>& output);
216 template NEVER_INLINE HOST int32_t
218  const ColumnList<Array<int32_t>>& inputs,
219  Column<Array<int32_t>>& output);
220 template NEVER_INLINE HOST int32_t
222  const ColumnList<Array<int64_t>>& inputs,
223  Column<Array<int64_t>>& output);
224 template NEVER_INLINE HOST int32_t
226  const ColumnList<Array<bool>>& inputs,
227  Column<Array<bool>>& output);
228 template NEVER_INLINE HOST int32_t
230  const ColumnList<Array<TextEncodingDict>>& inputs,
232 
233 template NEVER_INLINE HOST int32_t
235  const Column<Array<float>>& input1,
236  const Array<float>& input2,
237  Column<Array<float>>& output);
238 template NEVER_INLINE HOST int32_t
240  const Column<Array<double>>& input1,
241  const Array<double>& input2,
242  Column<Array<double>>& output);
243 template NEVER_INLINE HOST int32_t
245  const Column<Array<int8_t>>& input1,
246  const Array<int8_t>& input2,
247  Column<Array<int8_t>>& output);
248 template NEVER_INLINE HOST int32_t
250  const Column<Array<int16_t>>& input1,
251  const Array<int16_t>& input2,
252  Column<Array<int16_t>>& output);
253 template NEVER_INLINE HOST int32_t
255  const Column<Array<int32_t>>& input1,
256  const Array<int32_t>& input2,
257  Column<Array<int32_t>>& output);
258 template NEVER_INLINE HOST int32_t
260  const Column<Array<int64_t>>& input1,
261  const Array<int64_t>& input2,
262  Column<Array<int64_t>>& output);
263 template NEVER_INLINE HOST int32_t
265  const Column<Array<bool>>& input1,
266  const Array<bool>& input2,
267  Column<Array<bool>>& output);
268 
// Wraps each non-null scalar of the input column in a one-element array; a null
// input row becomes a null output row.
// NOTE(review): the declarator line (listing line 270) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   array_asarray__cpu_template(TableFunctionManager& mgr,
//                               const Column<T>& input,
//                               Column<Array<T>>& output)
// Returns the number of input rows.
269 template <typename T>
271  const Column<T>& input,
272  Column<Array<T>>& output) {
273  int size = input.size();
274  int output_values_size = 0;  // one array item per non-null input row
275  for (int i = 0; i < size; i++) {
276  output_values_size += (input.isNull(i) ? 0 : 1);
277  }
// NOTE(review): listing line 278 (the call that takes the two arguments below) is
// missing from this extract; presumably mgr.set_output_array_values_total_number( - confirm.
279  /*output column index=*/0,
280  /*upper bound to the number of items in all output arrays=*/output_values_size);
281  mgr.set_output_row_size(size);
282 
283  if constexpr (std::is_same<T, TextEncodingDict>::value) {
284  for (int i = 0; i < size; i++) {
285  if (input.isNull(i)) {
286  output.setNull(i);
287  } else {
288  Array<T> arr = output.getItem(i, 1);
// The input id is resolved to its string via the input column's dictionary, and
// the stored element is the id getOrAddTransient returns for the dictionary
// identified by mgr.getNewDictDbId() / mgr.getNewDictId().
289  arr[0] = mgr.getOrAddTransient(
290  mgr.getNewDictDbId(),
291  mgr.getNewDictId(),
292  mgr.getString(input.getDictDbId(), input.getDictId(), input[i]));
293  }
294  }
295  } else {
296  for (int i = 0; i < size; i++) {
297  if (input.isNull(i)) {
298  output.setNull(i);
299  } else {
300  Array<T> arr = output.getItem(i, 1);
301  arr[0] = input[i];
302  }
303  }
304  }
305  return size;
306 }
307 
308 // explicit instantiations
309 
310 template NEVER_INLINE HOST int32_t
312  const Column<int64_t>& input,
313  Column<Array<int64_t>>& output);
314 
315 template NEVER_INLINE HOST int32_t
317  const Column<TextEncodingDict>& input,
319 
320 // clang-format off
321 /*
322  UDTF: array_split__cpu_template(TableFunctionManager mgr, Column<Array<T>> input) ->
323  Column<Array<T>> | input_id=args<0>, Column<Array<T>> | input_id=args<0>,
324  T=[float, double, int8_t, int16_t, int32_t, int64_t, bool, TextEncodingDict]
325 */
326 // clang-format on
// Splits every input array into two halves written to the `first` and `second`
// output columns: for an array of sz items, `first` receives items [0, sz / 2) and
// `second` receives the remaining sz - sz / 2 items. A null input row yields null
// rows in both outputs.
// NOTE(review): the declarator line (listing line 328) is missing from this extract;
// the reference index at the end of this page gives the signature as
//   array_split__cpu_template(TableFunctionManager& mgr,
//                             const Column<Array<T>>& input,
//                             Column<Array<T>>& first,
//                             Column<Array<T>>& second)
// Returns the number of input rows.
327 template <typename T>
329  const Column<Array<T>>& input,
330  Column<Array<T>>& first,
331  Column<Array<T>>& second) {
332  int size = input.size();
333  int first_values_size = 0;
334  int second_values_size = 0;
// First pass: total the item counts each output column will hold (null rows add nothing).
335  for (int i = 0; i < size; i++) {
336  if (!input.isNull(i)) {
337  int64_t sz = input[i].size();
338  first_values_size += sz / 2;
339  second_values_size += sz - sz / 2;
340  }
341  }
342  mgr.set_output_array_values_total_number(0, first_values_size);
343  mgr.set_output_array_values_total_number(1, second_values_size);
344  mgr.set_output_row_size(size);
345 
// Second pass: obtain each row's output arrays with getItem and copy the items across.
346  for (int i = 0; i < size; i++) {
347  if (input.isNull(i)) {
348  first.setNull(i);
349  second.setNull(i);
350  } else {
351  Array<T> arr = input[i];
352  int64_t sz = arr.size();
353  Array<T> arr1 = first.getItem(i, sz / 2);
354  Array<T> arr2 = second.getItem(i, sz - sz / 2);
355  for (int64_t j = 0; j < sz; j++) {
356  if (j < sz / 2) {
357  arr1[j] = arr[j];
358  } else {
359  arr2[j - sz / 2] = arr[j];  // re-base the index for the second half
360  }
361  }
362  }
363  }
364  return size;
365 }
366 
367 // explicit instantiations
368 
369 template NEVER_INLINE HOST int32_t
371  const Column<Array<float>>& input,
372  Column<Array<float>>& first,
373  Column<Array<float>>& second);
374 
375 template NEVER_INLINE HOST int32_t
377  const Column<Array<double>>& input,
378  Column<Array<double>>& first,
379  Column<Array<double>>& second);
380 
381 template NEVER_INLINE HOST int32_t
383  const Column<Array<int8_t>>& input,
384  Column<Array<int8_t>>& first,
385  Column<Array<int8_t>>& second);
386 
387 template NEVER_INLINE HOST int32_t
389  const Column<Array<int16_t>>& input,
390  Column<Array<int16_t>>& first,
391  Column<Array<int16_t>>& second);
392 
393 template NEVER_INLINE HOST int32_t
395  const Column<Array<int32_t>>& input,
396  Column<Array<int32_t>>& first,
397  Column<Array<int32_t>>& second);
398 
399 template NEVER_INLINE HOST int32_t
401  const Column<Array<int64_t>>& input,
402  Column<Array<int64_t>>& first,
403  Column<Array<int64_t>>& second);
404 
405 template NEVER_INLINE HOST int32_t
407  const Column<Array<bool>>& input,
408  Column<Array<bool>>& first,
409  Column<Array<bool>>& second);
410 
411 template NEVER_INLINE HOST int32_t
413  const Column<Array<TextEncodingDict>>& input,
416 
417 #endif // #ifndef __CUDACC__
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
std::string getString(int32_t db_id, int32_t dict_id, int32_t string_id)
Definition: heavydbTypes.h:422
void set_output_array_values_total_number(int32_t index, int64_t output_array_values_total_number)
Definition: heavydbTypes.h:361
NEVER_INLINE HOST int32_t array_copier__cpu_template(TableFunctionManager &mgr, const Column< Array< T >> &input, Column< Array< T >> &output)
int32_t getOrAddTransient(int32_t db_id, int32_t dict_id, std::string str)
Definition: heavydbTypes.h:426
NEVER_INLINE HOST int32_t array_asarray__cpu_template(TableFunctionManager &mgr, const Column< T > &input, Column< Array< T >> &output)
DEVICE int64_t size() const
int32_t getNewDictDbId()
Definition: heavydbTypes.h:411
NEVER_INLINE HOST int32_t array_split__cpu_template(TableFunctionManager &mgr, const Column< Array< T >> &input, Column< Array< T >> &first, Column< Array< T >> &second)
NEVER_INLINE HOST int32_t array_concat__cpu_template(TableFunctionManager &mgr, const ColumnList< Array< T >> &inputs, Column< Array< T >> &output)
#define HOST
NEVER_INLINE HOST int32_t sum_along_row__cpu_template(const Column< Array< T >> &input, Column< T > &output)
DEVICE bool isNull(int64_t index) const
DEVICE void setNull(int64_t index)
DEVICE ALWAYS_INLINE size_t size() const
Definition: heavydbTypes.h:520
#define NEVER_INLINE
NEVER_INLINE HOST int32_t array_append__cpu_template(TableFunctionManager &mgr, const Column< Array< T >> &input1, const Array< T > &input2, Column< Array< T >> &output)
static TableFunctionManager * get_singleton()
Definition: heavydbTypes.h:357
DEVICE bool isNull() const
Definition: heavydbTypes.h:556