OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsCommon.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __CUDACC__
18 
19 #include "TableFunctionsCommon.hpp"
20 
21 #include <filesystem>
22 #include <memory>
23 #include <regex>
24 #include <string>
25 
26 #include <tbb/parallel_for.h>
27 #include <tbb/task_arena.h>
28 
29 #define NANOSECONDS_PER_SECOND 1000000000
30 
31 template <typename T>
32 NEVER_INLINE HOST std::pair<T, T> get_column_min_max(const Column<T>& col) {
33  T col_min = std::numeric_limits<T>::max();
34  T col_max = std::numeric_limits<T>::lowest();
35  const int64_t num_rows = col.size();
36  const size_t max_thread_count = std::thread::hardware_concurrency();
37  const size_t max_inputs_per_thread = 20000;
38  const size_t num_threads = std::min(
39  max_thread_count, ((num_rows + max_inputs_per_thread - 1) / max_inputs_per_thread));
40 
41  std::vector<T> local_col_mins(num_threads, std::numeric_limits<T>::max());
42  std::vector<T> local_col_maxes(num_threads, std::numeric_limits<T>::lowest());
43  tbb::task_arena limited_arena(num_threads);
44 
45  limited_arena.execute([&] {
47  tbb::blocked_range<int64_t>(0, num_rows),
48  [&](const tbb::blocked_range<int64_t>& r) {
49  const int64_t start_idx = r.begin();
50  const int64_t end_idx = r.end();
51  T local_col_min = std::numeric_limits<T>::max();
52  T local_col_max = std::numeric_limits<T>::lowest();
53  for (int64_t r = start_idx; r < end_idx; ++r) {
54  const T val = col[r];
55  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
56  if (std::isnan(val) || std::isinf(val)) {
57  continue;
58  }
59  }
60  if (val == inline_null_value<T>()) {
61  continue;
62  }
63  if (val < local_col_min) {
64  local_col_min = val;
65  }
66  if (val > local_col_max) {
67  local_col_max = val;
68  }
69  }
70  size_t thread_idx = tbb::this_task_arena::current_thread_index();
71  if (local_col_min < local_col_mins[thread_idx]) {
72  local_col_mins[thread_idx] = local_col_min;
73  }
74  if (local_col_max > local_col_maxes[thread_idx]) {
75  local_col_maxes[thread_idx] = local_col_max;
76  }
77  });
78  });
79 
80  for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
81  if (local_col_mins[thread_idx] < col_min) {
82  col_min = local_col_mins[thread_idx];
83  }
84  if (local_col_maxes[thread_idx] > col_max) {
85  col_max = local_col_maxes[thread_idx];
86  }
87  }
88  return std::make_pair(col_min, col_max);
89 }
90 
91 template NEVER_INLINE HOST std::pair<int8_t, int8_t> get_column_min_max(
92  const Column<int8_t>& col);
93 template NEVER_INLINE HOST std::pair<int16_t, int16_t> get_column_min_max(
94  const Column<int16_t>& col);
95 template NEVER_INLINE HOST std::pair<int32_t, int32_t> get_column_min_max(
96  const Column<int32_t>& col);
97 template NEVER_INLINE HOST std::pair<int64_t, int64_t> get_column_min_max(
98  const Column<int64_t>& col);
99 template NEVER_INLINE HOST std::pair<float, float> get_column_min_max(
100  const Column<float>& col);
101 template NEVER_INLINE HOST std::pair<double, double> get_column_min_max(
102  const Column<double>& col);
103 
104 std::pair<int32_t, int32_t> get_column_min_max(const Column<TextEncodingDict>& col) {
105  Column<int32_t> int_alias_col(reinterpret_cast<int32_t*>(col.getPtr()), col.size());
106  return get_column_min_max(int_alias_col);
107 }
108 
109 // Todo(todd): we should use a functor approach for gathering whatever stats
110 // a table function needs so we're not repeating boilerplate code (although
111 // should confirm it doesn't have an adverse effect on performance).
112 // Leaving as a follow-up though until we have more examples of real-world
113 // usage patterns.
114 
115 template <typename T>
116 NEVER_INLINE HOST double get_column_mean(const T* data, const int64_t num_rows) {
117  // const int64_t num_rows = col.size();
118  const size_t max_thread_count = std::thread::hardware_concurrency();
119  const size_t max_inputs_per_thread = 20000;
120  const size_t num_threads = std::min(
121  max_thread_count, ((num_rows + max_inputs_per_thread - 1) / max_inputs_per_thread));
122 
123  std::vector<double> local_col_sums(num_threads, 0.);
124  std::vector<int64_t> local_col_non_null_counts(num_threads, 0L);
125  tbb::task_arena limited_arena(num_threads);
126  limited_arena.execute([&] {
128  tbb::blocked_range<int64_t>(0, num_rows),
129  [&](const tbb::blocked_range<int64_t>& r) {
130  const int64_t start_idx = r.begin();
131  const int64_t end_idx = r.end();
132  double local_col_sum = 0.;
133  int64_t local_col_non_null_count = 0;
134  for (int64_t r = start_idx; r < end_idx; ++r) {
135  const T val = data[r];
136  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
137  if (std::isnan(val) || std::isinf(val)) {
138  continue;
139  }
140  }
141  if (val == inline_null_value<T>()) {
142  continue;
143  }
144  local_col_sum += data[r];
145  local_col_non_null_count++;
146  }
147  size_t thread_idx = tbb::this_task_arena::current_thread_index();
148  local_col_sums[thread_idx] += local_col_sum;
149  local_col_non_null_counts[thread_idx] += local_col_non_null_count;
150  });
151  });
152 
153  double col_sum = 0.0;
154  int64_t col_non_null_count = 0L;
155 
156  for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
157  col_sum += local_col_sums[thread_idx];
158  col_non_null_count += local_col_non_null_counts[thread_idx];
159  }
160 
161  return col_non_null_count == 0 ? 0 : col_sum / col_non_null_count;
162 }
163 
164 template NEVER_INLINE HOST double get_column_mean(const int8_t* data,
165  const int64_t num_rows);
166 
167 template NEVER_INLINE HOST double get_column_mean(const int16_t* data,
168  const int64_t num_rows);
169 
170 template NEVER_INLINE HOST double get_column_mean(const int32_t* data,
171  const int64_t num_rows);
172 
173 template NEVER_INLINE HOST double get_column_mean(const int64_t* data,
174  const int64_t num_rows);
175 
176 template NEVER_INLINE HOST double get_column_mean(const float* data,
177  const int64_t num_rows);
178 
179 template NEVER_INLINE HOST double get_column_mean(const double* data,
180  const int64_t num_rows);
181 
182 template <typename T>
184  return get_column_mean(col.getPtr(), col.size());
185 }
186 
187 template NEVER_INLINE HOST double get_column_mean(const Column<int8_t>& col);
188 template NEVER_INLINE HOST double get_column_mean(const Column<int16_t>& col);
189 template NEVER_INLINE HOST double get_column_mean(const Column<int32_t>& col);
190 template NEVER_INLINE HOST double get_column_mean(const Column<int64_t>& col);
191 template NEVER_INLINE HOST double get_column_mean(const Column<float>& col);
192 template NEVER_INLINE HOST double get_column_mean(const Column<double>& col);
193 
194 template <typename T>
195 NEVER_INLINE HOST double get_column_std_dev(const Column<T>& col, const double mean) {
196  return get_column_std_dev(col.getPtr(), col.size(), mean);
197 }
198 
199 template NEVER_INLINE HOST double get_column_std_dev(const Column<int32_t>& col,
200  const double mean);
201 template NEVER_INLINE HOST double get_column_std_dev(const Column<int64_t>& col,
202  const double mean);
203 template NEVER_INLINE HOST double get_column_std_dev(const Column<float>& col,
204  const double mean);
205 template NEVER_INLINE HOST double get_column_std_dev(const Column<double>& col,
206  const double mean);
207 
208 template <typename T>
210  const int64_t num_rows,
211  const double mean) {
212  // const int64_t num_rows = col.size();
213  const size_t max_thread_count = std::thread::hardware_concurrency();
214  const size_t max_inputs_per_thread = 200000;
215  const size_t num_threads = std::min(
216  max_thread_count, ((num_rows + max_inputs_per_thread - 1) / max_inputs_per_thread));
217 
218  std::vector<double> local_col_squared_residuals(num_threads, 0.);
219  std::vector<int64_t> local_col_non_null_counts(num_threads, 0L);
220  tbb::task_arena limited_arena(num_threads);
221 
222  limited_arena.execute([&] {
224  tbb::blocked_range<int64_t>(0, num_rows),
225  [&](const tbb::blocked_range<int64_t>& r) {
226  const int64_t start_idx = r.begin();
227  const int64_t end_idx = r.end();
228  double local_col_squared_residual = 0.;
229  int64_t local_col_non_null_count = 0;
230  for (int64_t r = start_idx; r < end_idx; ++r) {
231  const T val = data[r];
232  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
233  if (std::isnan(val) || std::isinf(val)) {
234  continue;
235  }
236  }
237  if (val == inline_null_value<T>()) {
238  continue;
239  }
240  const double residual = val - mean;
241  local_col_squared_residual += (residual * residual);
242  local_col_non_null_count++;
243  }
244  size_t thread_idx = tbb::this_task_arena::current_thread_index();
245  local_col_squared_residuals[thread_idx] += local_col_squared_residual;
246  local_col_non_null_counts[thread_idx] += local_col_non_null_count;
247  });
248  });
249 
250  double col_sum_squared_residual = 0.0;
251  int64_t col_non_null_count = 0;
252 
253  for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
254  col_sum_squared_residual += local_col_squared_residuals[thread_idx];
255  col_non_null_count += local_col_non_null_counts[thread_idx];
256  }
257 
258  return col_non_null_count == 0 ? 0
259  : sqrt(col_sum_squared_residual / col_non_null_count);
260 }
261 
262 template NEVER_INLINE HOST double get_column_std_dev(const int32_t* data,
263  const int64_t num_rows,
264  const double mean);
265 template NEVER_INLINE HOST double get_column_std_dev(const int64_t* data,
266  const int64_t num_rows,
267  const double mean);
268 template NEVER_INLINE HOST double get_column_std_dev(const float* data,
269  const int64_t num_rows,
270  const double mean);
271 template NEVER_INLINE HOST double get_column_std_dev(const double* data,
272  const int64_t num_rows,
273  const double mean);
274 
275 template <typename T>
276 NEVER_INLINE HOST std::tuple<T, T, bool> get_column_metadata(const Column<T>& col) {
277  T col_min = std::numeric_limits<T>::max();
278  T col_max = std::numeric_limits<T>::lowest();
279  bool has_nulls = false;
280  const int64_t num_rows = col.size();
281  const size_t max_thread_count = std::thread::hardware_concurrency();
282  const size_t max_inputs_per_thread = 200000;
283  const size_t num_threads = std::min(
284  max_thread_count, ((num_rows + max_inputs_per_thread - 1) / max_inputs_per_thread));
285 
286  std::vector<T> local_col_mins(num_threads, std::numeric_limits<T>::max());
287  std::vector<T> local_col_maxes(num_threads, std::numeric_limits<T>::lowest());
288  std::vector<bool> local_col_has_nulls(num_threads, false);
289  tbb::task_arena limited_arena(num_threads);
290 
291  limited_arena.execute([&] {
293  tbb::blocked_range<int64_t>(0, num_rows),
294  [&](const tbb::blocked_range<int64_t>& r) {
295  const int64_t start_idx = r.begin();
296  const int64_t end_idx = r.end();
297  T local_col_min = std::numeric_limits<T>::max();
298  T local_col_max = std::numeric_limits<T>::lowest();
299  bool local_has_nulls = false;
300  for (int64_t r = start_idx; r < end_idx; ++r) {
301  const T val = col[r];
302  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
303  if (std::isnan(val) || std::isinf(val)) {
304  continue;
305  }
306  }
307  if (col.isNull(r)) {
308  local_has_nulls = true;
309  continue;
310  }
311  if (val < local_col_min) {
312  local_col_min = val;
313  }
314  if (val > local_col_max) {
315  local_col_max = val;
316  }
317  }
318  const size_t thread_idx = tbb::this_task_arena::current_thread_index();
319  if (local_has_nulls) {
320  local_col_has_nulls[thread_idx] = true;
321  }
322  if (local_col_min < local_col_mins[thread_idx]) {
323  local_col_mins[thread_idx] = local_col_min;
324  }
325  if (local_col_max > local_col_maxes[thread_idx]) {
326  local_col_maxes[thread_idx] = local_col_max;
327  }
328  });
329  });
330 
331  for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
332  if (local_col_has_nulls[thread_idx]) {
333  has_nulls = true;
334  }
335  if (local_col_mins[thread_idx] < col_min) {
336  col_min = local_col_mins[thread_idx];
337  }
338  if (local_col_maxes[thread_idx] > col_max) {
339  col_max = local_col_maxes[thread_idx];
340  }
341  }
342  return {col_min, col_max, has_nulls};
343 }
344 
345 template NEVER_INLINE HOST std::tuple<int8_t, int8_t, bool> get_column_metadata(
346  const Column<int8_t>& col);
347 template NEVER_INLINE HOST std::tuple<int16_t, int16_t, bool> get_column_metadata(
348  const Column<int16_t>& col);
349 template NEVER_INLINE HOST std::tuple<int32_t, int32_t, bool> get_column_metadata(
350  const Column<int32_t>& col);
351 template NEVER_INLINE HOST std::tuple<int64_t, int64_t, bool> get_column_metadata(
352  const Column<int64_t>& col);
353 template NEVER_INLINE HOST std::tuple<float, float, bool> get_column_metadata(
354  const Column<float>& col);
355 template NEVER_INLINE HOST std::tuple<double, double, bool> get_column_metadata(
356  const Column<double>& col);
357 
358 std::tuple<int32_t, int32_t, bool> get_column_metadata(
359  const Column<TextEncodingDict>& col) {
360  Column<int32_t> int_alias_col(reinterpret_cast<int32_t*>(col.getPtr()), col.size());
361  return get_column_metadata(int_alias_col);
362 }
363 
364 template <typename T>
365 void z_std_normalize_col(const T* input_data,
366  T* output_data,
367  const int64_t num_rows,
368  const double mean,
369  const double std_dev) {
370  if (std_dev <= 0.0) {
371  throw std::runtime_error("Standard deviation cannot be <= 0");
372  }
373  const double inv_std_dev = 1.0 / std_dev;
374 
375  tbb::parallel_for(tbb::blocked_range<int64_t>(0, num_rows),
376  [&](const tbb::blocked_range<int64_t>& r) {
377  const int64_t start_idx = r.begin();
378  const int64_t end_idx = r.end();
379  for (int64_t row_idx = start_idx; row_idx < end_idx; ++row_idx) {
380  output_data[row_idx] = (input_data[row_idx] - mean) * inv_std_dev;
381  }
382  });
383 }
384 
385 template void z_std_normalize_col(const float* input_data,
386  float* output_data,
387  const int64_t num_rows,
388  const double mean,
389  const double std_dev);
390 template void z_std_normalize_col(const double* input_data,
391  double* output_data,
392  const int64_t num_rows,
393  const double mean,
394  const double std_dev);
395 
/**
 * Z-score normalizes each feature column independently.
 *
 * For every pointer in input_data the mean and standard deviation are computed with
 * get_column_mean / get_column_std_dev, and the column is normalized into a freshly
 * sized output vector by z_std_normalize_col. An exception from z_std_normalize_col
 * (thrown for a standard deviation <= 0) propagates to the caller.
 *
 * @param input_data  One pointer per feature, each addressing num_rows values.
 * @param num_rows    Number of rows in every feature.
 * @return One normalized vector per feature, in the order of input_data.
 */
template <typename T>
std::vector<std::vector<T>> z_std_normalize_data(const std::vector<T*>& input_data,
                                                 const int64_t num_rows) {
  std::vector<std::vector<T>> normalized_data;
  normalized_data.reserve(input_data.size());
  for (T* const feature : input_data) {
    const auto feature_mean = get_column_mean(feature, num_rows);
    const auto feature_std_dev = get_column_std_dev(feature, num_rows, feature_mean);
    normalized_data.emplace_back(num_rows);
    z_std_normalize_col(
        feature, normalized_data.back().data(), num_rows, feature_mean, feature_std_dev);
  }
  return normalized_data;
}
413 
414 template std::vector<std::vector<float>> z_std_normalize_data(
415  const std::vector<float*>& input_data,
416  const int64_t num_rows);
417 template std::vector<std::vector<double>> z_std_normalize_data(
418  const std::vector<double*>& input_data,
419  const int64_t num_rows);
420 
421 template <typename T>
423  const std::vector<T*>& input_data,
424  const int64_t num_rows) {
425  const int64_t num_features = input_data.size();
426  std::vector<std::vector<T>> normalized_data(num_features);
427  std::vector<T> means(num_features);
428  std::vector<T> std_devs(num_features);
429  for (int64_t feature_idx = 0; feature_idx < num_features; ++feature_idx) {
430  means[feature_idx] = get_column_mean(input_data[feature_idx], num_rows);
431  std_devs[feature_idx] =
432  get_column_std_dev(input_data[feature_idx], num_rows, means[feature_idx]);
433  normalized_data[feature_idx].resize(num_rows);
434  z_std_normalize_col(input_data[feature_idx],
435  normalized_data[feature_idx].data(),
436  num_rows,
437  means[feature_idx],
438  std_devs[feature_idx]);
439  }
440  return ZStdNormalizationSummaryStats<T>(normalized_data, means, std_devs);
441 }
442 
444  const std::vector<float*>& input_data,
445  const int64_t num_rows);
447  const std::vector<double*>& input_data,
448  const int64_t num_rows);
449 
450 template <typename T1, typename T2>
452 distance_in_meters(const T1 fromlon, const T1 fromlat, const T2 tolon, const T2 tolat) {
453  T1 latitudeArc = (fromlat - tolat) * 0.017453292519943295769236907684886;
454  T1 longitudeArc = (fromlon - tolon) * 0.017453292519943295769236907684886;
455  T1 latitudeH = sin(latitudeArc * 0.5);
456  latitudeH *= latitudeH;
457  T1 lontitudeH = sin(longitudeArc * 0.5);
458  lontitudeH *= lontitudeH;
459  T1 tmp = cos(fromlat * 0.017453292519943295769236907684886) *
460  cos(tolat * 0.017453292519943295769236907684886);
461  return 6372797.560856 * (2.0 * asin(sqrt(latitudeH + tmp * lontitudeH)));
462 }
463 
464 template NEVER_INLINE HOST float distance_in_meters(const float fromlon,
465  const float fromlat,
466  const float tolon,
467  const float tolat);
468 
469 template NEVER_INLINE HOST float distance_in_meters(const float fromlon,
470  const float fromlat,
471  const double tolon,
472  const double tolat);
473 
474 template NEVER_INLINE HOST double distance_in_meters(const double fromlon,
475  const double fromlat,
476  const float tolon,
477  const float tolat);
478 
479 template NEVER_INLINE HOST double distance_in_meters(const double fromlon,
480  const double fromlat,
481  const double tolon,
482  const double tolat);
483 
484 namespace FileUtilities {
485 
486 // Following implementation taken from https://stackoverflow.com/a/65851545
487 
/**
 * Translates a filename glob into an equivalent std::regex.
 *
 * Every regex metacharacter in glob (\ ^ . $ | ( ) { } [ ] + /) is escaped so that it
 * matches literally. The wildcards are mapped as '?' -> '.' (exactly one character)
 * and '*' -> '.*' (any run of characters, possibly empty). No anchors are added, so
 * use std::regex_match for whole-name matching.
 *
 * @param glob            Glob pattern, e.g. "*.csv".
 * @param case_sensitive  When false (the default) the regex is built with
 *                        std::regex_constants::icase.
 * @return The compiled regular expression.
 */
std::regex glob_to_regex(const std::string& glob, bool case_sensitive = false) {
  // Note It is possible to automate checking if filesystem is case sensitive or not (e.g.
  // by performing a test first time this function is ran)
  std::string regex_string;
  regex_string.reserve(glob.size() * 2);
  // Single pass over the glob: escape metacharacters, expand wildcards, copy the rest.
  for (const char c : glob) {
    switch (c) {
      case '\\':
      case '^':
      case '.':
      case '$':
      case '|':
      case '(':
      case ')':
      case '{':
      case '}':
      case '[':
      case ']':
      case '+':
      case '/':
        regex_string += '\\';
        regex_string += c;
        break;
      case '?':
        regex_string += '.';
        break;
      case '*':
        regex_string += ".*";
        break;
      default:
        regex_string += c;
        break;
    }
  }

  return std::regex(
      regex_string,
      case_sensitive ? std::regex_constants::ECMAScript : std::regex_constants::icase);
}
514 
515 std::vector<std::filesystem::path> get_fs_paths(const std::string& file_or_directory) {
516  const std::filesystem::path file_or_directory_path(file_or_directory);
517  const auto file_status = std::filesystem::status(file_or_directory_path);
518 
519  std::vector<std::filesystem::path> fs_paths;
520  if (std::filesystem::is_regular_file(file_status)) {
521  fs_paths.emplace_back(file_or_directory_path);
522  return fs_paths;
523  } else if (std::filesystem::is_directory(file_status)) {
524  for (std::filesystem::directory_entry const& entry :
525  std::filesystem::directory_iterator(file_or_directory_path)) {
526  if (std::filesystem::is_regular_file(std::filesystem::status(entry))) {
527  fs_paths.emplace_back(entry.path());
528  }
529  }
530  return fs_paths;
531  } else {
532  const auto parent_path = file_or_directory_path.parent_path();
533  const auto parent_status = std::filesystem::status(parent_path);
534  if (std::filesystem::is_directory(parent_status)) {
535  const auto file_glob = file_or_directory_path.filename();
536  const std::regex glob_regex{glob_to_regex(file_glob.string(), false)};
537 
538  for (std::filesystem::directory_entry const& entry :
539  std::filesystem::directory_iterator(parent_path)) {
540  if (std::filesystem::is_regular_file(std::filesystem::status(entry))) {
541  const auto entry_filename = entry.path().filename().string();
542  if (std::regex_match(entry_filename, glob_regex)) {
543  fs_paths.emplace_back(entry.path());
544  }
545  }
546  }
547  return fs_paths;
548  }
549  }
550  return fs_paths;
551 }
552 
553 } // namespace FileUtilities
554 
// Checks a value against a single bound.
// NOTE(review): this listing has lost the line carrying the function name and first
// parameter, and the `case` labels in front of each `return` in both inner switches.
// The generated index for this file gives the signature as
//   NEVER_INLINE HOST bool is_valid_tf_input(const T input, const T bounds_val,
//       const BoundsType bounds_type, const IntervalType interval_type)
// Visible behavior: for BoundsType::Min the result is `input >= bounds_val` or
// `input > bounds_val`; for BoundsType::Max it is `input <= bounds_val` or
// `input < bounds_val`. interval_type selects which comparison is used; the
// enumerator names are not visible here -- TODO confirm against the original file.
// Any unexpected enum value reaches UNREACHABLE().
555 template <typename T>
557  const T bounds_val,
558  const BoundsType bounds_type,
559  const IntervalType interval_type) {
560  switch (bounds_type) {
561  case BoundsType::Min:
562  switch (interval_type) {
564  return input >= bounds_val;
566  return input > bounds_val;
567  default:
568  UNREACHABLE();
569  }
570  case BoundsType::Max:
571  switch (interval_type) {
573  return input <= bounds_val;
575  return input < bounds_val;
576  default:
577  UNREACHABLE();
578  }
579  break;
580  default:
581  UNREACHABLE();
582  }
583  UNREACHABLE();
584  return false; // To address compiler warning
585 }
586 
587 template NEVER_INLINE HOST bool is_valid_tf_input(const int32_t input,
588  const int32_t bounds_val,
589  const BoundsType bounds_type,
590  const IntervalType interval_type);
591 
592 template NEVER_INLINE HOST bool is_valid_tf_input(const int64_t input,
593  const int64_t bounds_val,
594  const BoundsType bounds_type,
595  const IntervalType interval_type);
596 
597 template NEVER_INLINE HOST bool is_valid_tf_input(const float input,
598  const float bounds_val,
599  const BoundsType bounds_type,
600  const IntervalType interval_type);
601 
602 template NEVER_INLINE HOST bool is_valid_tf_input(const double input,
603  const double bounds_val,
604  const BoundsType bounds_type,
605  const IntervalType interval_type);
606 
607 #endif // #ifndef __CUDACC__
std::regex glob_to_regex(const std::string &glob, bool case_sensitive=false)
NEVER_INLINE HOST std::pair< T, T > get_column_min_max(const Column< T > &col)
DEVICE int64_t size() const
#define UNREACHABLE()
Definition: Logger.h:338
DEVICE T * getPtr() const
void z_std_normalize_col(const T *input_data, T *output_data, const int64_t num_rows, const double mean, const double std_dev)
std::vector< std::filesystem::path > get_fs_paths(const std::string &file_or_directory)
#define HOST
const size_t max_inputs_per_thread
DEVICE TextEncodingDict * getPtr() const
DEVICE bool isNull(int64_t index) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
EXTENSION_NOINLINE double distance_in_meters(const double fromlon, const double fromlat, const double tolon, const double tolat)
Computes the distance, in meters, between two WGS-84 positions.
#define NEVER_INLINE
NEVER_INLINE HOST std::tuple< T, T, bool > get_column_metadata(const Column< T > &col)
std::vector< std::vector< T > > z_std_normalize_data(const std::vector< T * > &input_data, const int64_t num_rows)
NEVER_INLINE HOST double get_column_std_dev(const Column< T > &col, const double mean)
DEVICE int64_t size() const
std::vector< std::string > glob(const std::string &pattern)
ZStdNormalizationSummaryStats< T > z_std_normalize_data_with_summary_stats(const std::vector< T * > &input_data, const int64_t num_rows)
NEVER_INLINE HOST bool is_valid_tf_input(const T input, const T bounds_val, const BoundsType bounds_type, const IntervalType interval_type)
NEVER_INLINE HOST double get_column_mean(const T *data, const int64_t num_rows)