OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsCommon.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
21 #include <filesystem>
22 #include <mutex>
23 #include <shared_mutex>
24 #include <string>
25 #include <tuple>
26 #include <unordered_map>
27 #include <vector>
28 
30 
// Declaration only; no body is visible in this header.
// Takes a Column<T> by const reference and returns a std::pair<T, T>.
// Presumably the pair is (min, max) of the column's values, going by the
// name - how nulls or an empty column are treated is not visible here;
// confirm against the definition.
31 template <typename T>
32 NEVER_INLINE HOST std::pair<T, T> get_column_min_max(const Column<T>& col);
33 
// Non-template overload of get_column_min_max for Column<TextEncodingDict>.
// Declaration only; returns a std::pair<int32_t, int32_t>.
// NOTE(review): presumably the int32_t values are the dictionary-encoded ids
// stored in the column rather than decoded strings - confirm against the
// definition.
34 NEVER_INLINE HOST std::pair<int32_t, int32_t> get_column_min_max(
35  const Column<TextEncodingDict>& col);
36 
// Raw-pointer overload; declaration only.
// Takes a pointer `data` plus a row count `num_rows` and returns a double.
// Presumably the arithmetic mean of the first `num_rows` elements of `data`;
// behavior for num_rows == 0 is not visible here - confirm against the
// definition.
37 template <typename T>
38 NEVER_INLINE HOST double get_column_mean(const T* data, const int64_t num_rows);
39 
// Column<T> overload; declaration only. Returns a double.
// Presumably the mean of the column's values; whether it forwards to the
// raw-pointer overload and how nulls are handled cannot be told from this
// header - confirm against the definition.
40 template <typename T>
41 NEVER_INLINE HOST double get_column_mean(const Column<T>& col);
42 
// Column<T> overload; declaration only. Returns a double.
// The caller supplies `mean` instead of having it recomputed here.
// NOTE(review): presumably `mean` must be the mean of `col`; whether the
// result is a population or a sample standard deviation is not visible in
// this header - confirm against the definition.
43 template <typename T>
44 NEVER_INLINE HOST double get_column_std_dev(const Column<T>& col, const double mean);
45 
// Raw-pointer overload; declaration only. Returns a double.
// Takes a pointer `data`, a row count `num_rows`, and a caller-supplied `mean`.
// NOTE(review): presumably operates on the first `num_rows` elements of `data`
// and expects `mean` to be their mean; population vs. sample form is not
// visible here - confirm against the definition.
46 template <typename T>
47 NEVER_INLINE HOST double get_column_std_dev(const T* data,
48  const int64_t num_rows,
49  const double mean);
50 
// Declaration only. Returns void and receives a read-only `input_data`
// pointer, a writable `output_data` pointer, a row count, and a caller-supplied
// mean and standard deviation.
// Presumably writes the z-score (value - mean) / std_dev of each of the
// `num_rows` inputs into `output_data`; handling of std_dev == 0 is not
// visible here - confirm against the definition.
51 // Assumes nulls have been removed
52 template <typename T>
53 void z_std_normalize_col(const T* input_data,
54  T* output_data,
55  const int64_t num_rows,
56  const double mean,
57  const double std_dev);
58 
// Declaration only. Takes a vector of T* pointers plus a single row count and
// returns a std::vector<std::vector<T>> by value.
// Presumably each pointer is one column of `num_rows` values and the result
// holds one normalized vector per input column - confirm against the
// definition.
59 // Assumes nulls have been removed
60 template <typename T>
61 std::vector<std::vector<T>> z_std_normalize_data(const std::vector<T*>& input_data,
62  const int64_t num_rows);
63 
// Declaration only. Takes a Column<T> by const reference and returns a
// std::tuple<T, T, bool>.
// NOTE(review): the meaning of the tuple elements is not stated in this
// header; presumably (min, max, has_nulls) - confirm against the definition.
64 template <typename T>
65 NEVER_INLINE HOST std::tuple<T, T, bool> get_column_metadata(const Column<T>& col);
66 
// Non-template overload of get_column_metadata for Column<TextEncodingDict>.
// Declaration only; returns a std::tuple<int32_t, int32_t, bool>.
// NOTE(review): tuple element meaning is not visible here; presumably it
// mirrors the templated overload using dictionary-encoded ids - confirm
// against the definition.
67 NEVER_INLINE HOST std::tuple<int32_t, int32_t, bool> get_column_metadata(
68  const Column<TextEncodingDict>& col);
69 
// Declaration only. Both "from" coordinates share type T1 and both "to"
// coordinates share type T2.
// NOTE(review): listing line 71 - the line that should carry this
// declaration's specifiers and return type - is absent from this extract
// (the gutter jumps from 70 to 72). Restore it from the original header; as
// shown, the declaration has no return type.
// The cross-reference text on this page describes a same-named
// double-parameter overload as computing the distance, in meters, between two
// WGS-84 positions; presumably this template does the same - confirm.
70 template <typename T1, typename T2>
72 distance_in_meters(const T1 fromlon, const T1 fromlat, const T2 tolon, const T2 tolat);
73 
// Flattens a 2-D bin coordinate into a single linear bin index.
// Bins are laid out one row of `num_x_bins` entries after another, so the
// result is the offset of row `y_bin` plus the position `x_bin` within it.
// No range checking is performed on any argument.
inline int64_t x_y_bin_to_bin_index(const int64_t x_bin,
                                    const int64_t y_bin,
                                    const int64_t num_x_bins) {
  const int64_t row_start = y_bin * num_x_bins;
  return row_start + x_bin;
}
79 
// Splits a linear bin index into its (x_bin, y_bin) coordinate for a grid
// that is `num_x_bins` wide: the remainder is the x position and the quotient
// is the y position.
// `num_x_bins` must be non-zero; it is used as a divisor and is not checked.
inline std::pair<int64_t, int64_t> bin_to_x_y_bin_indexes(const int64_t bin,
                                                          const int64_t num_x_bins) {
  const int64_t x_bin = bin % num_x_bins;
  const int64_t y_bin = bin / num_x_bins;
  return {x_bin, y_bin};
}
84 
// Namespace holding file-system helpers; only a declaration is visible here.
// get_fs_paths takes one string naming a file or a directory and returns a
// vector of std::filesystem::path by value.
// NOTE(review): presumably a directory argument is expanded to the paths it
// contains; recursion, ordering and error behavior are not visible in this
// header - confirm against the definition.
85 namespace FileUtilities {
86 std::vector<std::filesystem::path> get_fs_paths(const std::string& file_or_directory);
87 }
88 
// Unscoped enum with two enumerators, Min and Max. It is the type of the
// `bounds_type` parameter of is_valid_tf_input declared later in this header.
// NOTE(review): presumably Min marks the bound as a lower limit and Max as an
// upper limit - confirm against the definition of is_valid_tf_input.
89 enum BoundsType { Min, Max };
90 
92 
// Declaration only. Returns bool and receives an input value, a bound value of
// the same type T, a BoundsType and an IntervalType.
// Presumably reports whether `input` satisfies the bound `bounds_val`, with
// `bounds_type` choosing lower vs. upper bound and `interval_type` choosing
// whether the bound itself is allowed - confirm against the definition.
// NOTE(review): IntervalType is not declared anywhere in the visible text;
// listing line 91 is absent from this extract and may have held its
// declaration - verify against the original header.
93 template <typename T>
94 NEVER_INLINE HOST bool is_valid_tf_input(const T input,
95  const T bounds_val,
96  const BoundsType bounds_type,
97  const IntervalType interval_type);
98 
99 #endif //__CUDACC__
NEVER_INLINE HOST std::pair< T, T > get_column_min_max(const Column< T > &col)
void z_std_normalize_col(const T *input_data, T *output_data, const int64_t num_rows, const double mean, const double std_dev)
std::vector< std::filesystem::path > get_fs_paths(const std::string &file_or_directory)
#define HOST
int64_t x_y_bin_to_bin_index(const int64_t x_bin, const int64_t y_bin, const int64_t num_x_bins)
std::pair< int64_t, int64_t > bin_to_x_y_bin_indexes(const int64_t bin, const int64_t num_x_bins)
EXTENSION_NOINLINE double distance_in_meters(const double fromlon, const double fromlat, const double tolon, const double tolat)
Computes the distance, in meters, between two WGS-84 positions.
#define NEVER_INLINE
NEVER_INLINE HOST std::tuple< T, T, bool > get_column_metadata(const Column< T > &col)
std::vector< std::vector< T > > z_std_normalize_data(const std::vector< T * > &input_data, const int64_t num_rows)
NEVER_INLINE HOST double get_column_std_dev(const Column< T > &col, const double mean)
NEVER_INLINE HOST bool is_valid_tf_input(const T input, const T bounds_val, const BoundsType bounds_type, const IntervalType interval_type)
NEVER_INLINE HOST double get_column_mean(const T *data, const int64_t num_rows)