OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsCommon.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
21 #include <filesystem>
22 #include <mutex>
23 #include <shared_mutex>
24 #include <string>
25 #include <unordered_map>
26 #include <vector>
27 
29 
30 template <typename T>
31 TEMPLATE_NOINLINE std::pair<T, T> get_column_min_max(const Column<T>& col);
32 
33 TEMPLATE_NOINLINE std::pair<int32_t, int32_t> get_column_min_max(
34  const Column<TextEncodingDict>& col);
35 
36 template <typename T1, typename T2>
38 distance_in_meters(const T1 fromlon, const T1 fromlat, const T2 tolon, const T2 tolat);
39 
/**
 * Flattens a two-dimensional (x, y) bin coordinate into a single bin index.
 *
 * @param x_bin      Bin position along x.
 * @param y_bin      Bin position along y.
 * @param num_x_bins Number of bins along x (the stride between consecutive y bins).
 * @return y_bin * num_x_bins + x_bin. No range checking is performed on any argument.
 */
inline int64_t x_y_bin_to_bin_index(const int64_t x_bin,
                                    const int64_t y_bin,
                                    const int64_t num_x_bins) {
  // Offset of the first bin that belongs to the requested y bin.
  const int64_t y_offset = y_bin * num_x_bins;
  return y_offset + x_bin;
}
45 
/**
 * Splits a flat bin index into its (x, y) bin coordinates.
 *
 * @param bin        Flat bin index.
 * @param num_x_bins Number of bins along x; the caller must pass a non-zero value
 *                   because it is used as a divisor.
 * @return Pair of {bin % num_x_bins, bin / num_x_bins}, i.e. {x bin, y bin}.
 */
inline std::pair<int64_t, int64_t> bin_to_x_y_bin_indexes(const int64_t bin,
                                                          const int64_t num_x_bins) {
  const int64_t x_bin = bin % num_x_bins;
  const int64_t y_bin = bin / num_x_bins;
  return {x_bin, y_bin};
}
50 
/**
 * Heap-allocated byte buffer together with its size.
 *
 * The constructor allocates num_bytes bytes with new[] and the destructor releases
 * them with delete[], so each instance is the sole owner of data_buffer. The buffer
 * contents are not initialized by the constructor.
 */
struct CacheDataTf {
  int8_t* data_buffer;  // Owned allocation of num_bytes bytes; freed in the destructor.
  size_t num_bytes;     // Size of data_buffer in bytes.

  /**
   * Allocates an uninitialized buffer of the requested size.
   * @param num_bytes Number of bytes to allocate.
   */
  CacheDataTf(const size_t num_bytes) : num_bytes(num_bytes) {
    data_buffer = new int8_t[num_bytes];
  }

  // A memberwise copy would make two objects delete[] the same pointer, so copying
  // is disabled. (Declaring these also suppresses the implicit move operations.)
  CacheDataTf(const CacheDataTf&) = delete;
  CacheDataTf& operator=(const CacheDataTf&) = delete;

  ~CacheDataTf() { delete[] data_buffer; }
};
61 
63  public:
64  bool isKeyCached(const std::string& key) const;
65 
66  bool isKeyCachedAndSameLength(const std::string& key, const size_t num_bytes) const;
67 
68  // Assumes dest_buffer is already appropriately sized
69  template <typename T>
70  void getDataForKey(const std::string& key, T* dest_buffer) const;
71 
72  template <typename T>
73  const T& getDataRefForKey(const std::string& key) const;
74 
75  template <typename T>
76  const T* getDataPtrForKey(const std::string& key) const;
77 
78  template <typename T>
79  void putDataForKey(const std::string& key,
80  T* const data_buffer,
81  const size_t num_elements);
82 
83  private:
84  const size_t parallel_copy_min_bytes{1 << 20};
85 
86  void copyData(int8_t* dest, const int8_t* source, const size_t num_bytes) const;
87 
88  std::unordered_map<std::string, std::shared_ptr<CacheDataTf>> data_cache_;
89  mutable std::shared_mutex cache_mutex_;
90 };
91 
/**
 * Cache that maps string keys to shared_ptr<T> values.
 *
 * Only the declarations live here; the member function definitions are provided
 * elsewhere. NOTE(review): cache_mutex_ is presumably what guards data_cache_ --
 * confirm against the definitions before relying on concurrent access.
 */
template <typename T>
class DataCache {
 public:
  /// Reports whether an entry exists for `key`.
  bool isKeyCached(const std::string& key) const;

  /// Returns the shared pointer associated with `key`.
  /// NOTE(review): behavior for a missing key is not visible from this declaration.
  std::shared_ptr<T> getDataForKey(const std::string& key) const;

  /// Associates `data` with `key`.
  /// NOTE(review): whether an existing entry is replaced is defined by the
  /// implementation, which is not shown here.
  void putDataForKey(const std::string& key, std::shared_ptr<T> const data);

 private:
  using CacheMap = std::unordered_map<std::string, std::shared_ptr<T>>;

  CacheMap data_cache_;
  // mutable so that const member functions can lock it.
  mutable std::shared_mutex cache_mutex_;
};
105 
namespace FileUtilities {

// Declared here and defined elsewhere: takes a string naming a file or a directory
// and returns a list of filesystem paths.
// NOTE(review): how directories are expanded (recursion, filtering, ordering) is
// decided by the implementation, which is not visible from this header.
std::vector<std::filesystem::path> get_fs_paths(const std::string& file_or_directory);

}  // namespace FileUtilities
109 
// Tag passed as the bounds_type argument of is_valid_tf_input.
// NOTE(review): presumably selects whether the bound value acts as a lower (Min)
// or upper (Max) limit -- confirm against the implementation.
enum BoundsType {
  Min,
  Max
};
111 
113 
114 template <typename T>
115 bool is_valid_tf_input(const T input,
116  const T bounds_val,
117  const BoundsType bounds_type,
118  const IntervalType interval_type);
119 
120 #include "TableFunctionsCommon.cpp"
121 
122 #endif //__CUDACC__
bool isKeyCachedAndSameLength(const std::string &key, const size_t num_bytes) const
bool isKeyCached(const std::string &key) const
#define TEMPLATE_NOINLINE
Definition: OmniSciTypes.h:36
TEMPLATE_NOINLINE std::pair< T, T > get_column_min_max(const Column< T > &col)
std::vector< std::filesystem::path > get_fs_paths(const std::string &file_or_directory)
std::unordered_map< std::string, std::shared_ptr< T > > data_cache_
const T * getDataPtrForKey(const std::string &key) const
void putDataForKey(const std::string &key, T *const data_buffer, const size_t num_elements)
std::shared_ptr< T > getDataForKey(const std::string &key) const
bool isKeyCached(const std::string &key) const
bool is_valid_tf_input(const T input, const T bounds_val, const BoundsType bounds_type, const IntervalType interval_type)
void putDataForKey(const std::string &key, std::shared_ptr< T > const data)
void getDataForKey(const std::string &key, T *dest_buffer) const
EXTENSION_NOINLINE double distance_in_meters(const double fromlon, const double fromlat, const double tolon, const double tolat)
Computes the distance, in meters, between two WGS-84 positions.
std::shared_mutex cache_mutex_
CacheDataTf(const size_t num_bytes)
int64_t x_y_bin_to_bin_index(const int64_t x_bin, const int64_t y_bin, const int64_t num_x_bins)
const T & getDataRefForKey(const std::string &key) const
std::pair< int64_t, int64_t > bin_to_x_y_bin_indexes(const int64_t bin, const int64_t num_x_bins)