19 #include <boost/noncopyable.hpp>
24 #include <unordered_map>
40 namespace Catalog_Namespace {
53 const size_t executor_id,
54 const size_t num_kernel_threads = 0)
58 VLOG(2) <<
"Prepare " << num_kernel_threads + 1
59 <<
" allocators from RowSetMemoryOwner attached to Executor-" <<
executor_id_;
61 for (
size_t i = 0; i < num_kernel_threads + 1; i++) {
62 allocators_.emplace_back(std::make_unique<DramArena>(arena_block_size));
69 int8_t*
allocate(
const size_t num_bytes,
const size_t thread_idx = 0)
override {
73 return reinterpret_cast<int8_t*
>(allocator->allocate(num_bytes));
77 const size_t thread_idx) {
86 int64_t* group_by_buffer =
reinterpret_cast<int64_t*
>(allocator->allocate(num_bytes));
87 CHECK(group_by_buffer);
90 return std::make_pair(group_by_buffer,
false);
94 const size_t thread_idx = 0) {
95 int8_t* buffer =
allocate(num_bytes, thread_idx);
96 std::memset(buffer, 0, num_bytes);
103 const bool physical_buffer) {
143 std::vector<int64_t>*
addArray(
const std::vector<int64_t>& arr) {
151 const int64_t generation) {
155 CHECK_EQ(it->second->getDictionary(), str_dict.get());
156 it->second->updateGeneration(generation);
157 return it->second.get();
162 std::make_shared<StringDictionaryProxy>(str_dict, dict_key, generation))
164 return it->second.get();
169 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
170 std::ostringstream oss;
171 oss <<
"{source_dict_key: " << source_proxy_dict_key
172 <<
" StringOps: " << string_op_infos <<
"}";
179 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
180 std::ostringstream oss;
181 oss <<
"{source_dict_key: " << source_proxy_dict_key
182 <<
", dest_dict_key: " << dest_proxy_dict_key <<
" StringOps: " << string_op_infos
190 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
201 dest_proxy, string_op_infos))
209 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
213 if (it->first != map_key) {
223 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
234 dest_proxy, string_op_infos))
241 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
248 std::make_shared<StringOps_Namespace::StringOps>(string_op_infos))
251 return it->second.get();
258 return it->second.get();
262 const bool with_generation);
265 std::shared_ptr<StringDictionaryProxy> lit_str_dict_proxy) {
278 const bool with_generation,
280 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos);
285 const bool with_generation,
286 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos);
294 std::ostringstream oss;
295 oss <<
"Destruct RowSetMemoryOwner attached to Executor-" <<
executor_id_ <<
"{\t";
296 int allocator_id = 0;
298 auto const usedBytes = allocator->bytesUsed();
300 oss <<
"allocator-" << allocator_id <<
", byteUsed: " << usedBytes <<
"/"
301 << allocator->totalBytes() <<
"\t";
306 VLOG(2) << oss.str();
308 delete count_distinct_set;
314 CHECK(varlen_input_buffer);
315 varlen_input_buffer->unPin();
323 auto rtn = std::make_shared<RowSetMemoryOwner>(
345 const uint8_t* raw_data,
346 const size_t num_bytes,
349 std::memcpy(metadata_value.first.data(), raw_data, num_bytes);
355 const uint8_t*& raw_data,
361 throw std::runtime_error(
"Failed to find Table Function Metadata with key '" +
362 std::string(key) +
"'");
364 raw_data = itr->second.first.data();
365 num_bytes = itr->second.first.size();
366 value_type = itr->second.second;
387 std::unordered_map<shared::StringDictKey, std::shared_ptr<StringDictionaryProxy>>
389 std::map<std::string, StringDictionaryProxy::IdMap>
391 std::map<std::string, StringDictionaryProxy::IdMap>
393 std::map<std::string, StringDictionaryProxy::TranslationMap<Datum>>
400 std::map<std::string, std::shared_ptr<StringOps_Namespace::StringOps>>
std::shared_ptr< RowSetMemoryOwner > cloneStrDictDataOnly()
robin_hood::unordered_set< int64_t > CountDistinctSet
std::vector< std::unique_ptr< Arena > > allocators_
int8_t * allocateCountDistinctBuffer(const size_t num_bytes, const size_t thread_idx=0)
std::list< std::vector< int64_t > > arrays_
void addVarlenInputBuffer(Data_Namespace::AbstractBuffer *buffer)
const shared::StringDictKey & getDictKey() const noexcept
const StringDictionaryProxy::IdMap * getOrAddStringProxyTranslationMap(const shared::StringDictKey &source_dict_id_in, const shared::StringDictKey &dest_dict_id_in, const bool with_generation, const StringTranslationType translation_map_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
const bool physical_buffer
void addLiteralStringDictProxy(std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy)
RowSetMemoryOwner(const size_t arena_block_size, const size_t executor_id, const size_t num_kernel_threads=0)
std::map< std::string, StringDictionaryProxy::TranslationMap< Datum > > str_proxy_numeric_translation_maps_owned_
const StringDictionaryProxy::TranslationMap< Datum > * addStringProxyNumericTranslationMap(const StringDictionaryProxy *source_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
std::list< std::string > strings_
Calculate approximate median and general quantiles, based on "Computing Extremely Accurate Quantiles ...
void addCountDistinctBuffer(int8_t *count_distinct_buffer, const size_t bytes, const bool physical_buffer)
virtual MemoryLevel getType() const =0
std::vector< int64_t > * addArray(const std::vector< int64_t > &arr)
int8_t * allocate(const size_t num_bytes, const size_t thread_idx=0) override
std::vector< CountDistinctSet * > count_distinct_sets_
StringDictionary * getDictionary() const noexcept
std::pair< std::vector< uint8_t >, TableFunctionMetadataType > MetadataValue
quantile::TDigest * nullTDigest(double const q)
StringDictionaryGenerations & getStringDictionaryGenerations()
StringDictionaryProxy * getStringDictProxy(const shared::StringDictKey &dict_key) const
StringDictionaryProxy * getLiteralStringDictProxy() const
std::mutex table_function_metadata_store_mutex_
std::pair< int64_t *, bool > allocateCachedGroupByBuffer(const size_t num_bytes, const size_t thread_idx)
TranslationMap< Datum > buildNumericTranslationMap(const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
Builds a vectorized string_id translation map from this proxy to dest_proxy.
std::map< std::string, MetadataValue > table_function_metadata_store_
std::list< AggMode > mode_maps_
StringDictionaryProxy * addStringDict(std::shared_ptr< StringDictionary > str_dict, const shared::StringDictKey &dict_key, const int64_t generation)
std::map< std::string, StringDictionaryProxy::IdMap > str_proxy_intersection_translation_maps_owned_
void setDictionaryGenerations(StringDictionaryGenerations generations)
const StringDictionaryProxy::TranslationMap< Datum > * getOrAddStringProxyNumericTranslationMap(const shared::StringDictKey &source_dict_id_in, const bool with_generation, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
std::vector< CountDistinctBitmapBuffer > count_distinct_bitmaps_
std::vector< void * > col_buffers_
std::map< std::string, StringDictionaryProxy::IdMap > str_proxy_union_translation_maps_owned_
std::map< std::string, std::shared_ptr< StringOps_Namespace::StringOps > > string_ops_owned_
std::shared_ptr< StringDictionaryProxy > lit_str_dict_proxy_
Calculate statistical mode as an aggregate function.
std::vector< void * > varlen_buffers_
std::string generate_translation_map_key(const shared::StringDictKey &source_proxy_dict_key, const shared::StringDictKey &dest_proxy_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
std::unordered_map< shared::StringDictKey, std::shared_ptr< StringDictionaryProxy > > str_dict_proxy_owned_
std::vector< Data_Namespace::AbstractBuffer * > varlen_input_buffers_
An AbstractBuffer is a unit of data management for a data manager.
StringDictionaryGenerations string_dictionary_generations_
IdMap buildUnionTranslationMapToOtherProxy(StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
const StringOps_Namespace::StringOps * getStringOps(const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
void setTableFunctionMetadata(const char *key, const uint8_t *raw_data, const size_t num_bytes, const TableFunctionMetadataType value_type)
void addVarlenBuffer(void *varlen_buffer)
std::vector< std::unique_ptr< quantile::TDigest > > t_digests_
Functions used to work with (approximate) count distinct sets.
void addCountDistinctSet(CountDistinctSet *count_distinct_set)
void clearNonOwnedGroupByBuffers()
void getTableFunctionMetadata(const char *key, const uint8_t *&raw_data, size_t &num_bytes, TableFunctionMetadataType &value_type) const
std::vector< int64_t * > non_owned_group_by_buffers_
const StringDictionaryProxy::IdMap * addStringProxyUnionTranslationMap(const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
IdMap buildIntersectionTranslationMapToOtherProxy(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
std::string * addString(const std::string &str)
const StringDictionaryProxy::IdMap * addStringProxyIntersectionTranslationMap(const StringDictionaryProxy *source_proxy, const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
void addColBuffer(const void *col_buffer)
std::string generate_translation_map_key(const shared::StringDictKey &source_proxy_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const unsigned block_size, const unsigned grid_size)
StringDictionaryProxy * getOrAddStringDictProxy(const shared::StringDictKey &dict_key, const bool with_generation)