OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaselineJoinHashTable Class Reference

#include <BaselineJoinHashTable.h>

+ Inheritance diagram for BaselineJoinHashTable:
+ Collaboration diagram for BaselineJoinHashTable:

Classes

struct  ColumnsForDevice
 
struct  CompositeKeyInfo
 
struct  HashTableCacheKey
 
struct  HashTableCacheValue
 

Public Member Functions

int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id, bool raw=false) const noexcept override
 
std::set< DecodedJoinHashBufferEntry > decodeJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
JoinHashTableInterface::HashType getHashType () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
virtual ~BaselineJoinHashTable ()
 
- Public Member Functions inherited from JoinHashTableInterface
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const noexcept
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const noexcept
 

Static Public Member Functions

static std::shared_ptr< BaselineJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< BaselineJoinHashTable > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static size_t getShardCountForCondition (const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
static auto yieldCacheInvalidator () -> std::function< void()>
 

Protected Types

typedef std::pair< const
int8_t *, size_t > 
LinearizedColumn
 
typedef std::pair< int, int > LinearizedColumnCacheKey
 

Protected Member Functions

 BaselineJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
virtual void reifyWithLayout (const int device_count, const JoinHashTableInterface::HashType layout)
 
virtual ColumnsForDevice fetchColumnsForDevice (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< ColumnsForDevice > &) const
 
virtual size_t getKeyComponentWidth () const
 
virtual size_t getKeyComponentCount () const
 
virtual int initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
 
virtual int initHashTableOnGpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
 
virtual llvm::Value * codegenKey (const CompilationOptions &)
 
std::pair< const int8_t *, size_t > getAllColumnFragments (const Analyzer::ColumnVar &hash_col, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner)
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
CompositeKeyInfo getCompositeKeyInfo () const
 
void reify (const int device_count)
 
JoinColumn fetchColumn (const Analyzer::ColumnVar *inner_col, const Data_Namespace::MemoryLevel &effective_memory_level, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, const int device_id)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id)
 
void checkHashJoinReplicationConstraint (const int table_id) const
 
int initHashTableForDevice (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
llvm::Value * hashPtr (const size_t index)
 
void initHashTableOnCpuFromCache (const HashTableCacheKey &)
 
void putHashTableOnCpuToCache (const HashTableCacheKey &)
 
std::pair< ssize_t, size_t > getApproximateTupleCountFromCache (const HashTableCacheKey &) const
 
bool isBitwiseEq () const
 
void freeHashBufferMemory ()
 
void freeHashBufferGpuMemory ()
 
void freeHashBufferCpuMemory ()
 
const HashTableCacheValue * findHashTableOnCpuInCache (const HashTableCacheKey &)
 

Static Protected Member Functions

static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
JoinHashTableInterface::HashType layout_
 
size_t entry_count_
 
size_t emitted_keys_count_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::shared_ptr< std::vector
< int8_t > > 
cpu_hash_table_buff_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::map< LinearizedColumnCacheKey, LinearizedColumn > linearized_multifrag_columns_
 
std::mutex linearized_multifrag_column_mutex_
 
RowSetMemoryOwner linearized_multifrag_column_owner_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const Catalog_Namespace::Catalog * catalog_
 

Static Protected Attributes

static std::vector< std::pair
< HashTableCacheKey,
HashTableCacheValue > > 
hash_table_cache_
 
static std::mutex hash_table_cache_mutex_
 
static const int ERR_FAILED_TO_FETCH_COLUMN {-3}
 
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
 

Private Member Functions

size_t getComponentBufferSize () const noexcept
 

Additional Inherited Members

- Public Types inherited from JoinHashTableInterface
enum  HashType { HashType::OneToOne, HashType::OneToMany }
 

Detailed Description

Definition at line 43 of file BaselineJoinHashTable.h.

Member Typedef Documentation

typedef std::pair<const int8_t*, size_t> BaselineJoinHashTable::LinearizedColumn
protected

Definition at line 264 of file BaselineJoinHashTable.h.

typedef std::pair<int, int> BaselineJoinHashTable::LinearizedColumnCacheKey
protected

Definition at line 265 of file BaselineJoinHashTable.h.

Constructor & Destructor Documentation

virtual BaselineJoinHashTable::~BaselineJoinHashTable ( )
inlinevirtual

Definition at line 110 of file BaselineJoinHashTable.h.

110 {}
BaselineJoinHashTable::BaselineJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const size_t  entry_count,
ColumnCacheMap column_cache,
Executor executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
protected

Definition at line 146 of file BaselineJoinHashTable.cpp.

Referenced by getInstance().

155  : condition_(condition)
156  , query_infos_(query_infos)
157  , memory_level_(memory_level)
158  , layout_(preferred_hash_type)
159  , entry_count_(entry_count)
161  , executor_(executor)
162  , column_cache_(column_cache)
163  , inner_outer_pairs_(inner_outer_pairs)
164  , catalog_(executor->getCatalog())
165 #ifdef HAVE_CUDA
166  , block_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
167  ? executor->blockSize()
168  : 0)
169  , grid_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
170  ? executor->gridSize()
171  : 0) {
172 }
173 #else
174 {
175 }
JoinHashTableInterface::HashType layout_
const std::vector< InputTableInfo > & query_infos_
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

Member Function Documentation

std::pair< size_t, size_t > BaselineJoinHashTable::approximateTupleCount ( const std::vector< ColumnsForDevice > &  columns_per_device) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 413 of file BaselineJoinHashTable.cpp.

References ThrustAllocator::allocateScopedBuffer(), approximate_distinct_tuples(), approximate_distinct_tuples_on_device(), Bitmap, catalog_, CHECK(), CHECK_EQ, condition_, copy_from_gpu(), CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), getApproximateTupleCountFromCache(), getCompositeKeyInfo(), Catalog_Namespace::Catalog::getDataMgr(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), inner_outer_pairs_, transfer_object_to_gpu(), transfer_pod_vector_to_gpu(), and UNREACHABLE.

Referenced by reifyWithLayout().

414  {
415  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
416  CountDistinctDescriptor count_distinct_desc{
418  0,
419  11,
420  true,
421  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
424  1};
425  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
426 
427  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
428 
429  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
430  const auto composite_key_info = getCompositeKeyInfo();
431  HashTableCacheKey cache_key{columns_per_device.front().join_columns.front().num_elems,
432  composite_key_info.cache_key_chunks,
433  condition_->get_optype()};
434  const auto cached_count_info = getApproximateTupleCountFromCache(cache_key);
435  if (cached_count_info.first >= 0) {
436  return std::make_pair(cached_count_info.first, cached_count_info.second);
437  }
438  int thread_count = cpu_threads();
439  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
440  auto hll_result = &hll_buffer_all_cpus[0];
441 
442  approximate_distinct_tuples(hll_result,
443  count_distinct_desc.bitmap_sz_bits,
444  padded_size_bytes,
445  columns_per_device.front().join_columns,
446  columns_per_device.front().join_column_types,
447  thread_count);
448  for (int i = 1; i < thread_count; ++i) {
449  hll_unify(hll_result,
450  hll_result + i * padded_size_bytes,
451  1 << count_distinct_desc.bitmap_sz_bits);
452  }
453  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
454  }
455 #ifdef HAVE_CUDA
456  const int device_count = columns_per_device.size();
457  auto& data_mgr = catalog_->getDataMgr();
458  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count);
459  for (auto& host_hll_buffer : host_hll_buffers) {
460  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
461  }
462  std::vector<std::future<void>> approximate_distinct_device_threads;
463  for (int device_id = 0; device_id < device_count; ++device_id) {
464  approximate_distinct_device_threads.emplace_back(std::async(
465  std::launch::async,
466  [device_id,
467  &columns_per_device,
468  &count_distinct_desc,
469  &data_mgr,
470  &host_hll_buffers,
471  this] {
472  ThrustAllocator allocator(&data_mgr, device_id);
473  auto device_hll_buffer =
474  allocator.allocateScopedBuffer(count_distinct_desc.bitmapPaddedSizeBytes());
475  data_mgr.getCudaMgr()->zeroDeviceMem(
476  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
477  const auto& columns_for_device = columns_per_device[device_id];
478  auto join_columns_gpu =
479  transfer_pod_vector_to_gpu(columns_for_device.join_columns, allocator);
480  auto join_column_types_gpu =
481  transfer_pod_vector_to_gpu(columns_for_device.join_column_types, allocator);
482  const auto key_handler =
483  GenericKeyHandler(columns_for_device.join_columns.size(),
484  true,
485  join_columns_gpu,
486  join_column_types_gpu,
487  nullptr,
488  nullptr);
489  const auto key_handler_gpu = transfer_object_to_gpu(key_handler, allocator);
491  reinterpret_cast<uint8_t*>(device_hll_buffer),
492  count_distinct_desc.bitmap_sz_bits,
493  key_handler_gpu,
494  columns_for_device.join_columns[0].num_elems,
495  block_size_,
496  grid_size_);
497 
498  auto& host_hll_buffer = host_hll_buffers[device_id];
499  copy_from_gpu(&data_mgr,
500  &host_hll_buffer[0],
501  reinterpret_cast<CUdeviceptr>(device_hll_buffer),
502  count_distinct_desc.bitmapPaddedSizeBytes(),
503  device_id);
504  }));
505  }
506  for (auto& child : approximate_distinct_device_threads) {
507  child.get();
508  }
509  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
510  auto& result_hll_buffer = host_hll_buffers.front();
511  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
512  for (int device_id = 1; device_id < device_count; ++device_id) {
513  auto& host_hll_buffer = host_hll_buffers[device_id];
514  hll_unify(hll_result,
515  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
516  1 << count_distinct_desc.bitmap_sz_bits);
517  }
518  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
519 #else
520  UNREACHABLE();
521  return {0, 0};
522 #endif // HAVE_CUDA
523 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
Definition: HyperLogLog.h:109
#define UNREACHABLE()
Definition: Logger.h:234
std::pair< ssize_t, size_t > getApproximateTupleCountFromCache(const HashTableCacheKey &) const
T * transfer_pod_vector_to_gpu(const std::vector< T > &vec, ThrustAllocator &allocator)
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
Definition: HyperLogLog.h:90
std::vector< InnerOuter > inner_outer_pairs_
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
T * transfer_object_to_gpu(const T &object, ThrustAllocator &allocator)
CHECK(cgen_state)
CompositeKeyInfo getCompositeKeyInfo() const
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
int cpu_threads()
Definition: thread_count.h:25
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::checkHashJoinReplicationConstraint ( const int  table_id) const
protected

Definition at line 1253 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK(), g_cluster, Catalog_Namespace::Catalog::getMetadataForTable(), shardCount(), and table_is_replicated().

1253  {
1254  if (!g_cluster) {
1255  return;
1256  }
1257  if (table_id >= 0) {
1258  const auto inner_td = catalog_->getMetadataForTable(table_id);
1259  CHECK(inner_td);
1260  const auto shard_count = shardCount();
1261  if (!shard_count && !table_is_replicated(inner_td)) {
1262  throw TableMustBeReplicated(inner_td->tableName);
1263  }
1264  }
1265 }
bool g_cluster
CHECK(cgen_state)
bool table_is_replicated(const TableDescriptor *td)
const Catalog_Namespace::Catalog * catalog_
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

llvm::Value * BaselineJoinHashTable::codegenKey ( const CompilationOptions co)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 1182 of file BaselineJoinHashTable.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenMatchingSet(), and codegenSlot().

1182  {
1183  const auto key_component_width = getKeyComponentWidth();
1184  CHECK(key_component_width == 4 || key_component_width == 8);
1185  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1186  llvm::Value* key_buff_lv{nullptr};
1187  switch (key_component_width) {
1188  case 4:
1189  key_buff_lv =
1190  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
1191  break;
1192  case 8:
1193  key_buff_lv =
1194  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1195  break;
1196  default:
1197  CHECK(false);
1198  }
1199 
1200  CodeGenerator code_generator(executor_);
1201  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
1202  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
1203  const auto& inner_outer_pair = inner_outer_pairs_[i];
1204  const auto outer_col = inner_outer_pair.second;
1205  const auto col_lvs = code_generator.codegen(outer_col, true, co);
1206  CHECK_EQ(size_t(1), col_lvs.size());
1207  const auto col_lv = LL_BUILDER.CreateSExt(
1208  col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
1209  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
1210  }
1211  return key_buff_lv;
1212 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::vector< InnerOuter > inner_outer_pairs_
CHECK(cgen_state)
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet ( const CompilationOptions co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1119 of file BaselineJoinHashTable.cpp.

References CHECK(), JoinHashTable::codegenHashTableLoad(), codegenKey(), JoinHashTable::codegenMatchingSet(), entry_count_, executor_, get_int_type(), getComponentBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), layout_, LL_BUILDER, LL_CONTEXT, LL_INT, offsetBufferOff(), JoinHashTableInterface::OneToMany, and to_string().

1121  {
1122  const auto key_component_width = getKeyComponentWidth();
1123  CHECK(key_component_width == 4 || key_component_width == 8);
1124  auto key_buff_lv = codegenKey(co);
1126  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1127  const auto composite_dict_ptr_type =
1128  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1129  const auto composite_key_dict =
1130  hash_ptr->getType()->isPointerTy()
1131  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1132  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1133  const auto key_component_count = getKeyComponentCount();
1134  const auto key = executor_->cgen_state_->emitExternalCall(
1135  "get_composite_key_index_" + std::to_string(key_component_width * 8),
1136  get_int_type(64, LL_CONTEXT),
1137  {key_buff_lv,
1138  LL_INT(key_component_count),
1139  composite_key_dict,
1140  LL_INT(entry_count_)});
1141  auto one_to_many_ptr = hash_ptr;
1142  if (one_to_many_ptr->getType()->isPointerTy()) {
1143  one_to_many_ptr =
1144  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1145  } else {
1146  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1147  }
1148  const auto composite_key_dict_size = offsetBufferOff();
1149  one_to_many_ptr =
1150  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1152  {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(entry_count_ - 1)},
1153  false,
1154  false,
1155  false,
1157  executor_);
1158 }
size_t offsetBufferOff() const noexceptoverride
size_t getComponentBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
CHECK(cgen_state)
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

llvm::Value * BaselineJoinHashTable::codegenSlot ( const CompilationOptions co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1103 of file BaselineJoinHashTable.cpp.

References CHECK(), codegenKey(), entry_count_, executor_, get_int_type(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), hashPtr(), LL_BUILDER, LL_CONTEXT, LL_INT, JoinHashTableInterface::OneToOne, and to_string().

1104  {
1106  const auto key_component_width = getKeyComponentWidth();
1107  CHECK(key_component_width == 4 || key_component_width == 8);
1108  auto key_buff_lv = codegenKey(co);
1109  const auto hash_ptr = hashPtr(index);
1110  const auto key_ptr_lv =
1111  LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
1112  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1113  return executor_->cgen_state_->emitExternalCall(
1114  "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
1115  get_int_type(64, LL_CONTEXT),
1116  {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(entry_count_)});
1117 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
CHECK(cgen_state)
JoinHashTableInterface::HashType getHashType() const noexceptoverride
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
llvm::Value * hashPtr(const size_t index)
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

size_t BaselineJoinHashTable::countBufferOff ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1168 of file BaselineJoinHashTable.cpp.

References CHECK(), getComponentBufferSize(), layout_, offsetBufferOff(), and JoinHashTableInterface::OneToMany.

Referenced by payloadBufferOff().

1168  {
1171 }
size_t offsetBufferOff() const noexceptoverride
size_t getComponentBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > BaselineJoinHashTable::decodeJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 267 of file BaselineJoinHashTable.cpp.

References catalog_(), copy_from_gpu(), decodeJoinHashBuffer(), and GPU.

269  {
270  auto buffer = getJoinHashBuffer(device_type, device_id);
271  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
272 #ifdef HAVE_CUDA
273  std::unique_ptr<int8_t[]> buffer_copy;
274  if (device_type == ExecutorDeviceType::GPU) {
275  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
276 
278  buffer_copy.get(),
279  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
280  buffer_size,
281  device_id);
282  }
283  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
284 #else
285  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
286 #endif // HAVE_CUDA
287  auto ptr2 = ptr1 + offsetBufferOff();
288  auto ptr3 = ptr1 + countBufferOff();
289  auto ptr4 = ptr1 + payloadBufferOff();
292  ptr1,
293  ptr2,
294  ptr3,
295  ptr4,
296  buffer_size);
297 }
size_t offsetBufferOff() const noexceptoverride
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t payloadBufferOff() const noexceptoverride
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
std::set< DecodedJoinHashBufferEntry > decodeJoinHashBuffer(size_t key_component_count, size_t key_component_width, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
size_t countBufferOff() const noexceptoverride

+ Here is the call graph for this function:

JoinColumn BaselineJoinHashTable::fetchColumn ( const Analyzer::ColumnVar inner_col,
const Data_Namespace::MemoryLevel effective_memory_level,
const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks_owner,
const int  device_id 
)
protected

Definition at line 525 of file BaselineJoinHashTable.cpp.

References ThrustAllocator::allocate(), catalog_, CHECK(), CHECK_NE, column_cache_, copy_to_gpu(), executor_, SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), Analyzer::Expr::get_type_info(), getAllColumnFragments(), Catalog_Namespace::Catalog::getDataMgr(), ColumnFetcher::getOneColumnFragment(), and Data_Namespace::GPU_LEVEL.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), and fetchColumnsForDevice().

530  {
531  static std::mutex fragment_fetch_mutex;
532  const bool has_multi_frag = fragments.size() > 1;
533  const auto& first_frag = fragments.front();
534 
535  const int8_t* col_buff = nullptr;
536  size_t elem_count = 0;
537  const size_t elem_width = inner_col->get_type_info().get_size();
538  auto& data_mgr = catalog_->getDataMgr();
539  ThrustAllocator dev_buff_owner(&data_mgr, device_id);
540  if (has_multi_frag) {
541  try {
542  std::tie(col_buff, elem_count) =
543  getAllColumnFragments(*inner_col, fragments, chunks_owner);
544  } catch (...) {
545  throw FailedToFetchColumn();
546  }
547  }
548  {
549  std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
550  if (has_multi_frag) {
551  if (effective_memory_level == Data_Namespace::GPU_LEVEL) {
552  CHECK(col_buff != nullptr);
553  CHECK_NE(elem_count, size_t(0));
554  int8_t* dev_col_buff = nullptr;
555  dev_col_buff = dev_buff_owner.allocate(elem_count * elem_width);
556  copy_to_gpu(&data_mgr,
557  reinterpret_cast<CUdeviceptr>(dev_col_buff),
558  col_buff,
559  elem_count * elem_width,
560  device_id);
561  col_buff = dev_col_buff;
562  }
563  } else {
564  try {
565  std::tie(col_buff, elem_count) =
567  *inner_col,
568  first_frag,
569  effective_memory_level,
570  device_id,
571  chunks_owner,
572  column_cache_);
573  } catch (...) {
574  throw FailedToFetchColumn();
575  }
576  }
577  }
578  return {col_buff, elem_count};
579 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
std::pair< const int8_t *, size_t > getAllColumnFragments(const Analyzer::ColumnVar &hash_col, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner)
CHECK(cgen_state)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:31
#define CHECK_NE(x, y)
Definition: Logger.h:199
ColumnCacheMap & column_cache_
static std::pair< const int8_t *, size_t > getOneColumnFragment(Executor *executor, const Analyzer::ColumnVar &hash_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ColumnCacheMap &column_cache)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
const Catalog_Namespace::Catalog * catalog_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

BaselineJoinHashTable::ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id 
)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 581 of file BaselineJoinHashTable.cpp.

References catalog_, fetchColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reifyWithLayout().

583  {
584  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
585 
586  std::vector<JoinColumn> join_columns;
587  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
588  std::vector<JoinColumnTypeInfo> join_column_types;
589  std::vector<JoinBucketInfo> join_bucket_info;
590  for (const auto& inner_outer_pair : inner_outer_pairs_) {
591  const auto inner_col = inner_outer_pair.first;
592  const auto inner_cd = get_column_descriptor_maybe(
593  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
594  if (inner_cd && inner_cd->isVirtualCol) {
596  }
597  join_columns.emplace_back(fetchColumn(
598  inner_col, effective_memory_level, fragments, chunks_owner, device_id));
599  const auto& ti = inner_col->get_type_info();
600  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
601  0,
602  0,
604  isBitwiseEq(),
605  0,
607  }
608  return {join_columns, join_column_types, chunks_owner, join_bucket_info};
609 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
std::vector< InnerOuter > inner_outer_pairs_
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:171
JoinColumn fetchColumn(const Analyzer::ColumnVar *inner_col, const Data_Namespace::MemoryLevel &effective_memory_level, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, const int device_id)
const Catalog_Namespace::Catalog * catalog_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const BaselineJoinHashTable::HashTableCacheValue * BaselineJoinHashTable::findHashTableOnCpuInCache ( const HashTableCacheKey key)
protected

Definition at line 1268 of file BaselineJoinHashTable.cpp.

References hash_table_cache_, and hash_table_cache_mutex_.

1268  {
1269  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1270  for (const auto& kv : hash_table_cache_) {
1271  if (kv.first == key) {
1272  return &kv.second;
1273  }
1274  }
1275  return nullptr;
1276 }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
void BaselineJoinHashTable::freeHashBufferCpuMemory ( )
protected

Definition at line 1340 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_.

Referenced by freeHashBufferMemory().

1340  {
1341  cpu_hash_table_buff_.reset();
1342 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_

+ Here is the caller graph for this function:

void BaselineJoinHashTable::freeHashBufferGpuMemory ( )
protected

Definition at line 1326 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK(), CudaAllocator::freeGpuAbstractBuffer(), and Catalog_Namespace::Catalog::getDataMgr().

Referenced by freeHashBufferMemory().

1326  {
1327 #ifdef HAVE_CUDA
1328  auto& data_mgr = catalog_->getDataMgr();
1329  for (auto& buf : gpu_hash_table_buff_) {
1330  if (buf) {
1331  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1332  buf = nullptr;
1333  }
1334  }
1335 #else
1336  CHECK(false);
1337 #endif // HAVE_CUDA
1338 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
static void freeGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, Data_Namespace::AbstractBuffer *ab)
const Catalog_Namespace::Catalog * catalog_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::freeHashBufferMemory ( )
protected

Definition at line 1319 of file BaselineJoinHashTable.cpp.

References freeHashBufferCpuMemory(), and freeHashBufferGpuMemory().

Referenced by reify().

1319  {
1320 #ifdef HAVE_CUDA
1322 #endif
1324 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const int8_t *, size_t > BaselineJoinHashTable::getAllColumnFragments ( const Analyzer::ColumnVar hash_col,
const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks_owner 
)
protected

Definition at line 635 of file BaselineJoinHashTable.cpp.

References RowSetMemoryOwner::addColBuffer(), CHECK(), column_cache_, executor_, Analyzer::ColumnVar::get_column_id(), Analyzer::ColumnVar::get_table_id(), ColumnFetcher::getAllColumnFragments(), linearized_multifrag_column_mutex_, linearized_multifrag_column_owner_, linearized_multifrag_columns_, and shardCount().

Referenced by fetchColumn().

638  {
639  std::lock_guard<std::mutex> linearized_multifrag_column_lock(
641  auto linearized_column_cache_key =
642  std::make_pair(hash_col.get_table_id(), hash_col.get_column_id());
643  const auto cache_it = linearized_multifrag_columns_.find(linearized_column_cache_key);
644  if (cache_it != linearized_multifrag_columns_.end()) {
645  return cache_it->second;
646  }
647  const int8_t* col_buff;
648  size_t total_elem_count;
649  std::tie(col_buff, total_elem_count) = ColumnFetcher::getAllColumnFragments(
650  executor_, hash_col, fragments, chunks_owner, column_cache_);
652  const auto shard_count = shardCount();
653  if (!shard_count) {
654  const auto it_ok = linearized_multifrag_columns_.emplace(
655  linearized_column_cache_key, LinearizedColumn{col_buff, total_elem_count});
656  CHECK(it_ok.second);
657  }
658  return {col_buff, total_elem_count};
659 }
int get_table_id() const
Definition: Analyzer.h:194
std::pair< const int8_t *, size_t > LinearizedColumn
RowSetMemoryOwner linearized_multifrag_column_owner_
CHECK(cgen_state)
ColumnCacheMap & column_cache_
std::map< LinearizedColumnCacheKey, LinearizedColumn > linearized_multifrag_columns_
std::mutex linearized_multifrag_column_mutex_
static std::pair< const int8_t *, size_t > getAllColumnFragments(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ColumnCacheMap &column_cache)
int get_column_id() const
Definition: Analyzer.h:195
void addColBuffer(const void *col_buffer)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< ssize_t, size_t > BaselineJoinHashTable::getApproximateTupleCountFromCache ( const HashTableCacheKey key) const
protected

Definition at line 1304 of file BaselineJoinHashTable.cpp.

References hash_table_cache_, and hash_table_cache_mutex_.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), and approximateTupleCount().

1305  {
1306  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1307  for (const auto& kv : hash_table_cache_) {
1308  if (kv.first == key) {
1309  return std::make_pair(kv.second.entry_count / 2, kv.second.emitted_keys_count);
1310  }
1311  }
1312  return std::make_pair(-1, 0);
1313 }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getComponentBufferSize ( ) const
privatenoexcept

Definition at line 1178 of file BaselineJoinHashTable.cpp.

References entry_count_.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

1178  {
1179  return entry_count_ * sizeof(int32_t);
1180 }

+ Here is the caller graph for this function:

BaselineJoinHashTable::CompositeKeyInfo BaselineJoinHashTable::getCompositeKeyInfo ( ) const
protected

Definition at line 299 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK(), Catalog_Namespace::DBMetadata::dbId, executor_, Catalog_Namespace::Catalog::getCurrentDB(), inner_outer_pairs_, and kENCODING_DICT.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), reify(), and OverlapsJoinHashTable::reifyWithLayout().

300  {
301  std::vector<const void*> sd_inner_proxy_per_key;
302  std::vector<const void*> sd_outer_proxy_per_key;
303  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
304  for (const auto& inner_outer_pair : inner_outer_pairs_) {
305  const auto inner_col = inner_outer_pair.first;
306  const auto outer_col = inner_outer_pair.second;
307  const auto& inner_ti = inner_col->get_type_info();
308  const auto& outer_ti = outer_col->get_type_info();
309  ChunkKey cache_key_chunks_for_column{catalog_->getCurrentDB().dbId,
310  inner_col->get_table_id(),
311  inner_col->get_column_id()};
312  if (inner_ti.is_string()) {
313  CHECK(outer_ti.is_string());
314  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
315  outer_ti.get_compression() == kENCODING_DICT);
316  const auto sd_inner_proxy = executor_->getStringDictionaryProxy(
317  inner_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
318  const auto sd_outer_proxy = executor_->getStringDictionaryProxy(
319  outer_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
320  CHECK(sd_inner_proxy && sd_outer_proxy);
321  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
322  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
323  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
324  } else {
325  sd_inner_proxy_per_key.emplace_back();
326  sd_outer_proxy_per_key.emplace_back();
327  }
328  cache_key_chunks.push_back(cache_key_chunks_for_column);
329  }
330  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
331 }
std::vector< int > ChunkKey
Definition: types.h:35
std::vector< InnerOuter > inner_outer_pairs_
CHECK(cgen_state)
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
const Catalog_Namespace::Catalog * catalog_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 685 of file BaselineJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, and needs_dictionary_translation().

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::computeBucketSizes(), OverlapsJoinHashTable::fetchColumnsForDevice(), fetchColumnsForDevice(), and reifyForDevice().

686  {
687  for (const auto& inner_outer_pair : inner_outer_pairs) {
689  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
691  }
692  }
693  return memory_level_;
694 }
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

JoinHashTableInterface::HashType BaselineJoinHashTable::getHashType ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1242 of file BaselineJoinHashTable.cpp.

References layout_.

Referenced by codegenSlot().

1242  {
1243  return layout_;
1244 }
JoinHashTableInterface::HashType layout_

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1227 of file BaselineJoinHashTable.cpp.

References CHECK(), and inner_outer_pairs_.

Referenced by OverlapsJoinHashTable::getInstance(), getInstance(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

1227  {
1228  try {
1230  } catch (...) {
1231  CHECK(false);
1232  }
1233  return 0;
1234 }
std::vector< InnerOuter > inner_outer_pairs_
CHECK(cgen_state)
int getInnerTableId() const noexceptoverride

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
staticprotected

Definition at line 1246 of file BaselineJoinHashTable.cpp.

References CHECK().

1247  {
1248  CHECK(!inner_outer_pairs.empty());
1249  const auto first_inner_col = inner_outer_pairs.front().first;
1250  return first_inner_col->get_table_id();
1251 }
CHECK(cgen_state)

+ Here is the call graph for this function:

int BaselineJoinHashTable::getInnerTableRteIdx ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1236 of file BaselineJoinHashTable.cpp.

References CHECK(), and inner_outer_pairs_.

1236  {
1237  CHECK(!inner_outer_pairs_.empty());
1238  const auto first_inner_col = inner_outer_pairs_.front().first;
1239  return first_inner_col->get_rte_idx();
1240 }
std::vector< InnerOuter > inner_outer_pairs_
CHECK(cgen_state)

+ Here is the call graph for this function:

std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 33 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTable(), get_entries_per_device(), get_inner_query_info(), getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), getShardCountForCondition(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, and normalize_column_pairs().

Referenced by Executor::buildHashTableForQualifier(), and getSyntheticInstance().

40  {
41  auto inner_outer_pairs = normalize_column_pairs(
42  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
43  const auto& query_info =
44  get_inner_query_info(getInnerTableId(inner_outer_pairs), query_infos).info;
45  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
46  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
47  throw TooManyHashEntries();
48  }
49  const auto shard_count = memory_level == Data_Namespace::GPU_LEVEL
51  condition.get(), executor, inner_outer_pairs)
52  : 0;
53  const auto entries_per_device =
54  get_entries_per_device(total_entries, shard_count, device_count, memory_level);
55  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
56  new BaselineJoinHashTable(condition,
57  query_infos,
58  memory_level,
59  preferred_hash_type,
60  entries_per_device,
61  column_cache,
62  executor,
63  inner_outer_pairs));
64  join_hash_table->checkHashJoinReplicationConstraint(getInnerTableId(inner_outer_pairs));
65  try {
66  join_hash_table->reify(device_count);
67  } catch (const TableMustBeReplicated& e) {
68  // Throw a runtime error to abort the query
69  join_hash_table->freeHashBufferMemory();
70  throw std::runtime_error(e.what());
71  } catch (const HashJoinFail& e) {
72  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
73  // possible)
74  join_hash_table->freeHashBufferMemory();
75  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
76  "involved in equijoin | ") +
77  e.what());
78  } catch (const ColumnarConversionNotSupported& e) {
79  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
80  e.what());
81  } catch (const OutOfMemory& e) {
82  throw HashJoinFail(
83  std::string("Ran out of memory while building hash tables for equijoin | ") +
84  e.what());
85  } catch (const std::exception& e) {
86  throw std::runtime_error(
87  std::string("Fatal error while attempting to build hash tables for join: ") +
88  e.what());
89  }
90  return join_hash_table;
91 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
BaselineJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
int getInnerTableId() const noexceptoverride
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t BaselineJoinHashTable::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 191 of file BaselineJoinHashTable.cpp.

References CHECK(), CHECK_LT, and CPU.

192  {
193  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
194  return 0;
195  }
196 #ifdef HAVE_CUDA
197  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
198  if (device_type == ExecutorDeviceType::CPU) {
199  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
200  } else {
201  return gpu_hash_table_buff_[device_id]
202  ? reinterpret_cast<CUdeviceptr>(
203  gpu_hash_table_buff_[device_id]->getMemoryPtr())
204  : reinterpret_cast<CUdeviceptr>(nullptr);
205  }
206 #else
207  CHECK(device_type == ExecutorDeviceType::CPU);
208  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
209 #endif
210 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:200

+ Here is the call graph for this function:

size_t BaselineJoinHashTable::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 212 of file BaselineJoinHashTable.cpp.

References CHECK(), CHECK_LT, and CPU.

213  {
214  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
215  return 0;
216  }
217 #ifdef HAVE_CUDA
218  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
219  if (device_type == ExecutorDeviceType::CPU) {
220  return cpu_hash_table_buff_->size() *
221  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
222  } else {
223  return gpu_hash_table_buff_[device_id]
224  ? gpu_hash_table_buff_[device_id]->reservedSize()
225  : 0;
226  }
227 #else
228  CHECK(device_type == ExecutorDeviceType::CPU);
229  return cpu_hash_table_buff_->size() *
230  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
231 #endif
232 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:200

+ Here is the call graph for this function:

size_t BaselineJoinHashTable::getKeyComponentCount ( ) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 681 of file BaselineJoinHashTable.cpp.

References inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), initHashTableForDevice(), initHashTableOnCpu(), and offsetBufferOff().

681  {
682  return inner_outer_pairs_.size();
683 }
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyComponentWidth ( ) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 669 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, and inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), initHashTableForDevice(), initHashTableOnCpu(), and offsetBufferOff().

669  {
670  for (const auto& inner_outer_pair : inner_outer_pairs_) {
671  const auto inner_col = inner_outer_pair.first;
672  const auto& inner_col_ti = inner_col->get_type_info();
673  if (inner_col_ti.get_logical_size() > 4) {
674  CHECK_EQ(8, inner_col_ti.get_logical_size());
675  return 8;
676  }
677  }
678  return 4;
679 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getShardCountForCondition ( const Analyzer::BinOper condition,
const Executor executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
static

Definition at line 178 of file BaselineJoinHashTable.cpp.

References get_shard_count().

Referenced by OverlapsJoinHashTable::getInstance(), getInstance(), shardCount(), and Executor::skipFragmentPair().

181  {
182  for (const auto& inner_outer_pair : inner_outer_pairs) {
183  const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
184  if (pair_shard_count) {
185  return pair_shard_count;
186  }
187  }
188  return 0;
189 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getSyntheticInstance ( std::string_view  table1,
std::string_view  column1,
std::string_view  table2,
std::string_view  column2,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 94 of file BaselineJoinHashTable.cpp.

References CHECK(), getInstance(), kBOOLEAN, kEQ, and kONE.

103  {
104  auto catalog = executor->getCatalog();
105  CHECK(catalog);
106 
107  auto tmeta1 = catalog->getMetadataForTable(std::string(table1));
108  auto tmeta2 = catalog->getMetadataForTable(std::string(table2));
109 
110  CHECK(tmeta1);
111  CHECK(tmeta2);
112 
113  auto cmeta1 = catalog->getMetadataForColumn(tmeta1->tableId, std::string(column1));
114  auto cmeta2 = catalog->getMetadataForColumn(tmeta2->tableId, std::string(column2));
115 
116  CHECK(cmeta1);
117  CHECK(cmeta2);
118 
119  auto ti1 = cmeta1->columnType;
120  auto ti2 = cmeta2->columnType;
121 
122  auto a1 =
123  std::make_shared<Analyzer::ColumnVar>(ti1, tmeta1->tableId, cmeta1->columnId, 0);
124  auto a2 =
125  std::make_shared<Analyzer::ColumnVar>(ti2, tmeta2->tableId, cmeta2->columnId, 1);
126 
127  auto op = std::make_shared<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, a1, a2);
128 
129  size_t number_of_join_tables{2};
130  std::vector<InputTableInfo> query_infos(number_of_join_tables);
131  query_infos[0].table_id = tmeta1->tableId;
132  query_infos[0].info = tmeta1->fragmenter->getFragmentsForQuery();
133  query_infos[1].table_id = tmeta2->tableId;
134  query_infos[1].info = tmeta2->fragmenter->getFragmentsForQuery();
135 
136  auto hash_table = BaselineJoinHashTable::getInstance(op,
137  query_infos,
138  memory_level,
139  preferred_hash_type,
140  device_count,
141  column_cache,
142  executor);
143  return hash_table;
144 }
Definition: sqldefs.h:30
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query&#39;s parse tree etc.
CHECK(cgen_state)
Definition: sqldefs.h:69

+ Here is the call graph for this function:

llvm::Value * BaselineJoinHashTable::hashPtr ( const size_t  index)
protected

Definition at line 1214 of file BaselineJoinHashTable.cpp.

References JoinHashTable::codegenHashTableLoad(), executor_, LL_BUILDER, and LL_CONTEXT.

Referenced by codegenSlot().

1214  {
1215  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1216  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
1217  return hash_ptr->getType()->isPointerTy()
1218  ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
1219  : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
1220 }
#define LL_CONTEXT
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableForDevice ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_buckets,
const JoinHashTableInterface::HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
protected

Definition at line 1027 of file BaselineJoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), catalog_, CHECK(), CHECK_EQ, copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, emitted_keys_count_, entry_count_, Catalog_Namespace::Catalog::getDataMgr(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, initHashTableOnCpu(), initHashTableOnGpu(), memory_level_, JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, and VLOG.

Referenced by reifyForDevice().

1033  {
1034  const auto key_component_width = getKeyComponentWidth();
1035  const auto key_component_count = getKeyComponentCount();
1036  int err = 0;
1037 #ifdef HAVE_CUDA
1038  auto& data_mgr = catalog_->getDataMgr();
1040  const auto entry_size =
1041  (key_component_count +
1042  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
1043  key_component_width;
1044  const auto keys_for_all_rows = emitted_keys_count_;
1045  const size_t one_to_many_hash_entries =
1047  ? 2 * entry_count_ + keys_for_all_rows
1048  : 0;
1049  const size_t hash_table_size =
1050  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
1051 
1052  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
1053  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
1054  throw TooManyHashEntries();
1055  }
1056 
1057  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
1058  << entry_count_ << " hash entries and " << one_to_many_hash_entries
1059  << " entries in the one to many buffer";
1060  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
1061  gpu_hash_table_buff_[device_id] =
1062  CudaAllocator::allocGpuAbstractBuffer(&data_mgr, hash_table_size, device_id);
1063  }
1064 #else
1065  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1066 #endif
1067  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
1068  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1069  err = initHashTableOnCpu(join_columns, join_column_types, join_bucket_info, layout);
1070  // Transfer the hash table on the GPU if we've only built it on CPU
1071  // but the query runs on GPU (join on dictionary encoded columns).
1072  // Don't transfer the buffer if there was an error since we'll bail anyway.
1073  if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
1074 #ifdef HAVE_CUDA
1075  copy_to_gpu(
1076  &data_mgr,
1077  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1078  &(*cpu_hash_table_buff_)[0],
1079  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
1080  device_id);
1081 #else
1082  CHECK(false);
1083 #endif
1084  }
1085  } else {
1086  err = initHashTableOnGpu(join_columns,
1087  join_column_types,
1088  join_bucket_info,
1089  layout,
1090  key_component_width,
1091  key_component_count,
1092  device_id);
1093  }
1094  return err;
1095 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
virtual int initHashTableOnGpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
CHECK(cgen_state)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:31
const Catalog_Namespace::Catalog * catalog_
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
const Data_Namespace::MemoryLevel memory_level_
virtual int initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
#define VLOG(n)
Definition: Logger.h:280

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout 
)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 696 of file BaselineJoinHashTable.cpp.

References CHECK(), condition_, cpu_hash_table_buff_, cpu_threads(), entry_count_, fill_baseline_hash_join_buff_32(), fill_baseline_hash_join_buff_64(), fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), getCompositeKeyInfo(), getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), initHashTableOnCpuFromCache(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, putHashTableOnCpuToCache(), and VLOG.

Referenced by initHashTableForDevice().

700  {
701  const auto composite_key_info = getCompositeKeyInfo();
702  CHECK(!join_columns.empty());
703  HashTableCacheKey cache_key{join_columns.front().num_elems,
704  composite_key_info.cache_key_chunks,
705  condition_->get_optype()};
706  initHashTableOnCpuFromCache(cache_key);
707  if (cpu_hash_table_buff_) {
708  return 0;
709  }
710  const auto key_component_width = getKeyComponentWidth();
711  const auto key_component_count = getKeyComponentCount();
712  const auto entry_size =
713  (key_component_count +
714  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
715  key_component_width;
716  const auto keys_for_all_rows = join_columns.front().num_elems;
717  const size_t one_to_many_hash_entries =
719  ? 2 * entry_count_ + keys_for_all_rows
720  : 0;
721  const size_t hash_table_size =
722  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
723 
724  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
725  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
726  throw TooManyHashEntries();
727  }
728 
729  VLOG(1) << "Initializing CPU Join Hash Table with " << entry_count_
730  << " hash entries and " << one_to_many_hash_entries
731  << " entries in the one to many buffer";
732  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
733 
734  cpu_hash_table_buff_.reset(new std::vector<int8_t>(hash_table_size));
735  int thread_count = cpu_threads();
736  std::vector<std::future<void>> init_cpu_buff_threads;
737  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
738  init_cpu_buff_threads.emplace_back(
739  std::async(std::launch::async,
740  [this,
741  key_component_count,
742  key_component_width,
743  thread_idx,
744  thread_count,
745  layout] {
746  switch (key_component_width) {
747  case 4:
749  &(*cpu_hash_table_buff_)[0],
750  entry_count_,
751  key_component_count,
753  -1,
754  thread_idx,
755  thread_count);
756  break;
757  case 8:
759  &(*cpu_hash_table_buff_)[0],
760  entry_count_,
761  key_component_count,
763  -1,
764  thread_idx,
765  thread_count);
766  break;
767  default:
768  CHECK(false);
769  }
770  }));
771  }
772  for (auto& child : init_cpu_buff_threads) {
773  child.get();
774  }
775  std::vector<std::future<int>> fill_cpu_buff_threads;
776  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
777  fill_cpu_buff_threads.emplace_back(std::async(
778  std::launch::async,
779  [this,
780  &composite_key_info,
781  &join_columns,
782  &join_column_types,
783  key_component_count,
784  key_component_width,
785  layout,
786  thread_idx,
787  thread_count] {
788  switch (key_component_width) {
789  case 4: {
790  const auto key_handler =
791  GenericKeyHandler(key_component_count,
792  true,
793  &join_columns[0],
794  &join_column_types[0],
795  &composite_key_info.sd_inner_proxy_per_key[0],
796  &composite_key_info.sd_outer_proxy_per_key[0]);
798  &(*cpu_hash_table_buff_)[0],
799  entry_count_,
800  -1,
801  key_component_count,
803  &key_handler,
804  join_columns[0].num_elems,
805  thread_idx,
806  thread_count);
807  break;
808  }
809  case 8: {
810  const auto key_handler =
811  GenericKeyHandler(key_component_count,
812  true,
813  &join_columns[0],
814  &join_column_types[0],
815  &composite_key_info.sd_inner_proxy_per_key[0],
816  &composite_key_info.sd_outer_proxy_per_key[0]);
818  &(*cpu_hash_table_buff_)[0],
819  entry_count_,
820  -1,
821  key_component_count,
823  &key_handler,
824  join_columns[0].num_elems,
825  thread_idx,
826  thread_count);
827  break;
828  }
829  default:
830  CHECK(false);
831  }
832  return -1;
833  }));
834  }
835  int err = 0;
836  for (auto& child : fill_cpu_buff_threads) {
837  int partial_err = child.get();
838  if (partial_err) {
839  err = partial_err;
840  }
841  }
842  if (err) {
843  cpu_hash_table_buff_.reset();
844  return err;
845  }
847  auto one_to_many_buff = reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0] +
848  entry_count_ * entry_size);
849  init_hash_join_buff(one_to_many_buff, entry_count_, -1, 0, 1);
850  switch (key_component_width) {
851  case 4: {
852  const auto composite_key_dict =
853  reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0]);
855  composite_key_dict,
856  entry_count_,
857  -1,
858  key_component_count,
859  join_columns,
860  join_column_types,
861  join_bucket_info,
862  composite_key_info.sd_inner_proxy_per_key,
863  composite_key_info.sd_outer_proxy_per_key,
864  thread_count);
865  break;
866  }
867  case 8: {
868  const auto composite_key_dict =
869  reinterpret_cast<int64_t*>(&(*cpu_hash_table_buff_)[0]);
871  composite_key_dict,
872  entry_count_,
873  -1,
874  key_component_count,
875  join_columns,
876  join_column_types,
877  join_bucket_info,
878  composite_key_info.sd_inner_proxy_per_key,
879  composite_key_info.sd_outer_proxy_per_key,
880  thread_count);
881  break;
882  }
883  default:
884  CHECK(false);
885  }
886  }
887  if (!err && getInnerTableId() > 0) {
888  putHashTableOnCpuToCache(cache_key);
889  }
890  return err;
891 }
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
void putHashTableOnCpuToCache(const HashTableCacheKey &)
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int32_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
CHECK(cgen_state)
CompositeKeyInfo getCompositeKeyInfo() const
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
int getInnerTableId() const noexcept override
void initHashTableOnCpuFromCache(const HashTableCacheKey &)
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
int cpu_threads()
Definition: thread_count.h:25
#define VLOG(n)
Definition: Logger.h:280
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const std::shared_ptr< Analyzer::BinOper > condition_
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::initHashTableOnCpuFromCache ( const HashTableCacheKey key)
protected

Definition at line 1278 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, and layout_.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

1278  {
1279  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1280  for (const auto& kv : hash_table_cache_) {
1281  if (kv.first == key) {
1282  cpu_hash_table_buff_ = kv.second.buffer;
1283  layout_ = kv.second.type;
1284  entry_count_ = kv.second.entry_count;
1285  emitted_keys_count_ = kv.second.emitted_keys_count;
1286  break;
1287  }
1288  }
1289 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableOnGpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const int  device_id 
)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 893 of file BaselineJoinHashTable.cpp.

References ThrustAllocator::allocateScopedBuffer(), catalog_, copy_from_gpu(), copy_to_gpu(), entry_count_, fill_baseline_hash_join_buff_on_device_32(), fill_baseline_hash_join_buff_on_device_64(), fill_one_to_many_baseline_hash_table_on_device_32(), fill_one_to_many_baseline_hash_table_on_device_64(), Catalog_Namespace::Catalog::getDataMgr(), init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, transfer_object_to_gpu(), transfer_pod_vector_to_gpu(), and UNREACHABLE.

Referenced by initHashTableForDevice().

900  {
901  int err = 0;
902 #ifdef HAVE_CUDA
903  auto& data_mgr = catalog_->getDataMgr();
904  ThrustAllocator allocator(&data_mgr, device_id);
905  auto dev_err_buff =
906  reinterpret_cast<CUdeviceptr>(allocator.allocateScopedBuffer(sizeof(int)));
907  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
908  switch (key_component_width) {
909  case 4:
911  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
912  entry_count_,
913  key_component_count,
915  -1,
916  block_size_,
917  grid_size_);
918  break;
919  case 8:
921  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
922  entry_count_,
923  key_component_count,
925  -1,
926  block_size_,
927  grid_size_);
928  break;
929  default:
930  UNREACHABLE();
931  }
932  auto join_columns_gpu = transfer_pod_vector_to_gpu(join_columns, allocator);
933  auto hash_buff =
934  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
935  auto join_column_types_gpu = transfer_pod_vector_to_gpu(join_column_types, allocator);
936 
937  const auto key_handler = GenericKeyHandler(key_component_count,
938  true,
939  join_columns_gpu,
940  join_column_types_gpu,
941  nullptr,
942  nullptr);
943  const auto key_handler_gpu = transfer_object_to_gpu(key_handler, allocator);
944  switch (key_component_width) {
945  case 4: {
947  hash_buff,
948  entry_count_,
949  -1,
950  key_component_count,
952  reinterpret_cast<int*>(dev_err_buff),
953  key_handler_gpu,
954  join_columns.front().num_elems,
955  block_size_,
956  grid_size_);
957  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
958  break;
959  }
960  case 8: {
962  hash_buff,
963  entry_count_,
964  -1,
965  key_component_count,
967  reinterpret_cast<int*>(dev_err_buff),
968  key_handler_gpu,
969  join_columns.front().num_elems,
970  block_size_,
971  grid_size_);
972  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
973  break;
974  }
975  default:
976  UNREACHABLE();
977  }
978  if (err) {
979  return err;
980  }
982  const auto entry_size = key_component_count * key_component_width;
983  auto one_to_many_buff = reinterpret_cast<int32_t*>(
984  gpu_hash_table_buff_[device_id]->getMemoryPtr() + entry_count_ * entry_size);
985  switch (key_component_width) {
986  case 4: {
987  const auto composite_key_dict =
988  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
990  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
992  composite_key_dict,
993  entry_count_,
994  -1,
995  key_component_count,
996  key_handler_gpu,
997  join_columns.front().num_elems,
998  block_size_,
999  grid_size_);
1000  break;
1001  }
1002  case 8: {
1003  const auto composite_key_dict =
1004  reinterpret_cast<int64_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
1006  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
1008  composite_key_dict,
1009  entry_count_,
1010  -1,
1011  key_handler_gpu,
1012  join_columns.front().num_elems,
1013  block_size_,
1014  grid_size_);
1015  break;
1016  }
1017  default:
1018  UNREACHABLE();
1019  }
1020  }
1021 #else
1022  UNREACHABLE();
1023 #endif
1024  return err;
1025 }
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:234
T * transfer_pod_vector_to_gpu(const std::vector< T > &vec, ThrustAllocator &allocator)
T * transfer_object_to_gpu(const T &object, ThrustAllocator &allocator)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:31
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
void init_hash_join_buff_on_device(int32_t *buff, const int32_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool BaselineJoinHashTable::isBitwiseEq ( ) const
protected

Definition at line 1315 of file BaselineJoinHashTable.cpp.

References condition_, and kBW_EQ.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), and fetchColumnsForDevice().

1315  {
1316  return condition_->get_optype() == kBW_EQ;
1317 }
Definition: sqldefs.h:31
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1160 of file BaselineJoinHashTable.cpp.

References CHECK(), entry_count_, getKeyComponentCount(), getKeyComponentWidth(), layout_, and JoinHashTableInterface::OneToMany.

Referenced by codegenMatchingSet(), and countBufferOff().

1160  {
1162  const auto key_component_width = getKeyComponentWidth();
1163  CHECK(key_component_width == 4 || key_component_width == 8);
1164  const auto key_component_count = getKeyComponentCount();
1165  return entry_count_ * key_component_count * key_component_width;
1166 }
JoinHashTableInterface::HashType layout_
CHECK(cgen_state)
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1173 of file BaselineJoinHashTable.cpp.

References CHECK(), countBufferOff(), getComponentBufferSize(), layout_, and JoinHashTableInterface::OneToMany.

1173  {
1176 }
size_t getComponentBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
CHECK(cgen_state)
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

void BaselineJoinHashTable::putHashTableOnCpuToCache ( const HashTableCacheKey key)
protected

Definition at line 1291 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, and layout_.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

1291  {
1292  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1293  for (const auto& kv : hash_table_cache_) {
1294  if (std::get<0>(kv) == key) {
1295  return;
1296  }
1297  }
1298  hash_table_cache_.emplace_back(
1299  key,
1300  HashTableCacheValue{
1302 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reify ( const int  device_count)
protected

Definition at line 333 of file BaselineJoinHashTable.cpp.

References CHECK_LT, condition_, freeHashBufferMemory(), HashTypeCache::get(), getCompositeKeyInfo(), layout_, JoinHashTableInterface::OneToMany, reifyWithLayout(), HashTypeCache::set(), and VLOG.

333  {
334  CHECK_LT(0, device_count);
335 #ifdef HAVE_CUDA
336  gpu_hash_table_buff_.resize(device_count);
337 #endif // HAVE_CUDA
338  const auto composite_key_info = getCompositeKeyInfo();
339  const auto type_and_found = HashTypeCache::get(composite_key_info.cache_key_chunks);
340  const auto layout = type_and_found.second ? type_and_found.first : layout_;
341 
342  if (condition_->is_overlaps_oper()) {
343  try {
345  return;
346  } catch (const std::exception& e) {
347  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
348  << e.what();
349  throw;
350  }
351  }
352 
353  try {
354  reifyWithLayout(device_count, layout);
355  } catch (const std::exception& e) {
356  VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
358  HashTypeCache::set(composite_key_info.cache_key_chunks,
361  }
362 }
static void set(const std::vector< ChunkKey > &key, const JoinHashTableInterface::HashType hash_type)
JoinHashTableInterface::HashType layout_
CompositeKeyInfo getCompositeKeyInfo() const
#define CHECK_LT(x, y)
Definition: Logger.h:200
virtual void reifyWithLayout(const int device_count, const JoinHashTableInterface::HashType layout)
#define VLOG(n)
Definition: Logger.h:280
const std::shared_ptr< Analyzer::BinOper > condition_
static std::pair< JoinHashTableInterface::HashType, bool > get(const std::vector< ChunkKey > &key)

+ Here is the call graph for this function:

void BaselineJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const JoinHashTableInterface::HashType  layout,
const int  device_id 
)
protected

Definition at line 611 of file BaselineJoinHashTable.cpp.

References ERR_FAILED_TO_FETCH_COLUMN, ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, BaselineJoinHashTable::ColumnsForDevice::join_buckets, BaselineJoinHashTable::ColumnsForDevice::join_column_types, BaselineJoinHashTable::ColumnsForDevice::join_columns, and to_string().

Referenced by OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

613  {
614  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
615  const auto err = initHashTableForDevice(columns_for_device.join_columns,
616  columns_for_device.join_column_types,
617  columns_for_device.join_buckets,
618  layout,
619  effective_memory_level,
620  device_id);
621  if (err) {
622  switch (err) {
624  throw FailedToFetchColumn();
627  default:
628  throw HashJoinFail(
629  std::string("Unrecognized error when initializing baseline hash table (") +
630  std::to_string(err) + std::string(")"));
631  }
632  }
633 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
static const int ERR_FAILED_TO_FETCH_COLUMN
int initHashTableForDevice(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reifyWithLayout ( const int  device_count,
const JoinHashTableInterface::HashType  layout 
)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 364 of file BaselineJoinHashTable.cpp.

References approximateTupleCount(), CHECK(), emitted_keys_count_, entry_count_, fetchColumnsForDevice(), get_entries_per_device(), get_inner_query_info(), getInnerTableId(), InputTableInfo::info, layout_, memory_level_, JoinHashTableInterface::OneToMany, only_shards_for_device(), query_infos_, reifyForDevice(), and shardCount().

Referenced by reify().

366  {
367  layout_ = layout;
368  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
369  if (query_info.fragments.empty()) {
370  return;
371  }
372  std::vector<BaselineJoinHashTable::ColumnsForDevice> columns_per_device;
373  const auto shard_count = shardCount();
374  for (int device_id = 0; device_id < device_count; ++device_id) {
375  const auto fragments =
376  shard_count
377  ? only_shards_for_device(query_info.fragments, device_id, device_count)
378  : query_info.fragments;
379  const auto columns_for_device = fetchColumnsForDevice(fragments, device_id);
380  columns_per_device.push_back(columns_for_device);
381  }
383  CHECK(!columns_per_device.front().join_columns.empty());
384  emitted_keys_count_ = columns_per_device.front().join_columns.front().num_elems;
385  size_t tuple_count;
386  std::tie(tuple_count, std::ignore) = approximateTupleCount(columns_per_device);
387  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
388 
389  entry_count_ =
390  get_entries_per_device(entry_count, shard_count, device_count, memory_level_);
391  }
392  std::vector<std::future<void>> init_threads;
393  for (int device_id = 0; device_id < device_count; ++device_id) {
394  const auto fragments =
395  shard_count
396  ? only_shards_for_device(query_info.fragments, device_id, device_count)
397  : query_info.fragments;
398  init_threads.push_back(std::async(std::launch::async,
400  this,
401  columns_per_device[device_id],
402  layout,
403  device_id));
404  }
405  for (auto& init_thread : init_threads) {
406  init_thread.wait();
407  }
408  for (auto& init_thread : init_threads) {
409  init_thread.get();
410  }
411 }
std::deque< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
JoinHashTableInterface::HashType layout_
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
const std::vector< InputTableInfo > & query_infos_
CHECK(cgen_state)
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< ColumnsForDevice > &) const
int getInnerTableId() const noexcept override
void reifyForDevice(const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id)
const Data_Namespace::MemoryLevel memory_level_
virtual ColumnsForDevice fetchColumnsForDevice(const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::shardCount ( ) const
protected

Definition at line 661 of file BaselineJoinHashTable.cpp.

References condition_, executor_, getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by checkHashJoinReplicationConstraint(), getAllColumnFragments(), OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

661  {
663  return 0;
664  }
667 }
std::vector< InnerOuter > inner_outer_pairs_
const Data_Namespace::MemoryLevel memory_level_
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string BaselineJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id,
bool  raw = false 
) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 234 of file BaselineJoinHashTable.cpp.

References catalog_(), copy_from_gpu(), decodeJoinHashBufferToString(), and GPU.

236  {
237  auto buffer = getJoinHashBuffer(device_type, device_id);
238  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
239 #ifdef HAVE_CUDA
240  std::unique_ptr<int8_t[]> buffer_copy;
241  if (device_type == ExecutorDeviceType::GPU) {
242  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
243 
245  buffer_copy.get(),
246  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
247  buffer_size,
248  device_id);
249  }
250  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
251 #else
252  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
253 #endif // HAVE_CUDA
254  auto ptr2 = ptr1 + offsetBufferOff();
255  auto ptr3 = ptr1 + countBufferOff();
256  auto ptr4 = ptr1 + payloadBufferOff();
259  ptr1,
260  ptr2,
261  ptr3,
262  ptr4,
263  buffer_size,
264  raw);
265 }
size_t offsetBufferOff() const noexcept override
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
unsigned long long CUdeviceptr
Definition: nocuda.h:27
std::string decodeJoinHashBufferToString(size_t key_component_count, size_t key_component_width, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw)
size_t payloadBufferOff() const noexceptoverride
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexcept override
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexcept override
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

static auto BaselineJoinHashTable::yieldCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 103 of file BaselineJoinHashTable.h.

References hash_table_cache_, and hash_table_cache_mutex_.

103  {
104  return []() -> void {
105  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
106  hash_table_cache_.clear();
107  };
108  }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

Member Data Documentation

ColumnCacheMap& BaselineJoinHashTable::column_cache_
protected

Definition at line 258 of file BaselineJoinHashTable.h.

Referenced by fetchColumn(), and getAllColumnFragments().

std::shared_ptr<std::vector<int8_t> > BaselineJoinHashTable::cpu_hash_table_buff_
protected
std::mutex BaselineJoinHashTable::cpu_hash_table_buff_mutex_
protected

Definition at line 260 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice().

const int BaselineJoinHashTable::ERR_FAILED_TO_FETCH_COLUMN {-3}
static protected

Definition at line 288 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

const int BaselineJoinHashTable::ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
static protected

Definition at line 289 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

std::mutex BaselineJoinHashTable::hash_table_cache_mutex_
static protected
std::mutex BaselineJoinHashTable::linearized_multifrag_column_mutex_
protected

Definition at line 267 of file BaselineJoinHashTable.h.

Referenced by getAllColumnFragments().

RowSetMemoryOwner BaselineJoinHashTable::linearized_multifrag_column_owner_
protected

Definition at line 268 of file BaselineJoinHashTable.h.

Referenced by getAllColumnFragments().

std::map<LinearizedColumnCacheKey, LinearizedColumn> BaselineJoinHashTable::linearized_multifrag_columns_
protected

Definition at line 266 of file BaselineJoinHashTable.h.

Referenced by getAllColumnFragments().

const std::vector<InputTableInfo>& BaselineJoinHashTable::query_infos_
protected

The documentation for this class was generated from the following files: