OmniSciDB  471d68cefb
GpuInitGroups.cu File Reference
#include "BufferCompaction.h"
#include "GpuInitGroups.h"
#include "GpuRtConstants.h"


Functions

template<typename T >
__device__ int8_t * init_columnar_buffer (T *buffer_ptr, const T init_val, const uint32_t entry_count, const int32_t start, const int32_t step)
 
__device__ void init_columnar_group_by_buffer_gpu_impl (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size)
 
template<typename K >
__device__ void fill_empty_device_key (K *keys_ptr, const uint32_t key_count, const K empty_key)
 
__global__ void init_group_by_buffer_gpu (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size)
 
__global__ void init_columnar_group_by_buffer_gpu_wrapper (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size)
 
void init_group_by_buffer_on_device (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size, const size_t block_size_x, const size_t grid_size_x)
 
void init_columnar_group_by_buffer_on_device (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size, const size_t block_size_x, const size_t grid_size_x)
 

Function Documentation

template<typename K >
__device__ void fill_empty_device_key ( K *  keys_ptr,
const uint32_t  key_count,
const K  empty_key 
)
inline

Definition at line 111 of file GpuInitGroups.cu.


Referenced by init_group_by_buffer_gpu().

113  {
114  for (uint32_t i = 0; i < key_count; ++i) {
115  keys_ptr[i] = empty_key;
116  }
117 }
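
fill_empty_device_key stamps every key slot of one hash-table entry with a width-matched sentinel, so probes recognize the entry as vacant. A minimal host-side sketch of the same pattern, assuming an illustrative sentinel value (the real ones are the EMPTY_KEY_* macros from GpuRtConstants.h):

#include <cstdint>
#include <vector>

// Illustrative 64-bit sentinel; OmniSciDB's real value is the EMPTY_KEY_64 macro.
constexpr int64_t kEmptyKey64 = INT64_MAX;

// Host-side mirror of fill_empty_device_key.
template <typename K>
void fill_empty_key(K* keys_ptr, const uint32_t key_count, const K empty_key) {
  for (uint32_t i = 0; i < key_count; ++i) {
    keys_ptr[i] = empty_key;
  }
}

int main() {
  std::vector<int64_t> entry(3);
  fill_empty_key(entry.data(), 3u, kEmptyKey64);
  // All three key slots now carry the sentinel: the entry reads as unused.
  return entry[0] == kEmptyKey64 ? 0 : 1;
}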


template<typename T >
__device__ int8_t* init_columnar_buffer ( T *  buffer_ptr,
const T  init_val,
const uint32_t  entry_count,
const int32_t  start,
const int32_t  step 
)

Definition at line 6 of file GpuInitGroups.cu.


10  {
11  for (int32_t i = start; i < entry_count; i += step) {
12  buffer_ptr[i] = init_val;
13  }
14  return reinterpret_cast<int8_t*>(buffer_ptr + entry_count);
15 }
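
init_columnar_buffer is a grid-stride loop: each thread starts at its global index and advances by the total number of launched threads, so any grid geometry covers any entry_count, and adjacent threads write adjacent elements for coalesced stores. A standalone sketch of the idiom; the kernel name and launch dimensions are illustrative, not part of this file:

#include <cstdint>
#include <cuda_runtime.h>

// Grid-stride fill: thread t writes elements t, t + step, t + 2 * step, ...
__global__ void fill_kernel(int32_t* buf, const int32_t val, const uint32_t n) {
  const uint32_t start = blockIdx.x * blockDim.x + threadIdx.x;
  const uint32_t step = blockDim.x * gridDim.x;
  for (uint32_t i = start; i < n; i += step) {
    buf[i] = val;
  }
}

int main() {
  const uint32_t n = 1u << 20;
  int32_t* buf;
  cudaMalloc(&buf, n * sizeof(int32_t));
  fill_kernel<<<64, 256>>>(buf, -1, n);  // any grid size covers all n entries
  cudaDeviceSynchronize();
  cudaFree(buf);
  return 0;
}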
__device__ void init_columnar_group_by_buffer_gpu_impl ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size 
)

Definition at line 17 of file GpuInitGroups.cu.

References align_to_int64(), EMPTY_KEY_16, EMPTY_KEY_32, EMPTY_KEY_64, and EMPTY_KEY_8.

Referenced by init_columnar_group_by_buffer_gpu_wrapper().

26  {
27  const int32_t start = blockIdx.x * blockDim.x + threadIdx.x;
28  const int32_t step = blockDim.x * gridDim.x;
29 
30  int8_t* buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
31  if (!keyless) {
32  for (uint32_t i = 0; i < key_count; ++i) {
33  switch (key_size) {
34  case 1:
35  buffer_ptr = init_columnar_buffer<int8_t>(
36  buffer_ptr, EMPTY_KEY_8, groups_buffer_entry_count, start, step);
37  break;
38  case 2:
39  buffer_ptr =
40  init_columnar_buffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
41  EMPTY_KEY_16,
42  groups_buffer_entry_count,
43  start,
44  step);
45  break;
46  case 4:
47  buffer_ptr =
48  init_columnar_buffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
49  EMPTY_KEY_32,
50  groups_buffer_entry_count,
51  start,
52  step);
53  break;
54  case 8:
55  buffer_ptr =
56  init_columnar_buffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
57  EMPTY_KEY_64,
58  groups_buffer_entry_count,
59  start,
60  step);
61  break;
62  default:
63  // FIXME(miyu): CUDA linker doesn't accept assertions on GPU yet.
64  break;
65  }
66  buffer_ptr = align_to_int64(buffer_ptr);
67  }
68  }
69  int32_t init_idx = 0;
70  for (int32_t i = 0; i < agg_col_count; ++i) {
71  if (need_padding) {
72  buffer_ptr = align_to_int64(buffer_ptr);
73  }
74  switch (col_sizes[i]) {
75  case 1:
76  buffer_ptr = init_columnar_buffer<int8_t>(
77  buffer_ptr, init_vals[init_idx++], groups_buffer_entry_count, start, step);
78  break;
79  case 2:
80  buffer_ptr = init_columnar_buffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
81  init_vals[init_idx++],
82  groups_buffer_entry_count,
83  start,
84  step);
85  break;
86  case 4:
87  buffer_ptr = init_columnar_buffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
88  init_vals[init_idx++],
89  groups_buffer_entry_count,
90  start,
91  step);
92  break;
93  case 8:
94  buffer_ptr = init_columnar_buffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
95  init_vals[init_idx++],
96  groups_buffer_entry_count,
97  start,
98  step);
99  break;
100  case 0:
101  continue;
102  default:
103  // FIXME(miyu): CUDA linker doesn't accept assertions on GPU yet.
104  break;
105  }
106  }
107  __syncthreads();
108 }
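
The traversal above fixes the columnar layout: when the buffer is not keyless, each of the key_count key columns holds groups_buffer_entry_count slots of key_size bytes and is padded up to an int64_t boundary; the aggregate columns follow, padded only when need_padding is set, with zero-size slots skipped. A host-side sketch that computes the buffer size implied by those rules; align_to_int64_sz is a hypothetical stand-in for align_to_int64 from BufferCompaction.h:

#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for align_to_int64 (BufferCompaction.h): round up to 8 bytes.
inline size_t align_to_int64_sz(size_t n) {
  return (n + 7) & ~size_t(7);
}

// Byte size of a columnar group-by buffer laid out the way
// init_columnar_group_by_buffer_gpu_impl walks it.
size_t columnar_buffer_bytes(const uint32_t entry_count,
                             const uint32_t key_count,
                             const uint32_t agg_col_count,
                             const int8_t* col_sizes,
                             const bool need_padding,
                             const bool keyless,
                             const int8_t key_size) {
  size_t bytes = 0;
  if (!keyless) {
    for (uint32_t i = 0; i < key_count; ++i) {
      // One key column per key, padded to an int64_t boundary.
      bytes = align_to_int64_sz(bytes + size_t(key_size) * entry_count);
    }
  }
  for (uint32_t i = 0; i < agg_col_count; ++i) {
    if (need_padding) {
      bytes = align_to_int64_sz(bytes);
    }
    bytes += size_t(col_sizes[i]) * entry_count;  // a zero-size slot adds nothing
  }
  return bytes;
}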


__global__ void init_columnar_group_by_buffer_gpu_wrapper ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size 
)

Definition at line 168 of file GpuInitGroups.cu.

References init_columnar_group_by_buffer_gpu_impl().

177  {
178  init_columnar_group_by_buffer_gpu_impl(groups_buffer,
179  init_vals,
180  groups_buffer_entry_count,
181  key_count,
182  agg_col_count,
183  col_sizes,
184  need_padding,
185  keyless,
186  key_size);
187 }
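
The wrapper exists because a kernel launch requires a __global__ entry point; the __device__ function init_columnar_group_by_buffer_gpu_impl cannot appear directly in a <<<...>>> launch.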


void init_columnar_group_by_buffer_on_device ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 209 of file GpuInitGroups.cu.

Referenced by QueryMemoryInitializer::createAndInitializeGroupByBufferGpu().

219  {
220  init_columnar_group_by_buffer_gpu_wrapper<<<grid_size_x, block_size_x>>>(
221  groups_buffer,
222  init_vals,
223  groups_buffer_entry_count,
224  key_count,
225  agg_col_count,
226  col_sizes,
227  need_padding,
228  keyless,
229  key_size);
230 }
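
A hedged host-side usage sketch: allocate the buffer plus device copies of init_vals and col_sizes (both are dereferenced on the GPU), then call the launcher. The shape, initial values, and launch dimensions below are illustrative; in OmniSciDB they come from QueryMemoryInitializer:

#include <cstdint>
#include <cuda_runtime.h>
#include "GpuInitGroups.h"  // declares init_columnar_group_by_buffer_on_device

int main() {
  // Illustrative shape: one 4-byte key column, two 8-byte aggregate columns.
  const uint32_t entry_count = 1024;
  const int8_t h_col_sizes[2] = {8, 8};
  const int64_t h_init_vals[2] = {0, 0};  // e.g. COUNT and SUM start at zero

  // Key column: 4 * 1024 bytes (already 8-aligned); aggregates: 2 * 8 * 1024.
  const size_t buf_bytes = 4 * entry_count + 2 * 8 * entry_count;

  int64_t* d_buf;
  int8_t* d_col_sizes;
  int64_t* d_init_vals;
  cudaMalloc(&d_buf, buf_bytes);
  cudaMalloc(&d_col_sizes, sizeof(h_col_sizes));
  cudaMalloc(&d_init_vals, sizeof(h_init_vals));
  cudaMemcpy(d_col_sizes, h_col_sizes, sizeof(h_col_sizes), cudaMemcpyHostToDevice);
  cudaMemcpy(d_init_vals, h_init_vals, sizeof(h_init_vals), cudaMemcpyHostToDevice);

  init_columnar_group_by_buffer_on_device(d_buf,
                                          d_init_vals,
                                          entry_count,
                                          /*key_count=*/1,
                                          /*agg_col_count=*/2,
                                          d_col_sizes,
                                          /*need_padding=*/false,
                                          /*keyless=*/false,
                                          /*key_size=*/4,
                                          /*block_size_x=*/256,
                                          /*grid_size_x=*/64);
  cudaDeviceSynchronize();
  cudaFree(d_buf);
  cudaFree(d_col_sizes);
  cudaFree(d_init_vals);
  return 0;
}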


__global__ void init_group_by_buffer_gpu ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad,
const bool  keyless,
const int8_t  warp_size 
)

Definition at line 119 of file GpuInitGroups.cu.

References align_to_int64(), EMPTY_KEY_32, EMPTY_KEY_64, and fill_empty_device_key().

126  {
127  const int32_t start = blockIdx.x * blockDim.x + threadIdx.x;
128  const int32_t step = blockDim.x * gridDim.x;
129  if (keyless) {
130  for (int32_t i = start;
131  i < groups_buffer_entry_count * row_size_quad * static_cast<int32_t>(warp_size);
132  i += step) {
133  groups_buffer[i] = init_vals[i % row_size_quad];
134  }
135  __syncthreads();
136  return;
137  }
138 
139  for (int32_t i = start; i < groups_buffer_entry_count; i += step) {
140  int64_t* keys_ptr = groups_buffer + i * row_size_quad;
141  switch (key_width) {
142  case 4:
143  fill_empty_device_key(
144  reinterpret_cast<int32_t*>(keys_ptr), key_count, EMPTY_KEY_32);
145  break;
146  case 8:
147  fill_empty_device_key(
148  reinterpret_cast<int64_t*>(keys_ptr), key_count, EMPTY_KEY_64);
149  break;
150  default:
151  break;
152  }
153  }
154 
155  const uint32_t values_off_quad =
156  align_to_int64(key_count * key_width) / sizeof(int64_t);
157  for (uint32_t i = start; i < groups_buffer_entry_count; i += step) {
158  int64_t* vals_ptr = groups_buffer + i * row_size_quad + values_off_quad;
159  const uint32_t val_count =
160  row_size_quad - values_off_quad; // value slots are always 64-bit
161  for (uint32_t j = 0; j < val_count; ++j) {
162  vals_ptr[j] = init_vals[j];
163  }
164  }
165  __syncthreads();
166 }
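
To make the offset arithmetic concrete: with key_count = 2 and key_width = 4, the packed keys take 8 bytes, align_to_int64(8) = 8, so values_off_quad = 1 and the 64-bit value slots begin one quad past each row base. A small host-side check of that arithmetic, again with a hypothetical stand-in for align_to_int64:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for align_to_int64 (BufferCompaction.h).
inline uint32_t align8(uint32_t n) {
  return (n + 7) & ~uint32_t(7);
}

int main() {
  const uint32_t key_count = 2, key_width = 4, row_size_quad = 4;
  // Keys pack into align8(2 * 4) = 8 bytes, i.e. one 64-bit quad per row.
  const uint32_t values_off_quad = align8(key_count * key_width) / sizeof(int64_t);
  assert(values_off_quad == 1);
  // The remaining quads of the row are 64-bit value slots.
  const uint32_t val_count = row_size_quad - values_off_quad;
  assert(val_count == 3);
  return 0;
}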


void init_group_by_buffer_on_device ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad,
const bool  keyless,
const int8_t  warp_size,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 189 of file GpuInitGroups.cu.

Referenced by QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), and QueryMemoryInitializer::prepareTopNHeapsDevBuffer().

198  {
199  init_group_by_buffer_gpu<<<grid_size_x, block_size_x>>>(groups_buffer,
200  init_vals,
201  groups_buffer_entry_count,
202  key_count,
203  key_width,
204  row_size_quad,
205  keyless,
206  warp_size);
207 }
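
Usage mirrors the columnar launcher: allocate the row-major buffer and a device copy of init_vals, then pass the layout and launch dimensions. The sizes below are illustrative; OmniSciDB derives them in QueryMemoryInitializer::createAndInitializeGroupByBufferGpu:

#include <cstdint>
#include <cuda_runtime.h>
#include "GpuInitGroups.h"  // declares init_group_by_buffer_on_device

int main() {
  // Illustrative shape: 1024 entries, two 8-byte keys, two 64-bit value slots.
  const uint32_t entry_count = 1024;
  const uint32_t row_size_quad = 4;  // 2 key quads + 2 value quads
  const int64_t h_init_vals[2] = {0, 0};

  int64_t* d_buf;
  int64_t* d_init_vals;
  cudaMalloc(&d_buf, entry_count * row_size_quad * sizeof(int64_t));
  cudaMalloc(&d_init_vals, sizeof(h_init_vals));
  cudaMemcpy(d_init_vals, h_init_vals, sizeof(h_init_vals), cudaMemcpyHostToDevice);

  init_group_by_buffer_on_device(d_buf,
                                 d_init_vals,
                                 entry_count,
                                 /*key_count=*/2,
                                 /*key_width=*/8,
                                 row_size_quad,
                                 /*keyless=*/false,
                                 /*warp_size=*/1,  // unused on the non-keyless path
                                 /*block_size_x=*/256,
                                 /*grid_size_x=*/64);
  cudaDeviceSynchronize();
  cudaFree(d_buf);
  cudaFree(d_init_vals);
  return 0;
}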
