OmniSciDB  1dac507f6e
GpuInitGroups.cu File Reference
#include "BufferCompaction.h"
#include "GpuInitGroups.h"
#include "GpuRtConstants.h"


Functions

template<typename T >
__device__ int8_t * init_columnar_buffer (T *buffer_ptr, const T init_val, const uint32_t entry_count, const int32_t start, const int32_t step)
 
__device__ void init_columnar_group_by_buffer_gpu_impl (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size)
 
__device__ void init_render_buffer (int64_t *render_buffer, const uint32_t qw_count)
 
__global__ void init_render_buffer_wrapper (int64_t *render_buffer, const uint32_t qw_count)
 
template<typename K >
__device__ void fill_empty_device_key (K *keys_ptr, const uint32_t key_count, const K empty_key)
 
__global__ void init_group_by_buffer_gpu (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size)
 
__global__ void init_columnar_group_by_buffer_gpu_wrapper (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size)
 
void init_group_by_buffer_on_device (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size, const size_t block_size_x, const size_t grid_size_x)
 
void init_columnar_group_by_buffer_on_device (int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t agg_col_count, const int8_t *col_sizes, const bool need_padding, const bool keyless, const int8_t key_size, const size_t block_size_x, const size_t grid_size_x)
 
void init_render_buffer_on_device (int64_t *render_buffer, const uint32_t qw_count, const size_t block_size_x, const size_t grid_size_x)
 

Function Documentation

template<typename K >
__device__ void fill_empty_device_key ( K *  keys_ptr,
const uint32_t  key_count,
const K  empty_key 
)
inline

Definition at line 124 of file GpuInitGroups.cu.

Referenced by init_group_by_buffer_gpu().

126  {
127  for (uint32_t i = 0; i < key_count; ++i) {
128  keys_ptr[i] = empty_key;
129  }
130 }
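
The empty-key sentinels written here (EMPTY_KEY_8/16/32/64 from GpuRtConstants.h) mark group-by slots that no row has claimed yet. As a rough, hypothetical host-side sketch of how such a slot could be recognized afterwards (the sentinel is assumed here to be the type's maximum value for illustration; the authoritative definition lives in GpuRtConstants.h):

#include <cstdint>
#include <limits>

// Assumed sentinel value, for illustration only; see GpuRtConstants.h for the
// actual EMPTY_KEY_64 definition.
constexpr int64_t kEmptyKey64 = std::numeric_limits<int64_t>::max();

// An entry is considered unused while its first key slot still holds the
// sentinel written by fill_empty_device_key().
inline bool entry_is_empty(const int64_t* keys_ptr) {
  return keys_ptr[0] == kEmptyKey64;
}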


template<typename T >
__device__ int8_t* init_columnar_buffer ( T *  buffer_ptr,
const T  init_val,
const uint32_t  entry_count,
const int32_t  start,
const int32_t  step 
)

Definition at line 6 of file GpuInitGroups.cu.

10  {
11  for (int32_t i = start; i < entry_count; i += step) {
12  buffer_ptr[i] = init_val;
13  }
14  return reinterpret_cast<int8_t*>(buffer_ptr + entry_count);
15 }
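
init_columnar_buffer uses a grid-stride loop, so all launched threads cooperatively fill one column, and it returns a pointer just past that column so the caller can walk a packed columnar buffer column by column. A minimal standalone sketch of the same grid-stride pattern (the kernel name and element type below are illustrative, not part of this file):

__global__ void fill_column_i32(int32_t* col, const int32_t val, const uint32_t entry_count) {
  const uint32_t start = blockIdx.x * blockDim.x + threadIdx.x;
  const uint32_t step = blockDim.x * gridDim.x;
  // Each thread writes entries start, start + step, start + 2 * step, ...
  for (uint32_t i = start; i < entry_count; i += step) {
    col[i] = val;
  }
}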
__device__ void init_columnar_group_by_buffer_gpu_impl ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size 
)

Definition at line 17 of file GpuInitGroups.cu.

References agg_col_count, align_to_int64(), EMPTY_KEY_16, EMPTY_KEY_32, EMPTY_KEY_64, EMPTY_KEY_8, groups_buffer, and groups_buffer_entry_count.

Referenced by init_columnar_group_by_buffer_gpu_wrapper().

26  {
27  const int32_t start = blockIdx.x * blockDim.x + threadIdx.x;
28  const int32_t step = blockDim.x * gridDim.x;
29 
30  int8_t* buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
31  if (!keyless) {
32  for (uint32_t i = 0; i < key_count; ++i) {
33  switch (key_size) {
34  case 1:
35  buffer_ptr = init_columnar_buffer<int8_t>(
36  buffer_ptr, EMPTY_KEY_8, groups_buffer_entry_count, start, step);
37  break;
38  case 2:
39  buffer_ptr =
40  init_columnar_buffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
41  EMPTY_KEY_16,
42  groups_buffer_entry_count,
43  start,
44  step);
45  break;
46  case 4:
47  buffer_ptr =
48  init_columnar_buffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
49  EMPTY_KEY_32,
50  groups_buffer_entry_count,
51  start,
52  step);
53  break;
54  case 8:
55  buffer_ptr =
56  init_columnar_buffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
57  EMPTY_KEY_64,
58  groups_buffer_entry_count,
59  start,
60  step);
61  break;
62  default:
63  // FIXME(miyu): CUDA linker doesn't accept assertion on GPU yet right now.
64  break;
65  }
66  buffer_ptr = align_to_int64(buffer_ptr);
67  }
68  }
69  int32_t init_idx = 0;
70  for (int32_t i = 0; i < agg_col_count; ++i) {
71  if (need_padding) {
72  buffer_ptr = align_to_int64(buffer_ptr);
73  }
74  switch (col_sizes[i]) {
75  case 1:
76  buffer_ptr = init_columnar_buffer<int8_t>(
77  buffer_ptr, init_vals[init_idx++], groups_buffer_entry_count, start, step);
78  break;
79  case 2:
80  buffer_ptr = init_columnar_buffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
81  init_vals[init_idx++],
82  groups_buffer_entry_count,
83  start,
84  step);
85  break;
86  case 4:
87  buffer_ptr = init_columnar_buffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
88  init_vals[init_idx++],
89  groups_buffer_entry_count,
90  start,
91  step);
92  break;
93  case 8:
94  buffer_ptr = init_columnar_buffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
95  init_vals[init_idx++],
96  groups_buffer_entry_count,
97  start,
98  step);
99  break;
100  case 0:
101  continue;
102  default:
103  // FIXME(miyu): CUDA linker doesn't accept assertion on GPU yet right now.
104  break;
105  }
106  }
107  __syncthreads();
108 }
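
In the non-keyless path above, the columnar layout is: one contiguous array per key component (each padded to an int64_t boundary), followed by one array per aggregate column (padded when need_padding is set). A rough host-side sketch of how the byte offset of the first aggregate column could be computed under that layout (helper names below are illustrative, not part of this file):

#include <cstddef>
#include <cstdint>

// Round a byte count up to the next int64_t boundary, mirroring align_to_int64().
inline size_t align_to_int64_bytes(const size_t n) {
  return (n + 7) & ~size_t(7);
}

// Byte offset of the first aggregate column: key_count key columns of
// key_size bytes each, every column padded to an int64_t boundary.
inline size_t first_agg_col_offset(const uint32_t key_count,
                                   const uint32_t entry_count,
                                   const uint8_t key_size) {
  size_t off = 0;
  for (uint32_t i = 0; i < key_count; ++i) {
    off += align_to_int64_bytes(size_t(entry_count) * key_size);
  }
  return off;
}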

__global__ void init_columnar_group_by_buffer_gpu_wrapper ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size 
)

Definition at line 181 of file GpuInitGroups.cu.

References init_columnar_group_by_buffer_gpu_impl().

190  {
191  init_columnar_group_by_buffer_gpu_impl(groups_buffer,
192  init_vals,
193  groups_buffer_entry_count,
194  key_count,
195  agg_col_count,
196  col_sizes,
197  need_padding,
198  keyless,
199  key_size);
200 }

void init_columnar_group_by_buffer_on_device ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  agg_col_count,
const int8_t *  col_sizes,
const bool  need_padding,
const bool  keyless,
const int8_t  key_size,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 222 of file GpuInitGroups.cu.

References agg_col_count, groups_buffer, groups_buffer_entry_count, init_vals, and keyless.

232  {
233  init_columnar_group_by_buffer_gpu_wrapper<<<grid_size_x, block_size_x>>>(
234  groups_buffer,
235  init_vals,
236  groups_buffer_entry_count,
237  key_count,
238  agg_col_count,
239  col_sizes,
240  need_padding,
241  keyless,
242  key_size);
243 }
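
A hypothetical host-side call, assuming a single 4-byte key column and two 8-byte aggregate columns; sizes and variable names are illustrative only, and the host entry point is assumed to be declared in GpuInitGroups.h as included by this file. Note that init_vals and col_sizes are dereferenced on the GPU, so they must be device-resident:

#include <cuda_runtime.h>
#include <cstdint>
#include "GpuInitGroups.h"

void example_columnar_init(const size_t block_size_x, const size_t grid_size_x) {
  const uint32_t entry_count = 1024;
  const int8_t col_sizes[] = {8, 8};        // two 64-bit aggregate columns
  const int64_t host_init_vals[] = {0, 0};  // e.g. COUNT/SUM slots start at 0

  // One int64-padded 4-byte key column plus two 8-byte aggregate columns.
  const size_t buffer_bytes =
      ((entry_count * 4 + 7) & ~size_t(7)) + 2 * entry_count * sizeof(int64_t);

  int64_t* dev_buffer = nullptr;
  int64_t* dev_init_vals = nullptr;
  int8_t* dev_col_sizes = nullptr;
  cudaMalloc(&dev_buffer, buffer_bytes);
  cudaMalloc(&dev_init_vals, sizeof(host_init_vals));
  cudaMalloc(&dev_col_sizes, sizeof(col_sizes));
  cudaMemcpy(dev_init_vals, host_init_vals, sizeof(host_init_vals), cudaMemcpyHostToDevice);
  cudaMemcpy(dev_col_sizes, col_sizes, sizeof(col_sizes), cudaMemcpyHostToDevice);

  init_columnar_group_by_buffer_on_device(dev_buffer,
                                          dev_init_vals,
                                          entry_count,
                                          /*key_count=*/1,
                                          /*agg_col_count=*/2,
                                          dev_col_sizes,
                                          /*need_padding=*/false,
                                          /*keyless=*/false,
                                          /*key_size=*/4,
                                          block_size_x,
                                          grid_size_x);
  cudaDeviceSynchronize();

  cudaFree(dev_col_sizes);
  cudaFree(dev_init_vals);
  cudaFree(dev_buffer);
}
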
__global__ void init_group_by_buffer_gpu ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad,
const bool  keyless,
const int8_t  warp_size 
)

Definition at line 132 of file GpuInitGroups.cu.

References align_to_int64(), EMPTY_KEY_32, EMPTY_KEY_64, fill_empty_device_key(), and groups_buffer_entry_count.

139  {
140  const int32_t start = blockIdx.x * blockDim.x + threadIdx.x;
141  const int32_t step = blockDim.x * gridDim.x;
142  if (keyless) {
143  for (int32_t i = start;
144  i < groups_buffer_entry_count * row_size_quad * static_cast<int32_t>(warp_size);
145  i += step) {
146  groups_buffer[i] = init_vals[i % row_size_quad];
147  }
148  __syncthreads();
149  return;
150  }
151 
152  for (int32_t i = start; i < groups_buffer_entry_count; i += step) {
153  int64_t* keys_ptr = groups_buffer + i * row_size_quad;
154  switch (key_width) {
155  case 4:
156  fill_empty_device_key(
157  reinterpret_cast<int32_t*>(keys_ptr), key_count, EMPTY_KEY_32);
158  break;
159  case 8:
160  fill_empty_device_key(
161  reinterpret_cast<int64_t*>(keys_ptr), key_count, EMPTY_KEY_64);
162  break;
163  default:
164  break;
165  }
166  }
167 
168  const uint32_t values_off_quad =
169  align_to_int64(key_count * key_width) / sizeof(int64_t);
170  for (uint32_t i = start; i < groups_buffer_entry_count; i += step) {
171  int64_t* vals_ptr = groups_buffer + i * row_size_quad + values_off_quad;
172  const uint32_t val_count =
173  row_size_quad - values_off_quad; // value slots are always 64-bit
174  for (uint32_t j = 0; j < val_count; ++j) {
175  vals_ptr[j] = init_vals[j];
176  }
177  }
178  __syncthreads();
179 }
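
In the non-keyless path above, the buffer is row-wise: each entry occupies row_size_quad 64-bit slots, with the keys first (padded to an int64_t boundary) and the value slots after them. A rough sketch of the same addressing, mirroring the kernel's arithmetic (the helper name below is illustrative, not part of this file):

#include <cstdint>
#include <cstddef>

// Pointer to the value slots of entry i: rows are row_size_quad int64_t wide,
// and values begin after the int64-aligned key section.
inline int64_t* row_values_ptr(int64_t* groups_buffer,
                               const uint32_t i,
                               const uint32_t row_size_quad,
                               const uint32_t key_count,
                               const uint32_t key_width) {
  const uint32_t values_off_quad =
      ((key_count * key_width + 7) & ~7u) / sizeof(int64_t);
  return groups_buffer + size_t(i) * row_size_quad + values_off_quad;
}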

void init_group_by_buffer_on_device ( int64_t *  groups_buffer,
const int64_t *  init_vals,
const uint32_t  groups_buffer_entry_count,
const uint32_t  key_count,
const uint32_t  key_width,
const uint32_t  row_size_quad,
const bool  keyless,
const int8_t  warp_size,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 202 of file GpuInitGroups.cu.

References groups_buffer, groups_buffer_entry_count, init_vals, and keyless.

211  {
212  init_group_by_buffer_gpu<<<grid_size_x, block_size_x>>>(groups_buffer,
213  init_vals,
214  groups_buffer_entry_count,
215  key_count,
216  key_width,
217  row_size_quad,
218  keyless,
219  warp_size);
220 }
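
A hypothetical keyless launch, sized per the kernel's keyless loop above, which writes entry_count * row_size_quad * warp_size int64_t slots. Variable names and sizes are illustrative, and the host entry point is assumed to be declared in GpuInitGroups.h:

#include <cuda_runtime.h>
#include <cstdint>
#include "GpuInitGroups.h"

void example_keyless_init(const size_t block_size_x, const size_t grid_size_x) {
  const uint32_t entry_count = 2048;
  const uint32_t row_size_quad = 2;  // two 64-bit aggregate slots per entry
  const int8_t warp_size = 32;

  const int64_t host_init_vals[] = {0, 0};  // one init value per slot in a row
  int64_t* dev_buffer = nullptr;
  int64_t* dev_init_vals = nullptr;
  cudaMalloc(&dev_buffer,
             size_t(entry_count) * row_size_quad * warp_size * sizeof(int64_t));
  cudaMalloc(&dev_init_vals, sizeof(host_init_vals));
  cudaMemcpy(dev_init_vals, host_init_vals, sizeof(host_init_vals),
             cudaMemcpyHostToDevice);

  init_group_by_buffer_on_device(dev_buffer,
                                 dev_init_vals,
                                 entry_count,
                                 /*key_count=*/0,
                                 /*key_width=*/8,
                                 row_size_quad,
                                 /*keyless=*/true,
                                 warp_size,
                                 block_size_x,
                                 grid_size_x);
  cudaDeviceSynchronize();

  cudaFree(dev_init_vals);
  cudaFree(dev_buffer);
}
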
__device__ void init_render_buffer ( int64_t *  render_buffer,
const uint32_t  qw_count 
)

Definition at line 110 of file GpuInitGroups.cu.

References EMPTY_KEY_64.

Referenced by init_render_buffer_wrapper().

110  {
111  const uint32_t start = blockIdx.x * blockDim.x + threadIdx.x;
112  const uint32_t step = blockDim.x * gridDim.x;
113  for (uint32_t i = start; i < qw_count; i += step) {
114  render_buffer[i] = EMPTY_KEY_64;
115  }
116 }

void init_render_buffer_on_device ( int64_t *  render_buffer,
const uint32_t  qw_count,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 245 of file GpuInitGroups.cu.

248  {
249  init_render_buffer_wrapper<<<grid_size_x, block_size_x>>>(render_buffer, qw_count);
250 }
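
A minimal host-side sketch (buffer size and launch dimensions are illustrative; the host entry point is assumed to be declared in GpuInitGroups.h) that clears qw_count 64-bit words of a render buffer to the EMPTY_KEY_64 sentinel:

#include <cuda_runtime.h>
#include <cstdint>
#include "GpuInitGroups.h"

void example_render_init() {
  const uint32_t qw_count = 1 << 20;
  int64_t* dev_render_buffer = nullptr;
  cudaMalloc(&dev_render_buffer, qw_count * sizeof(int64_t));

  init_render_buffer_on_device(dev_render_buffer, qw_count,
                               /*block_size_x=*/256, /*grid_size_x=*/128);
  cudaDeviceSynchronize();

  cudaFree(dev_render_buffer);
}
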
__global__ void init_render_buffer_wrapper ( int64_t *  render_buffer,
const uint32_t  qw_count 
)

Definition at line 118 of file GpuInitGroups.cu.

References init_render_buffer().

119  {
120  init_render_buffer(render_buffer, qw_count);
121 }