OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RasterFormat.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
21 #include <tbb/parallel_for.h>
22 #include "CpuTimer.hpp"
24 #include "RasterInfo.h"
25 #include "TableFunctionsCommon.hpp"
26 
27 namespace RasterFormat_Namespace {
28 
29 template <typename OutputType>
30 TEMPLATE_NOINLINE void fill_row_halo(std::vector<OutputType>& output_data,
31  const int64_t tile_x,
32  const int64_t tile_y,
33  const int64_t tiles_x,
34  const int64_t tiles_y,
35  const int64_t halo_y_pixels_per_tile_boundary,
36  const int64_t x_pixels_per_tile,
37  const int64_t y_pixels_per_tile,
38  const int64_t num_raster_channels,
39  const int64_t source_tile_y_offset) {
40  if (tile_y + source_tile_y_offset >= tiles_y || tile_y + source_tile_y_offset < 0) {
41  return;
42  }
43  const int64_t output_tile_slot = tile_y * tiles_x + tile_x;
44  const int64_t source_tile_slot = (tile_y + source_tile_y_offset) * tiles_x + tile_x;
45  const int64_t pixels_per_tile = x_pixels_per_tile * y_pixels_per_tile;
46  const int64_t channel_pixels_per_tile = pixels_per_tile * num_raster_channels;
47  int64_t halo_y_start;
48  int64_t source_y_start;
49  int64_t halo_y_increment;
50  int64_t source_y_increment;
51  if (source_tile_y_offset > 0) {
52  // fill in top/north halo
53  halo_y_start = y_pixels_per_tile - 1;
54  halo_y_increment = -1;
55  source_y_start = 2 * halo_y_pixels_per_tile_boundary - 1;
56  source_y_increment = -1;
57  } else {
58  // fill in bottom/south halo
59  halo_y_start = 0;
60  halo_y_increment = 1;
61  source_y_start = y_pixels_per_tile - 2 * halo_y_pixels_per_tile_boundary + 1;
62  source_y_increment = 1;
63  }
64 
66  tbb::blocked_range<int64_t>(0, halo_y_pixels_per_tile_boundary),
67  [&](const tbb::blocked_range<int64_t>& halo_y_pixels_range) {
68  const int64_t start_halo_y_offset = halo_y_pixels_range.begin();
69  const int64_t end_halo_y_offset = halo_y_pixels_range.end();
70  for (int64_t halo_y_offset = start_halo_y_offset;
71  halo_y_offset < end_halo_y_offset;
72  ++halo_y_offset) {
73  const int64_t halo_y = halo_y_start + halo_y_offset * halo_y_increment;
74  const int64_t source_y = source_y_start + halo_y_offset * source_y_increment;
75  const int64_t output_starting_slot_for_halo_row =
76  output_tile_slot * channel_pixels_per_tile + halo_y * x_pixels_per_tile;
77  const int64_t source_starting_slot_for_halo_row =
78  source_tile_slot * channel_pixels_per_tile + source_y * x_pixels_per_tile;
79  for (int64_t local_x_pixel = 0; local_x_pixel < x_pixels_per_tile;
80  ++local_x_pixel) {
81  const int64_t output_starting_slot =
82  output_starting_slot_for_halo_row + local_x_pixel;
83  const int64_t source_starting_slot =
84  source_starting_slot_for_halo_row + local_x_pixel;
85  output_data[output_starting_slot] = output_data[source_starting_slot];
86  output_data[output_starting_slot + pixels_per_tile] =
87  output_data[source_starting_slot + pixels_per_tile];
88  output_data[output_starting_slot + 2 * pixels_per_tile] =
89  output_data[source_starting_slot + 2 * pixels_per_tile];
90  }
91  }
92  });
93 }
94 
95 template <typename OutputType>
96 TEMPLATE_NOINLINE void fill_col_halo(std::vector<OutputType>& output_data,
97  const int64_t tile_x,
98  const int64_t tile_y,
99  const int64_t tiles_x,
100  const int64_t tiles_y,
101  const int64_t halo_x_pixels_per_tile_boundary,
102  const int64_t x_pixels_per_tile,
103  const int64_t y_pixels_per_tile,
104  const int64_t num_raster_channels,
105  const int64_t source_tile_x_offset) {
106  if (tile_x + source_tile_x_offset >= tiles_x || tile_x + source_tile_x_offset < 0) {
107  return;
108  }
109  const int64_t output_tile_slot = tile_y * tiles_x + tile_x;
110  const int64_t source_tile_slot = tile_y * tiles_x + tile_x + source_tile_x_offset;
111  const int64_t pixels_per_tile = x_pixels_per_tile * y_pixels_per_tile;
112  const int64_t channel_pixels_per_tile = pixels_per_tile * num_raster_channels;
113  int64_t halo_x_start;
114  int64_t source_x_start;
115  int64_t halo_x_increment;
116  int64_t source_x_increment;
117  if (source_tile_x_offset > 0) {
118  // fill in right/east halo
119  halo_x_start = x_pixels_per_tile - 1;
120  halo_x_increment = -1;
121  source_x_start = 2 * halo_x_pixels_per_tile_boundary - 1;
122  source_x_increment = -1;
123  } else {
124  // fill in left/west halo
125  halo_x_start = 0;
126  halo_x_increment = 1;
127  source_x_start = x_pixels_per_tile - 2 * halo_x_pixels_per_tile_boundary + 1;
128  source_x_increment = 1;
129  }
130 
132  tbb::blocked_range<int64_t>(0, y_pixels_per_tile),
133  [&](const tbb::blocked_range<int64_t>& y_pixels_range) {
134  const int64_t start_local_y_pixel = y_pixels_range.begin();
135  const int64_t end_local_y_pixel = y_pixels_range.end();
136 
137  for (int64_t local_y_pixel = start_local_y_pixel;
138  local_y_pixel < end_local_y_pixel;
139  ++local_y_pixel) {
140  const int64_t output_starting_slot_for_halo_row =
141  output_tile_slot * channel_pixels_per_tile +
142  local_y_pixel * x_pixels_per_tile;
143  const int64_t source_starting_slot_for_halo_row =
144  source_tile_slot * channel_pixels_per_tile +
145  local_y_pixel * x_pixels_per_tile;
146 
147  for (int64_t halo_x_offset = 0; halo_x_offset < halo_x_pixels_per_tile_boundary;
148  ++halo_x_offset) {
149  const int64_t halo_x = halo_x_start + halo_x_offset * halo_x_increment;
150  const int64_t source_x = source_x_start + halo_x_offset * source_x_increment;
151  const int64_t output_starting_slot =
152  output_starting_slot_for_halo_row + halo_x;
153  const int64_t source_starting_slot =
154  source_starting_slot_for_halo_row + source_x;
155  output_data[output_starting_slot] = output_data[source_starting_slot];
156  output_data[output_starting_slot + pixels_per_tile] =
157  output_data[source_starting_slot + pixels_per_tile];
158  output_data[output_starting_slot + 2 * pixels_per_tile] =
159  output_data[source_starting_slot + 2 * pixels_per_tile];
160  }
161  }
162  });
163 }
164 
165 template <typename PixelType, typename ColorType, typename OutputType>
166 TEMPLATE_NOINLINE std::pair<std::vector<OutputType>, RasterInfo> format_raster_data(
167  const Column<PixelType>& raster_x,
168  const Column<PixelType>& raster_y,
169  const ColumnList<ColorType>& raster_channels,
170  const PixelType x_input_units_per_pixel,
171  const PixelType y_input_units_per_pixel,
172  const float max_color_val,
173  const int64_t x_pixels_per_tile,
174  const int64_t y_pixels_per_tile,
175  const int64_t tile_boundary_halo_pixels,
176  const int64_t target_batch_size_multiple,
177  std::shared_ptr<CpuTimer> timer) {
178  const double x_pixels_per_input_unit = 1.0 / x_input_units_per_pixel;
179  const double y_pixels_per_input_unit = 1.0 / y_input_units_per_pixel;
180  timer->start_event_timer("Compute raster metadata");
181  const auto x_pixel_range = get_column_min_max(raster_x);
182  const auto y_pixel_range = get_column_min_max(raster_y);
183 
184  const int64_t logical_x_pixels =
185  (x_pixel_range.second - x_pixel_range.first) * x_pixels_per_input_unit + 1;
186  const auto min_x_input_unit = x_pixel_range.first;
187 
188  const int64_t logical_y_pixels =
189  (y_pixel_range.second - y_pixel_range.first) * y_pixels_per_input_unit + 1;
190  const auto min_y_input_unit = y_pixel_range.first;
191 
192  int64_t x_tiles = 1;
193  int64_t y_tiles = 1;
194  int64_t halo_x_pixels_per_tile_boundary = 0;
195  int64_t halo_y_pixels_per_tile_boundary = 0;
196  int64_t logical_x_pixels_per_tile = x_pixels_per_tile;
197  int64_t logical_y_pixels_per_tile = y_pixels_per_tile;
198  if (logical_x_pixels > x_pixels_per_tile || logical_y_pixels > y_pixels_per_tile) {
199  // We need to consider padding if we cannot fit in single buffer
200  halo_x_pixels_per_tile_boundary =
201  std::min(tile_boundary_halo_pixels, (x_pixels_per_tile - 1) / 2);
202  halo_y_pixels_per_tile_boundary =
203  std::min(tile_boundary_halo_pixels, (y_pixels_per_tile - 1) / 2);
204  logical_x_pixels_per_tile -= halo_x_pixels_per_tile_boundary * 2;
205  logical_y_pixels_per_tile -= halo_y_pixels_per_tile_boundary * 2;
206  x_tiles =
207  (logical_x_pixels + (logical_x_pixels_per_tile - 1)) / logical_x_pixels_per_tile;
208  y_tiles =
209  (logical_y_pixels + (logical_y_pixels_per_tile - 1)) / logical_y_pixels_per_tile;
210  }
211 
212  const int64_t num_input_pixels = raster_x.size();
213  const int64_t num_raster_channels = raster_channels.numCols();
214 
215  const float normalize_ratio = 1.0f / max_color_val;
216  const int64_t num_tiles = x_tiles * y_tiles;
217  const int64_t batch_tiles =
218  num_tiles == 1 ? 1
219  : (num_tiles + target_batch_size_multiple - 1) /
220  target_batch_size_multiple * target_batch_size_multiple;
221  const int64_t batch_pixels = batch_tiles * x_pixels_per_tile * y_pixels_per_tile;
222  std::vector<OutputType> output_data(batch_pixels * num_raster_channels,
223  static_cast<OutputType>(0));
224  const RasterInfo raster_info{num_raster_channels,
225  x_pixels_per_tile,
226  y_pixels_per_tile,
227  halo_x_pixels_per_tile_boundary,
228  halo_y_pixels_per_tile_boundary,
229  logical_x_pixels_per_tile,
230  logical_y_pixels_per_tile,
231  x_tiles,
232  y_tiles,
233  batch_tiles,
234  static_cast<double>(x_input_units_per_pixel),
235  static_cast<double>(y_input_units_per_pixel),
236  static_cast<double>(min_x_input_unit),
237  static_cast<double>(min_y_input_unit)};
238 
239  timer->start_event_timer("Format logical pixels");
240  const int64_t pixels_per_tile = x_pixels_per_tile * y_pixels_per_tile;
241  if (x_tiles == 1 && y_tiles == 1) {
243  tbb::blocked_range<int64_t>(0, num_input_pixels),
244  [&](const tbb::blocked_range<int64_t>& input_pixel_range) {
245  const int64_t end_input_pixel_idx = input_pixel_range.end();
246  for (int64_t input_pixel_idx = input_pixel_range.begin();
247  input_pixel_idx < end_input_pixel_idx;
248  ++input_pixel_idx) {
249  const int64_t output_pixel_x = round(
250  (raster_x[input_pixel_idx] - min_x_input_unit) * x_pixels_per_input_unit);
251  const int64_t output_pixel_y = round(
252  (raster_y[input_pixel_idx] - min_y_input_unit) * y_pixels_per_input_unit);
253  const int64_t output_starting_slot =
254  output_pixel_y * x_pixels_per_tile + output_pixel_x;
255  for (int64_t channel_idx = 0; channel_idx < num_raster_channels;
256  ++channel_idx) {
257  output_data[output_starting_slot + channel_idx * pixels_per_tile] =
258  std::min(
259  static_cast<float>(raster_channels[channel_idx][input_pixel_idx]) *
260  normalize_ratio,
261  1.0f);
262  }
263  }
264  });
265  } else {
266  const int64_t channel_pixels_per_tile = pixels_per_tile * num_raster_channels;
268  tbb::blocked_range<int64_t>(0, num_input_pixels),
269  [&](const tbb::blocked_range<int64_t>& input_pixel_range) {
270  const int64_t end_input_pixel_idx = input_pixel_range.end();
271  for (int64_t input_pixel_idx = input_pixel_range.begin();
272  input_pixel_idx < end_input_pixel_idx;
273  ++input_pixel_idx) {
274  const int64_t logical_output_pixel_x = round(
275  (raster_x[input_pixel_idx] - min_x_input_unit) * x_pixels_per_input_unit);
276  const int64_t logical_output_pixel_y = round(
277  (raster_y[input_pixel_idx] - min_y_input_unit) * y_pixels_per_input_unit);
278  const int64_t output_tile_x =
279  logical_output_pixel_x / logical_x_pixels_per_tile;
280  const int64_t output_tile_y =
281  logical_output_pixel_y / logical_y_pixels_per_tile;
282  const int64_t local_output_pixel_x =
283  (logical_output_pixel_x % logical_x_pixels_per_tile) +
284  halo_x_pixels_per_tile_boundary;
285  const int64_t local_output_pixel_y =
286  (logical_output_pixel_y % logical_y_pixels_per_tile) +
287  halo_y_pixels_per_tile_boundary;
288  const int64_t output_tile_slot = output_tile_y * x_tiles + output_tile_x;
289  const int64_t output_starting_slot =
290  output_tile_slot * channel_pixels_per_tile +
291  local_output_pixel_y * x_pixels_per_tile + local_output_pixel_x;
292  for (int64_t channel_idx = 0; channel_idx < num_raster_channels;
293  ++channel_idx) {
294  output_data[output_starting_slot + channel_idx * pixels_per_tile] =
295  std::min(
296  static_cast<float>(raster_channels[channel_idx][input_pixel_idx]) *
297  normalize_ratio,
298  1.0f);
299  }
300  }
301  });
302 
303  // Now fill in halos
304  if (halo_x_pixels_per_tile_boundary > 0 || halo_y_pixels_per_tile_boundary > 0) {
305  timer->start_event_timer("Add halos");
306  for (int64_t tile_y = 0; tile_y < y_tiles; ++tile_y) {
307  for (int64_t tile_x = 0; tile_x < x_tiles; ++tile_x) {
308  for (int64_t offset = -1; offset < 1; offset += 2) {
309  fill_row_halo(output_data,
310  tile_x,
311  tile_y,
312  x_tiles,
313  y_tiles,
314  halo_y_pixels_per_tile_boundary,
315  x_pixels_per_tile,
316  y_pixels_per_tile,
317  num_raster_channels,
318  offset);
319  }
320  }
321  }
322  for (int64_t tile_y = 0; tile_y < y_tiles; ++tile_y) {
323  for (int64_t tile_x = 0; tile_x < x_tiles; ++tile_x) {
324  fill_col_halo(output_data,
325  tile_x,
326  tile_y,
327  x_tiles,
328  y_tiles,
329  halo_x_pixels_per_tile_boundary,
330  x_pixels_per_tile,
331  y_pixels_per_tile,
332  num_raster_channels,
333  -1);
334  fill_col_halo(output_data,
335  tile_x,
336  tile_y,
337  x_tiles,
338  y_tiles,
339  halo_y_pixels_per_tile_boundary,
340  x_pixels_per_tile,
341  y_pixels_per_tile,
342  num_raster_channels,
343  1);
344  }
345  }
346  }
347  }
348  return std::make_pair(output_data, raster_info);
349 }
350 
351 } // namespace RasterFormat_Namespace
352 
353 #endif // #ifdef __CUDACC__
NEVER_INLINE HOST std::pair< T, T > get_column_min_max(const Column< T > &col)
DEVICE int64_t size() const
TEMPLATE_NOINLINE void fill_row_halo(std::vector< OutputType > &output_data, const int64_t tile_x, const int64_t tile_y, const int64_t tiles_x, const int64_t tiles_y, const int64_t halo_y_pixels_per_tile_boundary, const int64_t x_pixels_per_tile, const int64_t y_pixels_per_tile, const int64_t num_raster_channels, const int64_t source_tile_y_offset)
DEVICE int64_t numCols() const
TEMPLATE_NOINLINE void fill_col_halo(std::vector< OutputType > &output_data, const int64_t tile_x, const int64_t tile_y, const int64_t tiles_x, const int64_t tiles_y, const int64_t halo_x_pixels_per_tile_boundary, const int64_t x_pixels_per_tile, const int64_t y_pixels_per_tile, const int64_t num_raster_channels, const int64_t source_tile_x_offset)
TEMPLATE_NOINLINE std::pair< std::vector< OutputType >, RasterInfo > format_raster_data(const Column< PixelType > &raster_x, const Column< PixelType > &raster_y, const ColumnList< ColorType > &raster_channels, const PixelType x_input_units_per_pixel, const PixelType y_input_units_per_pixel, const float max_color_val, const int64_t x_pixels_per_tile, const int64_t y_pixels_per_tile, const int64_t tile_boundary_halo_pixels, const int64_t target_batch_size_multiple, std::shared_ptr< CpuTimer > timer)
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
#define TEMPLATE_NOINLINE
Definition: heavydbTypes.h:60