OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RasterImporter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file RasterImporter.cpp
19  * @brief GDAL Raster File Importer
20  *
21  */
22 
24 
25 #include <boost/algorithm/string.hpp>
26 #include <boost/filesystem.hpp>
27 #include <boost/tokenizer.hpp>
28 
29 #include <xercesc/dom/DOMDocument.hpp>
30 #include <xercesc/dom/DOMElement.hpp>
31 #include <xercesc/dom/DOMNodeList.hpp>
32 #include <xercesc/framework/MemBufInputSource.hpp>
33 #include <xercesc/parsers/XercesDOMParser.hpp>
34 
35 #include <gdal.h>
36 #include <gdal_alg.h>
37 #include <ogrsf_frmts.h>
38 
39 #include "Shared/import_helpers.h"
40 #include "Shared/scope.h"
41 
42 #define DEBUG_RASTER_IMPORT 0
43 
44 namespace import_export {
45 
46 GCPTransformer::GCPTransformer(OGRDataSource* datasource, const Mode mode)
47  : transform_arg_{nullptr}, mode_{mode} {
48  CHECK(datasource);
49  auto const gcp_count = datasource->GetGCPCount();
50  auto const* gcp_list = datasource->GetGCPs();
51  switch (mode_) {
52  case Mode::kPolynomial: {
53  static constexpr int kPolynomialOrder = 2;
54  transform_arg_ =
55  GDALCreateGCPTransformer(gcp_count, gcp_list, kPolynomialOrder, false);
56  break;
57  }
58  case Mode::kThinPlateSpline: {
59  transform_arg_ = GDALCreateTPSTransformer(gcp_count, gcp_list, false);
60  break;
61  }
62  }
63  CHECK(transform_arg_);
64 }
65 
67  switch (mode_) {
68  case Mode::kPolynomial:
69  GDALDestroyGCPTransformer(transform_arg_);
70  break;
72  GDALDestroyTPSTransformer(transform_arg_);
73  break;
74  }
75 }
76 
77 void GCPTransformer::transform(double& x, double& y) {
78  int success{0};
79  switch (mode_) {
80  case Mode::kPolynomial:
81  GDALGCPTransform(transform_arg_, false, 1, &x, &y, nullptr, &success);
82  break;
84  GDALTPSTransform(transform_arg_, false, 1, &x, &y, nullptr, &success);
85  break;
86  }
87  if (!success) {
88  throw std::runtime_error("Failed GCP/TPS Transform");
89  }
90 }
91 
92 namespace {
93 
94 // for these conversions, consider that we do not have unsigned integer SQLTypes
95 // we must therefore promote Byte to SMALLINT, UInt16 to INT, and UInt32 to BIGINT
96 
97 SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type) {
98  switch (gdal_data_type) {
99  case GDT_Byte:
100  case GDT_Int16:
101  return kSMALLINT;
102  case GDT_UInt16:
103  case GDT_Int32:
104  return kINT;
105  case GDT_UInt32:
106  return kBIGINT;
107  case GDT_Float32:
108  return kFLOAT;
109  case GDT_Float64:
110  return kDOUBLE;
111  case GDT_CInt16:
112  case GDT_CInt32:
113  case GDT_CFloat32:
114  case GDT_CFloat64:
115  default:
116  break;
117  }
118  throw std::runtime_error("Unknown/unsupported GDAL data type: " +
119  std::to_string(gdal_data_type));
120 }
121 
122 GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type) {
123  switch (sql_type) {
124  case kTINYINT:
125  case kSMALLINT:
126  return GDT_Int16;
127  case kINT:
128  return GDT_Int32;
129  case kBIGINT:
130  return GDT_UInt32;
131  case kFLOAT:
132  return GDT_Float32;
133  case kDOUBLE:
134  return GDT_Float64;
135  default:
136  break;
137  }
138  throw std::runtime_error("Unknown/unsupported SQL type: " + to_string(sql_type));
139 }
140 
141 std::vector<std::string> get_ome_tiff_band_names(
142  const std::string& tifftag_imagedescription) {
143  // expected schema:
144  //
145  // ...
146  // <Image ...>
147  // <Pixels ...>
148  // <Channel ID="Channel:0:<index>" Name="<name>" ...>
149  // ...
150  // </Channel>
151  // ...
152  // </Pixels>
153  // </Image>
154  // ...
155 
156  using Document = xercesc_3_2::DOMDocument;
157  using Element = xercesc_3_2::DOMElement;
158  using String = xercesc_3_2::XMLString;
159  using Utils = xercesc_3_2::XMLPlatformUtils;
160  using Parser = xercesc_3_2::XercesDOMParser;
161  using Source = xercesc_3_2::MemBufInputSource;
162 
163  Utils::Initialize();
164 
165  std::unordered_map<std::string, XMLCh*> tags;
166 
167  ScopeGuard release_tags = [&] {
168  for (auto& tag : tags) {
169  String::release(&tag.second);
170  }
171  };
172 
173  tags.emplace("ID", String::transcode("ID"));
174  tags.emplace("Name", String::transcode("Name"));
175  tags.emplace("Buffer", String::transcode("Buffer"));
176  tags.emplace("Image", String::transcode("Image"));
177  tags.emplace("Pixels", String::transcode("Pixels"));
178  tags.emplace("Channel", String::transcode("Channel"));
179 
180  auto get_tag = [&](const std::string& name) -> const XMLCh* {
181  return tags.find(name)->second;
182  };
183 
184  Parser parser;
185 
186  parser.setValidationScheme(Parser::Val_Never);
187  parser.setDoNamespaces(false);
188  parser.setDoSchema(false);
189  parser.setLoadExternalDTD(false);
190 
191  Source source(reinterpret_cast<const XMLByte*>(tifftag_imagedescription.c_str()),
192  tifftag_imagedescription.length(),
193  get_tag("Buffer"));
194 
195  parser.parse(source);
196 
197  std::vector<std::string> band_names;
198 
199  auto const* document = parser.getDocument();
200  if (document) {
201  auto const* root_element = document->getDocumentElement();
202  if (root_element) {
203  auto const* image_list = root_element->getElementsByTagName(get_tag("Image"));
204  if (image_list && image_list->getLength() > 0) {
205  auto const* image_element = dynamic_cast<const Element*>(image_list->item(0));
206  auto const* pixels_list = image_element->getElementsByTagName(get_tag("Pixels"));
207  if (pixels_list && pixels_list->getLength() > 0) {
208  auto const* pixels_element = dynamic_cast<const Element*>(pixels_list->item(0));
209  auto const* channel_list =
210  pixels_element->getElementsByTagName(get_tag("Channel"));
211  for (XMLSize_t i = 0; i < channel_list->getLength(); i++) {
212  auto const* channel_element =
213  dynamic_cast<const Element*>(channel_list->item(i));
214  auto const* name_attribute = channel_element->getAttribute(get_tag("Name"));
215  if (name_attribute) {
216  auto const* name = String::transcode(name_attribute);
217  if (name) {
218  band_names.push_back(name);
219  }
220  }
221  }
222  }
223  }
224  }
225  }
226 
227  return band_names;
228 }
229 
231  switch (point_type) {
233  return kSMALLINT;
235  return kINT;
237  return kFLOAT;
239  return kDOUBLE;
241  return kPOINT;
242  default:
243  CHECK(false);
244  }
245  return kNULLT;
246 }
247 
248 double conv_4326_900913_x(const double x) {
249  return x * 111319.490778;
250 }
251 
252 double conv_4326_900913_y(const double y) {
253  return 6378136.99911 * log(tan(.00872664626 * y + .785398163397));
254 }
255 
256 } // namespace
257 
258 //
259 // class RasterImporter
260 //
261 
262 void RasterImporter::detect(const std::string& file_name,
263  const std::string& specified_band_names,
264  const std::string& specified_band_dimensions,
265  const PointType point_type,
266  const PointTransform point_transform,
267  const bool point_compute_angle,
268  const bool throw_on_error,
269  const MetadataColumnInfos& metadata_column_infos) {
270  // open base file to check for subdatasources
271  bool has_spatial_reference{false};
272  {
273  // prepare to open
274  // open the file
277  if (datasource == nullptr) {
278  throw std::runtime_error("Raster Importer: Unable to open raster file " +
279  file_name);
280  }
281 
282 #if DEBUG_RASTER_IMPORT
283  // log all its metadata
284  Geospatial::GDAL::logMetadata(datasource);
285 #endif
286 
287  // get and add subdatasource datasource names
288  auto const subdatasources =
289  Geospatial::GDAL::unpackMetadata(datasource->GetMetadata("SUBDATASETS"));
290  for (auto const& subdatasource : subdatasources) {
291  auto const name_equals = subdatasource.find("_NAME=");
292  if (name_equals != std::string::npos) {
293  auto subdatasource_name =
294  subdatasource.substr(name_equals + 6, std::string::npos);
296  << "DEBUG: Found subdatasource '" << subdatasource_name << "'";
297  datasource_names_.push_back(subdatasource_name);
298  }
299  }
300 
301  // note if it has a spatial reference (of either type)
302  if (datasource->GetSpatialRef()) {
303  has_spatial_reference = true;
304  LOG_IF(INFO, DEBUG_RASTER_IMPORT) << "DEBUG: Found regular Spatial Reference";
305  } else if (datasource->GetGCPSpatialRef()) {
306  auto const num_points = datasource->GetGCPCount();
308  << "DEBUG: Found GCP Spatial Reference with " << num_points << " GCPs";
309  has_spatial_reference = (num_points > 0);
310  }
311 
312  // fetch
313  getRawBandNamesForFormat(datasource);
314  }
315 
316  // if we didn't find any subdatasources, just use the base file
317  if (datasource_names_.size() == 0) {
319  << "DEBUG: No sub-datasources found, just using '" << file_name << "'";
320  datasource_names_.push_back(file_name);
321  }
322 
323  // auto point transform
324  if (point_transform == PointTransform::kAuto) {
326  has_spatial_reference ? PointTransform::kWorld : PointTransform::kNone;
327  } else {
328  point_transform_ = point_transform;
329  }
330 
331  // auto point type
332  bool optimize_to_smallint = false;
333  if (point_type == PointType::kAuto) {
336  optimize_to_smallint = true;
337  } else {
339  }
340  } else {
341  point_type_ = point_type;
342  }
343 
344  std::set<std::pair<int, int>> found_dimensions;
345 
346  // lambda to process a datasource
347  auto process_datasource = [&](const Geospatial::GDAL::DataSourceUqPtr& datasource,
348  const uint32_t datasource_idx) {
349  auto raster_count = datasource->GetRasterCount();
350  if (raster_count == 0) {
351  throw std::runtime_error("Raster Importer: Raster file " + file_name +
352  " has no rasters");
353  }
354 
355  // for each band (1-based index)
356  for (int i = 1; i <= raster_count; i++) {
357  // get band name
358  auto band_name = getBandName(datasource_idx, i);
359 
360  // if there are specified band names, and this isn't one of them, skip
361  if (!shouldImportBandWithName(band_name)) {
362  continue;
363  }
364 
365  // get the band
366  auto band = datasource->GetRasterBand(i);
367  CHECK(band);
368 
369  // get band dimensions
370  auto const band_width = band->GetXSize();
371  auto const band_height = band->GetYSize();
372  int block_size_x, block_size_y;
373  band->GetBlockSize(&block_size_x, &block_size_y);
374 
375  // report
376  LOG(INFO) << "Raster Importer: Found Band '" << band_name << "', with dimensions "
377  << band_width << "x" << band_height;
378 
379  // if there are specified band dimensions, and this band doesn't match, skip
380  if (!shouldImportBandWithDimensions(band_width, band_height)) {
381  continue;
382  }
383 
384  // add to found dimensions
385  found_dimensions.insert({band_width, band_height});
386 
387  // get SQL type
388  auto sql_type = gdal_data_type_to_sql_type(band->GetRasterDataType());
389 
390  // get null value and validity
391  int null_value_valid{0};
392  auto null_value = band->GetNoDataValue(&null_value_valid);
393 
394  // store info
395  if (specified_band_names_map_.size()) {
396  // find and finalize existing info for this band
397  auto itr =
398  std::find_if(import_band_infos_.begin(),
399  import_band_infos_.end(),
400  [&](ImportBandInfo& info) { return info.name == band_name; });
401  CHECK(itr != import_band_infos_.end());
402  itr->datasource_idx = datasource_idx;
403  itr->band_idx = i;
404  itr->sql_type = sql_type;
405  itr->null_value = null_value;
406  itr->null_value_valid = (null_value_valid != 0);
407  } else {
408  // import all bands
409  import_band_infos_.push_back({band_name,
410  band_name,
411  sql_type,
412  datasource_idx,
413  i,
414  null_value,
415  null_value_valid != 0});
416  }
417  }
418  };
419 
420  // initialize filtering
422  specified_band_names, specified_band_dimensions, metadata_column_infos);
423 
424  // process datasources
425  uint32_t datasource_idx{0u};
426  std::vector<std::string> valid_datasource_names;
427  for (auto const& datasource_name : datasource_names_) {
428  // open it
429  Geospatial::GDAL::DataSourceUqPtr datasource_handle =
430  Geospatial::GDAL::openDataSource(datasource_name,
432 
433  // did it open?
434  if (datasource_handle == nullptr) {
435  continue;
436  } else {
437  valid_datasource_names.push_back(datasource_name);
438  }
439 
440  // process it
441  process_datasource(datasource_handle, datasource_idx++);
442  }
443 
444  // check dimensions
445  if (found_dimensions.size() > 1u) {
446  // report
447  LOG(WARNING) << "Raster Importer: Dimensions found as follows:";
448  for (auto const& dimension : found_dimensions) {
449  LOG(WARNING) << "Raster Importer: " << dimension.first << "x" << dimension.second;
450  }
451  if (throw_on_error) {
452  throw std::runtime_error("Raster Importer: Raster file '" + file_name +
453  "' datasource/band dimensions are inconsistent. This file "
454  "cannot be imported into a single table.");
455  } else {
456  LOG(WARNING) << "Raster Importer: Raster file '" << file_name
457  << "' datasource/band dimensions are inconsistent. This file "
458  "cannot be imported into a single table.";
459  }
460  } else if (found_dimensions.size() == 1u) {
461  bands_width_ = found_dimensions.begin()->first;
462  bands_height_ = found_dimensions.begin()->second;
463  LOG(INFO) << "Raster Importer: Importing dimension " << bands_width_ << "x"
464  << bands_height_;
465  }
466 
467  // report if we found nothing
468  if (import_band_infos_.size() == 0 || found_dimensions.size() == 0u) {
469  if (throw_on_error) {
470  throw std::runtime_error("Raster Importer: Raster file " + file_name +
471  " has no importable bands");
472  } else {
473  LOG(ERROR) << "Raster Importer: Raster file " << file_name
474  << " has no importable bands";
475  }
476  }
477 
478  // report any invalid datasources and keep only the valid ones
479  // the datasource indices stored in the infos will match the valid ones
480  auto const failed_datasource_count =
481  datasource_names_.size() - valid_datasource_names.size();
482  if (failed_datasource_count) {
483  LOG(WARNING) << "Raster Importer: Failed to open " << failed_datasource_count
484  << " out of " << std::to_string(datasource_names_.size())
485  << " datasources";
486  }
487  datasource_names_ = valid_datasource_names;
488 
489  // fail if any specified import band names were not found
491 
492  // optimize point_type for small rasters
493  if (optimize_to_smallint && (bands_width_ <= std::numeric_limits<int16_t>::max() &&
494  bands_height_ <= std::numeric_limits<int16_t>::max())) {
496  }
497 
498  // capture this
499  point_compute_angle_ = point_compute_angle;
500 
501  // validate final point type/transform
502  if (!has_spatial_reference && point_transform_ == PointTransform::kWorld) {
503  throw std::runtime_error(
504  "Raster Importer: Raster file has no geo-spatial reference metadata, unable to "
505  "transform points to World Space. Use raster_point_transform='none|file' "
506  "instead.");
507  }
510  throw std::runtime_error(
511  "Raster Importer: Cannot do World Transform with SMALLINT/INT Point type");
512  }
513  } else if (point_type_ == PointType::kPoint) {
515  throw std::runtime_error(
516  "Raster Importer: Must do World Transform with POINT Point type");
517  }
518  }
520  (bands_width_ > std::numeric_limits<int16_t>::max() ||
521  bands_height_ > std::numeric_limits<int16_t>::max())) {
522  throw std::runtime_error(
523  "Raster Importer: Raster file '" + file_name +
524  "' has band dimensions too large for 'SMALLINT' raster_point_type (" +
526  }
528  throw std::runtime_error(
529  "Raster Importer: Must do World Transform with raster_point_compute_angle");
530  }
531 }
532 
533 void RasterImporter::import(const uint32_t max_threads) {
534  // validate
535  if (import_band_infos_.size() == 0) {
536  throw std::runtime_error("Raster Import aborted. No bands to import.");
537  }
538 
539  // validate
540  CHECK_GE(max_threads, 1u);
541 
542  // open all datasources on all threads
543  for (auto const& datasource_name : datasource_names_) {
544  std::vector<Geospatial::GDAL::DataSourceUqPtr> datasource_thread_handles;
545  for (uint32_t i = 0; i < max_threads; i++) {
546  auto datasource_handle = Geospatial::GDAL::openDataSource(
547  datasource_name, import_export::SourceType::kRasterFile);
548  if (datasource_handle == nullptr) {
549  throw std::runtime_error("Raster Importer: Unable to open raster file " +
550  datasource_name);
551  }
552  datasource_thread_handles.emplace_back(std::move(datasource_handle));
553  }
554  datasource_handles_.emplace_back(std::move(datasource_thread_handles));
555  }
556 
557  // use handle for the first datasource from the first thread to read the globals
558  auto const& global_datasource_handle = datasource_handles_[0][0];
559 
560  // get the raster affine transform
561  // this converts the points from pixel space to the file coordinate system space
562  global_datasource_handle->GetGeoTransform(affine_transform_matrix_.data());
563 
564  // transform logic
565  // @TODO(se) discuss!
566  // if the raster_point_type is SMALLINT or INT, we just store pixel X/Y
567  // if the raster_point_type is FLOAT, DOUBLE, or POINT, we store projected X/Y
568  // this will either be in the file coordinate space (raster_point_transform='affine')
569  // or in world space (raster_point_transform='default')
570  // yeah, that's a mess, but I need to sleep on it
571 
572  // determine the final desired SRID
573  // the first column will either be a scalar of some type or a POINT
574  // FLOAT or DOUBLE can support world space coords (so assume 4326)
575  // POINT can be anything and can define an SRID (but assume 4326 for now)
576  int srid{0};
577  switch (point_type_) {
578  case PointType::kNone:
580  case PointType::kInt:
581  break;
582  case PointType::kFloat:
583  case PointType::kDouble:
584  case PointType::kPoint: {
585  srid = 4326;
586  } break;
587  case PointType::kAuto:
588  CHECK(false);
589  }
590 
591  // create a world-space coordinate transformation for the points?
592  if (srid != 0 && point_transform_ == PointTransform::kWorld) {
593  // get the file's spatial reference, if it has one (only geo files will)
594  // also determine if we need to do additional GCP transformations
595  bool need_gcp_transformers{false};
596  auto const* spatial_reference = global_datasource_handle->GetSpatialRef();
597  if (!spatial_reference) {
598  spatial_reference = global_datasource_handle->GetGCPSpatialRef();
599  if (spatial_reference) {
600  need_gcp_transformers = true;
601  }
602  }
603  if (spatial_reference) {
604  // if it's valid, create a transformation to use on the points
605  // make a spatial reference for the desired SRID
606  OGRSpatialReference sr_geometry;
607  auto import_err = sr_geometry.importFromEPSG(srid);
608  if (import_err != OGRERR_NONE) {
609  throw std::runtime_error("Failed to create spatial reference for EPSG " +
610  std::to_string(srid));
611  }
612 #if GDAL_VERSION_MAJOR >= 3
613  sr_geometry.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
614 #endif
615 
616  for (uint32_t i = 0; i < max_threads; i++) {
617  coordinate_transformations_.emplace_back(
618  OGRCreateCoordinateTransformation(spatial_reference, &sr_geometry));
619  if (!coordinate_transformations_.back()) {
620  throw std::runtime_error(
621  "Failed to create coordinate system transformation to EPSG " +
622  std::to_string(srid));
623  }
624  if (need_gcp_transformers) {
625  gcp_transformers_.emplace_back(
626  new GCPTransformer(global_datasource_handle.get()));
627  }
628  }
629  }
630  }
631 }
632 
633 const uint32_t RasterImporter::getNumBands() const {
634  return import_band_infos_.size();
635 }
636 
638  NamesAndSQLTypes names_and_sql_types;
639  if (point_type_ != PointType::kNone) {
640  auto sql_type = point_type_to_sql_type(point_type_);
643  names_and_sql_types.emplace_back("raster_point", sql_type);
644  } else {
645  names_and_sql_types.emplace_back("raster_lon", sql_type);
646  names_and_sql_types.emplace_back("raster_lat", sql_type);
647  }
648  if (point_compute_angle_) {
649  names_and_sql_types.emplace_back("raster_angle", kFLOAT);
650  }
651  } else {
652  names_and_sql_types.emplace_back("raster_x", sql_type);
653  names_and_sql_types.emplace_back("raster_y", sql_type);
654  }
655  }
656  return names_and_sql_types;
657 }
658 
660  return point_transform_;
661 }
662 
664  NamesAndSQLTypes names_and_sql_types;
665  // return alt names
666  for (auto const& info : import_band_infos_) {
667  names_and_sql_types.emplace_back(info.alt_name, info.sql_type);
668  }
669  return names_and_sql_types;
670 }
671 
673  const int band_idx) const {
674  CHECK_LT(static_cast<uint32_t>(band_idx), import_band_infos_.size());
675  auto const& info = import_band_infos_[band_idx];
676  return {info.null_value, info.null_value_valid};
677 }
678 
680  const uint32_t thread_idx,
681  const int y) const {
682  Coords coords(bands_width_);
683 
684  double prev_mx{0.0}, prev_my{0.0};
685 
686  for (int x = 0; x < bands_width_; x++) {
687  // start with the pixel coord
688  double dx = double(x);
689  double dy = double(y);
690 
691  // transforms
693  // affine transform to the file coordinate space
694  double fdx = affine_transform_matrix_[0] + (dx * affine_transform_matrix_[1]) +
695  (dy * affine_transform_matrix_[2]);
696  double fdy = affine_transform_matrix_[3] + (dx * affine_transform_matrix_[4]) +
697  (dy * affine_transform_matrix_[5]);
698  dx = fdx;
699  dy = fdy;
700 
701  // do geo-spatial transform if we can (otherwise leave alone)
703  // first any GCP transformation
704  if (thread_idx < gcp_transformers_.size()) {
705  gcp_transformers_[thread_idx]->transform(dx, dy);
706  }
707  // then any additional transformation to world space
708  if (thread_idx < coordinate_transformations_.size()) {
709  int success{0};
710  coordinate_transformations_[thread_idx]->Transform(
711  1, &dx, &dy, nullptr, &success);
712  if (!success) {
713  throw std::runtime_error("Failed OGRCoordinateTransform");
714  }
715  }
716  }
717  }
718 
719  // store
720  coords[x] = {dx, dy, 0.0f};
721 
722  // compute and overwrite angle?
723  if (point_compute_angle_) {
724  if (x == 0) {
725  // capture first pixel's Mercator coords
726  prev_mx = conv_4326_900913_x(dx);
727  prev_my = conv_4326_900913_y(dy);
728  } else {
729  // compute angle between this pixel's Mercator coords and the previous
730  // expressed as clockwise degrees for symbol rotation
731  auto const mx = conv_4326_900913_x(dx);
732  auto const my = conv_4326_900913_y(dy);
733  auto const angle = atan2f(my - prev_my, mx - prev_mx) * (-180.0f / M_PI);
734  prev_mx = mx;
735  prev_my = my;
736 
737  // overwrite this angle (and that of the first pixel, if this is the second)
738  std::get<2>(coords[x]) = angle;
739  if (x == 1) {
740  std::get<2>(coords[0]) = angle;
741  }
742  }
743  }
744  }
745 
746  // done
747  return coords;
748 }
749 
750 void RasterImporter::getRawPixels(const uint32_t thread_idx,
751  const uint32_t band_idx,
752  const int y_start,
753  const int num_rows,
754  const SQLTypes column_sql_type,
755  RawPixels& raw_pixel_bytes) {
756  // get the band info
757  CHECK_LT(band_idx, import_band_infos_.size());
758  auto const band_info = import_band_infos_[band_idx];
759 
760  // get the dataset handle for this dataset and thread
761  CHECK_LT(band_info.datasource_idx, datasource_handles_.size());
762  auto const& datasource_handles_per_thread =
763  datasource_handles_[band_info.datasource_idx];
764  CHECK_LT(thread_idx, datasource_handles_per_thread.size());
765  auto const& datasource_handle = datasource_handles_per_thread[thread_idx];
766  CHECK(datasource_handle);
767 
768  // get the band
769  auto* band = datasource_handle->GetRasterBand(band_info.band_idx);
770  CHECK(band);
771 
772  // translate the requested data type
773  auto const gdal_data_type = sql_type_to_gdal_data_type(column_sql_type);
774 
775  // read the scanlines
776  auto result = band->RasterIO(GF_Read,
777  0,
778  y_start,
779  bands_width_,
780  num_rows,
781  raw_pixel_bytes.data(),
782  bands_width_,
783  num_rows,
784  gdal_data_type,
785  0,
786  0,
787  nullptr);
788  CHECK_EQ(result, CE_None);
789 }
790 
791 //
792 // private
793 //
794 
796  const Geospatial::GDAL::DataSourceUqPtr& datasource) {
797  // get the name of the driver that GDAL picked to open the file
798  std::string driver_name = datasource->GetDriverName();
799 
800  LOG(INFO) << "Raster Importer: Using Raster Driver '" << driver_name << "'";
801 
802  // logic is different for each format
803  if (driver_name == "GTiff") {
804  //
805  // TIFF
806  // Could be an OME TIFF or a GeoTIFF
807  //
808  auto const tifftag_imagedescription = Geospatial::GDAL::getMetadataString(
809  datasource->GetMetadata(), "TIFFTAG_IMAGEDESCRIPTION");
810  if (tifftag_imagedescription.length()) {
811  //
812  // OME TIFF
813  // one datasource per band
814  // names are in a JSON blob
815  //
816  auto const names = get_ome_tiff_band_names(tifftag_imagedescription);
817  for (auto const& name : names) {
818  raw_band_names_.push_back({name});
819  }
820  } else {
821  //
822  // Some other GeoTIFF variant
823  // single datasource
824  // names in band descriptions?
825  //
826  std::vector<std::string> names;
827  auto const raster_count = datasource->GetRasterCount();
828  for (int i = 1; i <= raster_count; i++) {
829  auto* band = datasource->GetRasterBand(i);
830  CHECK(band);
831  auto const* description = band->GetDescription();
832  if (!description || strlen(description) == 0) {
833  raw_band_names_.clear();
834  return;
835  }
836  names.push_back(description);
837  }
838  raw_band_names_.emplace_back(std::move(names));
839  }
840  } else if (driver_name == "netCDF" || driver_name == "Zarr") {
841  //
842  // for these formats the band names are in the datasource names
843  // that we already obtained, of the format:
844  // <FORMAT>:"<filename>":<bandname>
845  // one band per datasource
846  //
847  for (auto const& datasource_name : datasource_names_) {
848  std::vector<std::string> tokens;
849  boost::split(tokens, datasource_name, boost::is_any_of(":"));
850  if (tokens.size() < 3 || tokens[2].length() == 0u) {
851  LOG(WARNING) << "Raster Importer: Failed to parse band name from datasource name";
852  raw_band_names_.clear();
853  return;
854  }
855  raw_band_names_.push_back({tokens[2]});
856  }
857  } else if (driver_name == "GRIB") {
858  //
859  // GRIB/GRIB2
860  // one datasource
861  // names are in the per-band metadata
862  //
863  std::vector<std::string> names;
864  auto const raster_count = datasource->GetRasterCount();
865  for (int i = 1; i <= raster_count; i++) {
866  auto* band = datasource->GetRasterBand(i);
867  CHECK(band);
868  auto const grib_comment =
869  Geospatial::GDAL::getMetadataString(band->GetMetadata(), "GRIB_COMMENT");
870  if (grib_comment.length() == 0) {
871  LOG(WARNING) << "Raster Importer: Failed to parse band name from GRIB_COMMENT";
872  raw_band_names_.clear();
873  return;
874  }
875  names.push_back(grib_comment);
876  }
877  raw_band_names_.emplace_back(std::move(names));
878  }
879 }
880 
882  const std::string& specified_band_names,
883  const std::string& specified_band_dimensions,
884  const MetadataColumnInfos& metadata_column_infos) {
885  // specified names?
886  if (specified_band_names.length()) {
887  // tokenize names
888  std::vector<std::string> names_and_alt_names;
889  boost::split(names_and_alt_names, specified_band_names, boost::is_any_of(","));
890 
891  // pre-populate infos (in given order) and found map
892  for (auto const& name_and_alt_name : names_and_alt_names) {
893  // parse for optional alt name
894  std::vector<std::string> tokens;
895  boost::split(tokens, name_and_alt_name, boost::is_any_of("="));
896  if (tokens.size() < 1 || tokens.size() > 2) {
897  throw std::runtime_error("Failed to parse specified name '" + name_and_alt_name +
898  "'");
899  }
900  auto const& name = strip(tokens[0]);
901  auto const& alt_name = strip(tokens[tokens.size() == 2 ? 1 : 0]);
902  import_band_infos_.push_back({name, alt_name, kNULLT, 0u, 0, 0.0, false});
903  specified_band_names_map_.emplace(name, false);
904  }
905  }
906 
907  column_name_repeats_map_.clear();
908 
909  // initialize repeats map with point column names(s)
910  // in case there are bands with the same name
911  auto const names_and_sql_types = getPointNamesAndSQLTypes();
912  for (auto const& name_and_sql_type : names_and_sql_types) {
913  column_name_repeats_map_.emplace(
914  boost::algorithm::to_lower_copy(name_and_sql_type.first), 1);
915  }
916 
917  // specified band dimensions?
918  if (specified_band_dimensions.length()) {
919  // tokenize dimension values
920  std::vector<std::string> values;
921  boost::split(values, specified_band_dimensions, boost::is_any_of(",x "));
922  if (values.size() != 2u) {
923  throw std::invalid_argument("failed to parse width/height values from '" +
924  specified_band_dimensions + "'");
925  }
926  try {
927  size_t num_chars_w{0u}, num_chars_h{0u};
928  specified_band_width_ = std::stoi(values[0], &num_chars_w);
929  specified_band_height_ = std::stoi(values[1], &num_chars_h);
930  if (num_chars_w == 0u || num_chars_h == 0u) {
931  throw std::invalid_argument("empty width/height value");
932  }
934  throw std::invalid_argument("negative width/height value");
935  }
936  } catch (std::invalid_argument& e) {
937  throw std::runtime_error("Raster Importer: Invalid specified dimensions (" +
938  std::string(e.what()) + ")");
939  } catch (std::out_of_range& e) {
940  throw std::runtime_error("Raster Importer: Out-of-range specified dimensions (" +
941  std::string(e.what()) + ")");
942  }
943  }
944 
945  // also any metadata column names
946  for (auto const& mci : metadata_column_infos) {
947  auto column_name_lower =
948  boost::algorithm::to_lower_copy(mci.column_descriptor.columnName);
949  if (column_name_repeats_map_.find(column_name_lower) !=
950  column_name_repeats_map_.end()) {
951  throw std::runtime_error("Invalid metadata column name '" +
952  mci.column_descriptor.columnName +
953  "' (clashes with existing column name)");
954  }
955  column_name_repeats_map_.emplace(column_name_lower, 1);
956  }
957 }
958 
960  // if no specified band names, import everything
961  if (specified_band_names_map_.size() == 0u) {
962  return true;
963  }
964  // find it, and mark as found
965  auto itr = specified_band_names_map_.find(name);
966  if (itr != specified_band_names_map_.end()) {
967  itr->second = true;
968  return true;
969  }
970  return false;
971 }
972 
973 bool RasterImporter::shouldImportBandWithDimensions(const int width, const int height) {
974  // if no specified dimensions, import everything
976  return true;
977  }
978  // import only if dimensions match
979  return (width == specified_band_width_) && (height == specified_band_height_);
980 }
981 
982 std::string RasterImporter::getBandName(const uint32_t datasource_idx,
983  const int band_idx) {
984  std::string band_name;
985 
986  // format-specific name fetching
987  if (datasource_idx < raw_band_names_.size()) {
988  if (band_idx > 0 &&
989  band_idx <= static_cast<int>(raw_band_names_[datasource_idx].size())) {
990  band_name = raw_band_names_[datasource_idx][band_idx - 1];
991  }
992  }
993 
994  // if we didn't get a format-specific name, use a default name
995  if (band_name.length() == 0) {
996  band_name =
997  "band_" + std::to_string(datasource_idx + 1) + "_" + std::to_string(band_idx);
998  }
999 
1000  // additional suffix if not unique
1001  auto band_name_lower = boost::algorithm::to_lower_copy(band_name);
1002  auto itr = column_name_repeats_map_.find(band_name_lower);
1003  if (itr != column_name_repeats_map_.end()) {
1004  auto const suffix = ++(itr->second);
1005  band_name += "_" + std::to_string(suffix);
1006  } else {
1007  column_name_repeats_map_.emplace(band_name_lower, 1);
1008  }
1009 
1010  // sanitize and return
1011  return ImportHelpers::sanitize_name(band_name, /*underscore=*/true);
1012 }
1013 
1015  for (auto const& itr : specified_band_names_map_) {
1016  if (!itr.second) {
1017  throw std::runtime_error("Specified import band name '" + itr.first +
1018  "' was not found in the input raster file");
1019  }
1020  }
1021 }
1022 
1023 } // namespace import_export
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const NamesAndSQLTypes getBandNamesAndSQLTypes() const
SQLTypes
Definition: sqltypes.h:55
const NullValue getBandNullValue(const int band_idx) const
#define DEBUG_RASTER_IMPORT
static std::vector< std::string > unpackMetadata(char **metadata)
Definition: GDAL.cpp:246
void getRawBandNamesForFormat(const Geospatial::GDAL::DataSourceUqPtr &datasource)
static void logMetadata(GDALMajorObject *object)
Definition: GDAL.cpp:257
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::tuple< double, double, float >> Coords
GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type)
void transform(double &x, double &y)
#define CHECK_GE(x, y)
Definition: Logger.h:306
const uint32_t getNumBands() const
SQLTypes point_type_to_sql_type(const RasterImporter::PointType point_type)
const PointTransform getPointTransform() const
std::vector< std::byte > RawPixels
bool shouldImportBandWithName(const std::string &name)
std::string suffix(SQLTypes type)
Definition: Codegen.cpp:69
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
#define M_PI
Definition: constants.h:25
std::vector< std::vector< std::string > > raw_band_names_
#define LOG_IF(severity, condition)
Definition: Logger.h:383
std::vector< Geospatial::GDAL::CoordinateTransformationUqPtr > coordinate_transformations_
bool shouldImportBandWithDimensions(const int width, const int height)
const NamesAndSQLTypes getPointNamesAndSQLTypes() const
std::vector< ImportBandInfo > import_band_infos_
std::vector< std::string > datasource_names_
void import(const uint32_t max_threads)
std::vector< std::string > get_ome_tiff_band_names(const std::string &tifftag_imagedescription)
#define CHECK_LT(x, y)
Definition: Logger.h:303
void getRawPixels(const uint32_t thread_idx, const uint32_t band_idx, const int y_start, const int num_rows, const SQLTypes column_sql_type, RawPixels &raw_pixel_bytes)
std::string sanitize_name(const std::string &name, const bool underscore=false)
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
Definition: GDAL.h:48
std::map< std::string, bool > specified_band_names_map_
std::vector< std::vector< Geospatial::GDAL::DataSourceUqPtr > > datasource_handles_
std::pair< double, bool > NullValue
const Coords getProjectedPixelCoords(const uint32_t thread_idx, const int y) const
std::vector< std::pair< std::string, SQLTypes >> NamesAndSQLTypes
std::map< std::string, int > column_name_repeats_map_
#define CHECK(condition)
Definition: Logger.h:291
std::array< double, 6 > affine_transform_matrix_
void initializeFiltering(const std::string &specified_band_names, const std::string &specified_band_dimensions, const MetadataColumnInfos &metadata_column_infos)
Definition: sqltypes.h:62
string name
Definition: setup.in.py:72
std::string getBandName(const uint32_t datasource_idx, const int band_idx)
static DataSourceUqPtr openDataSource(const std::string &name, const import_export::SourceType source_type)
Definition: GDAL.cpp:181
EXTENSION_NOINLINE double conv_4326_900913_y(const double y)
void detect(const std::string &file_name, const std::string &specified_band_names, const std::string &specified_band_dimensions, const PointType point_type, const PointTransform point_transform, const bool point_compute_angle, const bool throw_on_error, const MetadataColumnInfos &metadata_column_infos)
EXTENSION_NOINLINE double conv_4326_900913_x(const double x)
static std::string getMetadataString(char **metadata, const std::string &key)
Definition: GDAL.cpp:276
SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type)
std::vector< std::unique_ptr< GCPTransformer > > gcp_transformers_
std::vector< MetadataColumnInfo > MetadataColumnInfos