OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RasterImporter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file RasterImporter.cpp
19  * @brief GDAL Raster File Importer
20  *
21  */
22 
24 
25 #include <boost/algorithm/string.hpp>
26 #include <boost/filesystem.hpp>
27 #include <boost/tokenizer.hpp>
28 
29 #include <xercesc/dom/DOMDocument.hpp>
30 #include <xercesc/dom/DOMElement.hpp>
31 #include <xercesc/dom/DOMNodeList.hpp>
32 #include <xercesc/framework/MemBufInputSource.hpp>
33 #include <xercesc/parsers/XercesDOMParser.hpp>
34 
35 #include <gdal.h>
36 #include <gdal_alg.h>
37 #include <ogrsf_frmts.h>
38 
39 #include "Shared/import_helpers.h"
40 #include "Shared/misc.h"
41 #include "Shared/scope.h"
42 
43 #define DEBUG_RASTER_IMPORT 0
44 
45 namespace import_export {
46 
47 GCPTransformer::GCPTransformer(OGRDataSource* datasource, const Mode mode)
48  : transform_arg_{nullptr}, mode_{mode} {
49  CHECK(datasource);
50  auto const gcp_count = datasource->GetGCPCount();
51  auto const* gcp_list = datasource->GetGCPs();
52  switch (mode_) {
53  case Mode::kPolynomial: {
54  static constexpr int kPolynomialOrder = 2;
55  transform_arg_ =
56  GDALCreateGCPTransformer(gcp_count, gcp_list, kPolynomialOrder, false);
57  break;
58  }
59  case Mode::kThinPlateSpline: {
60  transform_arg_ = GDALCreateTPSTransformer(gcp_count, gcp_list, false);
61  break;
62  }
63  }
64  CHECK(transform_arg_);
65 }
66 
68  switch (mode_) {
69  case Mode::kPolynomial:
70  GDALDestroyGCPTransformer(transform_arg_);
71  break;
73  GDALDestroyTPSTransformer(transform_arg_);
74  break;
75  }
76 }
77 
78 void GCPTransformer::transform(double& x, double& y) {
79  int success{0};
80  switch (mode_) {
81  case Mode::kPolynomial:
82  GDALGCPTransform(transform_arg_, false, 1, &x, &y, nullptr, &success);
83  break;
85  GDALTPSTransform(transform_arg_, false, 1, &x, &y, nullptr, &success);
86  break;
87  }
88  if (!success) {
89  throw std::runtime_error("Failed GCP/TPS Transform");
90  }
91 }
92 
93 namespace {
94 
95 // for these conversions, consider that we do not have unsigned integer SQLTypes
96 // we must therefore promote Byte to SMALLINT, UInt16 to INT, and UInt32 to BIGINT
97 
98 SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type) {
99  switch (gdal_data_type) {
100  case GDT_Byte:
101  case GDT_Int16:
102  return kSMALLINT;
103  case GDT_UInt16:
104  case GDT_Int32:
105  return kINT;
106  case GDT_UInt32:
107  return kBIGINT;
108  case GDT_Float32:
109  return kFLOAT;
110  case GDT_Float64:
111  return kDOUBLE;
112  case GDT_CInt16:
113  case GDT_CInt32:
114  case GDT_CFloat32:
115  case GDT_CFloat64:
116  default:
117  break;
118  }
119  throw std::runtime_error("Unknown/unsupported GDAL data type: " +
120  std::to_string(gdal_data_type));
121 }
122 
123 GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type) {
124  switch (sql_type) {
125  case kTINYINT:
126  case kSMALLINT:
127  return GDT_Int16;
128  case kINT:
129  return GDT_Int32;
130  case kBIGINT:
131  return GDT_UInt32;
132  case kFLOAT:
133  return GDT_Float32;
134  case kDOUBLE:
135  return GDT_Float64;
136  default:
137  break;
138  }
139  throw std::runtime_error("Unknown/unsupported SQL type: " + to_string(sql_type));
140 }
141 
142 std::vector<std::string> get_ome_tiff_band_names(
143  const std::string& tifftag_imagedescription) {
144  // expected schema:
145  //
146  // ...
147  // <Image ...>
148  // <Pixels ...>
149  // <Channel ID="Channel:0:<index>" Name="<name>" ...>
150  // ...
151  // </Channel>
152  // ...
153  // </Pixels>
154  // </Image>
155  // ...
156 
157  using Document = xercesc_3_2::DOMDocument;
158  using Element = xercesc_3_2::DOMElement;
159  using String = xercesc_3_2::XMLString;
160  using Utils = xercesc_3_2::XMLPlatformUtils;
161  using Parser = xercesc_3_2::XercesDOMParser;
162  using Source = xercesc_3_2::MemBufInputSource;
163 
164  Utils::Initialize();
165 
166  std::unordered_map<std::string, XMLCh*> tags;
167 
168  ScopeGuard release_tags = [&] {
169  for (auto& tag : tags) {
170  String::release(&tag.second);
171  }
172  };
173 
174  tags.emplace("ID", String::transcode("ID"));
175  tags.emplace("Name", String::transcode("Name"));
176  tags.emplace("Buffer", String::transcode("Buffer"));
177  tags.emplace("Image", String::transcode("Image"));
178  tags.emplace("Pixels", String::transcode("Pixels"));
179  tags.emplace("Channel", String::transcode("Channel"));
180 
181  auto get_tag = [&](const std::string& name) -> const XMLCh* {
182  return tags.find(name)->second;
183  };
184 
185  Parser parser;
186 
187  parser.setValidationScheme(Parser::Val_Never);
188  parser.setDoNamespaces(false);
189  parser.setDoSchema(false);
190  parser.setLoadExternalDTD(false);
191 
192  Source source(reinterpret_cast<const XMLByte*>(tifftag_imagedescription.c_str()),
193  tifftag_imagedescription.length(),
194  get_tag("Buffer"));
195 
196  parser.parse(source);
197 
198  std::vector<std::string> band_names;
199 
200  auto const* document = parser.getDocument();
201  if (document) {
202  auto const* root_element = document->getDocumentElement();
203  if (root_element) {
204  auto const* image_list = root_element->getElementsByTagName(get_tag("Image"));
205  if (image_list && image_list->getLength() > 0) {
206  auto const* image_element = dynamic_cast<const Element*>(image_list->item(0));
207  auto const* pixels_list = image_element->getElementsByTagName(get_tag("Pixels"));
208  if (pixels_list && pixels_list->getLength() > 0) {
209  auto const* pixels_element = dynamic_cast<const Element*>(pixels_list->item(0));
210  auto const* channel_list =
211  pixels_element->getElementsByTagName(get_tag("Channel"));
212  for (XMLSize_t i = 0; i < channel_list->getLength(); i++) {
213  auto const* channel_element =
214  dynamic_cast<const Element*>(channel_list->item(i));
215  auto const* name_attribute = channel_element->getAttribute(get_tag("Name"));
216  if (name_attribute) {
217  auto const* name = String::transcode(name_attribute);
218  if (name) {
219  band_names.emplace_back(name);
220  }
221  }
222  }
223  }
224  }
225  }
226  }
227 
228  return band_names;
229 }
230 
232  switch (point_type) {
234  return kSMALLINT;
236  return kINT;
238  return kFLOAT;
240  return kDOUBLE;
242  return kPOINT;
243  default:
244  CHECK(false);
245  }
246  return kNULLT;
247 }
248 
249 double conv_4326_900913_x(const double x) {
250  return x * 111319.490778;
251 }
252 
253 double conv_4326_900913_y(const double y) {
254  return 6378136.99911 * log(tan(.00872664626 * y + .785398163397));
255 }
256 
257 std::string get_datasource_driver_name(OGRDataSource* source) {
258  CHECK(source);
259  auto driver = source->GetDriver();
260  CHECK(driver);
261  std::string name = driver->GetDescription();
262  return name;
263 }
264 
265 bool datasource_requires_libhdf(OGRDataSource* source) {
266  std::string name = get_datasource_driver_name(source);
267  return name == "HDF5Image" || name == "HDF5" || name == "BAG" || name == "KEA";
268 }
269 
270 } // namespace
271 
272 //
273 // class RasterImporter
274 //
275 
276 void RasterImporter::detect(const std::string& file_name,
277  const std::string& specified_band_names,
278  const std::string& specified_band_dimensions,
279  const PointType point_type,
280  const PointTransform point_transform,
281  const bool point_compute_angle,
282  const bool throw_on_error,
283  const MetadataColumnInfos& metadata_column_infos) {
284  // open base file to check for subdatasources
285  bool has_spatial_reference{false};
286  {
287  // prepare to open
288  // open the file
291  if (datasource == nullptr) {
292  throw std::runtime_error("Raster Importer: Unable to open raster file " +
293  file_name);
294  }
295 
296 #if DEBUG_RASTER_IMPORT
297  // log all its metadata
298  Geospatial::GDAL::logMetadata(datasource);
299 #endif
300 
301  // get and add subdatasource datasource names
302  auto const subdatasources =
303  Geospatial::GDAL::unpackMetadata(datasource->GetMetadata("SUBDATASETS"));
304  for (auto const& subdatasource : subdatasources) {
305  auto const name_equals = subdatasource.find("_NAME=");
306  if (name_equals != std::string::npos) {
307  auto subdatasource_name =
308  subdatasource.substr(name_equals + 6, std::string::npos);
310  << "DEBUG: Found subdatasource '" << subdatasource_name << "'";
311  datasource_names_.push_back(subdatasource_name);
312  }
313  }
314 
315  // note if it has a spatial reference (of either type)
316  if (datasource->GetSpatialRef()) {
317  has_spatial_reference = true;
318  LOG_IF(INFO, DEBUG_RASTER_IMPORT) << "DEBUG: Found regular Spatial Reference";
319  } else if (datasource->GetGCPSpatialRef()) {
320  auto const num_points = datasource->GetGCPCount();
322  << "DEBUG: Found GCP Spatial Reference with " << num_points << " GCPs";
323  has_spatial_reference = (num_points > 0);
324  }
325 
326  // fetch
327  getRawBandNamesForFormat(datasource);
328  }
329 
330  // if we didn't find any subdatasources, just use the base file
331  if (datasource_names_.size() == 0) {
333  << "DEBUG: No sub-datasources found, just using '" << file_name << "'";
334  datasource_names_.push_back(file_name);
335  }
336 
337  // auto point transform
338  if (point_transform == PointTransform::kAuto) {
340  has_spatial_reference ? PointTransform::kWorld : PointTransform::kNone;
341  } else {
342  point_transform_ = point_transform;
343  }
344 
345  // auto point type
346  bool optimize_to_smallint = false;
347  if (point_type == PointType::kAuto) {
350  optimize_to_smallint = true;
351  } else {
353  }
354  } else {
355  point_type_ = point_type;
356  }
357 
358  std::set<std::pair<int, int>> found_dimensions;
359 
360  // lambda to process a datasource
361  auto process_datasource = [&](const Geospatial::GDAL::DataSourceUqPtr& datasource,
362  const uint32_t datasource_idx) {
363  auto raster_count = datasource->GetRasterCount();
364  if (raster_count == 0) {
365  throw std::runtime_error("Raster Importer: Raster file " + file_name +
366  " has no rasters");
367  }
368 
369  // for each band (1-based index)
370  for (int i = 1; i <= raster_count; i++) {
371  // get band name
372  auto band_name = getBandName(datasource_idx, i);
373 
374  // if there are specified band names, and this isn't one of them, skip
375  if (!shouldImportBandWithName(band_name)) {
376  continue;
377  }
378 
379  // get the band
380  auto band = datasource->GetRasterBand(i);
381  CHECK(band);
382 
383  // get band dimensions
384  auto const band_width = band->GetXSize();
385  auto const band_height = band->GetYSize();
386  int block_size_x, block_size_y;
387  band->GetBlockSize(&block_size_x, &block_size_y);
388 
389  // report
390  LOG(INFO) << "Raster Importer: Found Band '" << band_name << "', with dimensions "
391  << band_width << "x" << band_height;
392 
393  // if there are specified band dimensions, and this band doesn't match, skip
394  if (!shouldImportBandWithDimensions(band_width, band_height)) {
395  continue;
396  }
397 
398  // add to found dimensions
399  found_dimensions.insert({band_width, band_height});
400 
401  // get SQL type
402  auto sql_type = gdal_data_type_to_sql_type(band->GetRasterDataType());
403 
404  // get null value and validity
405  int null_value_valid{0};
406  auto null_value = band->GetNoDataValue(&null_value_valid);
407 
408  // store info
409  if (specified_band_names_map_.size()) {
410  // find and finalize existing info for this band
411  auto itr =
412  std::find_if(import_band_infos_.begin(),
413  import_band_infos_.end(),
414  [&](ImportBandInfo& info) { return info.name == band_name; });
415  CHECK(itr != import_band_infos_.end());
416  itr->datasource_idx = datasource_idx;
417  itr->band_idx = i;
418  itr->sql_type = sql_type;
419  itr->null_value = null_value;
420  itr->null_value_valid = (null_value_valid != 0);
421  } else {
422  // import all bands
423  import_band_infos_.push_back({band_name,
424  band_name,
425  sql_type,
426  datasource_idx,
427  i,
428  null_value,
429  null_value_valid != 0});
430  }
431  }
432  };
433 
434  // initialize filtering
436  specified_band_names, specified_band_dimensions, metadata_column_infos);
437 
438  // process datasources
439  uint32_t datasource_idx{0u};
440  std::vector<std::string> valid_datasource_names;
441  for (auto const& datasource_name : datasource_names_) {
442  // open it
443  Geospatial::GDAL::DataSourceUqPtr datasource_handle =
444  Geospatial::GDAL::openDataSource(datasource_name,
446 
447  // did it open?
448  if (datasource_handle == nullptr) {
449  continue;
450  } else {
451  valid_datasource_names.push_back(datasource_name);
452  }
453 
454  // process it
455  process_datasource(datasource_handle, datasource_idx++);
456  }
457 
458  // check dimensions
459  if (found_dimensions.size() > 1u) {
460  // report
461  LOG(WARNING) << "Raster Importer: Dimensions found as follows:";
462  for (auto const& dimension : found_dimensions) {
463  LOG(WARNING) << "Raster Importer: " << dimension.first << "x" << dimension.second;
464  }
465  if (throw_on_error) {
466  throw std::runtime_error("Raster Importer: Raster file '" + file_name +
467  "' datasource/band dimensions are inconsistent. This file "
468  "cannot be imported into a single table.");
469  } else {
470  LOG(WARNING) << "Raster Importer: Raster file '" << file_name
471  << "' datasource/band dimensions are inconsistent. This file "
472  "cannot be imported into a single table.";
473  }
474  } else if (found_dimensions.size() == 1u) {
475  bands_width_ = found_dimensions.begin()->first;
476  bands_height_ = found_dimensions.begin()->second;
477  LOG(INFO) << "Raster Importer: Importing dimension " << bands_width_ << "x"
478  << bands_height_;
479  }
480 
481  // report if we found nothing
482  if (import_band_infos_.size() == 0 || found_dimensions.size() == 0u) {
483  if (throw_on_error) {
484  throw std::runtime_error("Raster Importer: Raster file " + file_name +
485  " has no importable bands");
486  } else {
487  LOG(ERROR) << "Raster Importer: Raster file " << file_name
488  << " has no importable bands";
489  }
490  }
491 
492  // report any invalid datasources and keep only the valid ones
493  // the datasource indices stored in the infos will match the valid ones
494  auto const failed_datasource_count =
495  datasource_names_.size() - valid_datasource_names.size();
496  if (failed_datasource_count) {
497  LOG(WARNING) << "Raster Importer: Failed to open " << failed_datasource_count
498  << " out of " << std::to_string(datasource_names_.size())
499  << " datasources";
500  }
501  datasource_names_ = valid_datasource_names;
502 
503  // fail if any specified import band names were not found
505 
506  // optimize point_type for small rasters
507  if (optimize_to_smallint && (bands_width_ <= std::numeric_limits<int16_t>::max() &&
508  bands_height_ <= std::numeric_limits<int16_t>::max())) {
510  }
511 
512  // capture this
513  point_compute_angle_ = point_compute_angle;
514 
515  // validate final point type/transform
516  if (!has_spatial_reference && point_transform_ == PointTransform::kWorld) {
517  throw std::runtime_error(
518  "Raster Importer: Raster file has no geo-spatial reference metadata, unable to "
519  "transform points to World Space. Use raster_point_transform='none|file' "
520  "instead.");
521  }
524  throw std::runtime_error(
525  "Raster Importer: Cannot do World Transform with SMALLINT/INT Point type");
526  }
527  } else if (point_type_ == PointType::kPoint) {
529  throw std::runtime_error(
530  "Raster Importer: Must do World Transform with POINT Point type");
531  }
532  }
534  (bands_width_ > std::numeric_limits<int16_t>::max() ||
535  bands_height_ > std::numeric_limits<int16_t>::max())) {
536  throw std::runtime_error(
537  "Raster Importer: Raster file '" + file_name +
538  "' has band dimensions too large for 'SMALLINT' raster_point_type (" +
540  }
542  throw std::runtime_error(
543  "Raster Importer: Must do World Transform with raster_point_compute_angle");
544  }
545 }
546 
547 void RasterImporter::import(size_t& max_threads, const bool max_threads_using_default) {
548  // validate
549  if (import_band_infos_.size() == 0) {
550  throw std::runtime_error("Raster Import aborted. No bands to import.");
551  }
552 
553  // validate
554  CHECK_GE(max_threads, 1u);
555 
556  // open all datasources on all threads
557  std::map<std::string, std::vector<Geospatial::GDAL::DataSourceUqPtr>>
558  datasource_thread_handles_map;
559  for (uint32_t i = 0; i < max_threads; i++) {
560  for (auto const& datasource_name : datasource_names_) {
561  auto& datasource_thread_handles = datasource_thread_handles_map[datasource_name];
562  auto datasource_handle = Geospatial::GDAL::openDataSource(
563  datasource_name, import_export::SourceType::kRasterFile);
564  if (datasource_handle == nullptr) {
565  throw std::runtime_error("Raster Importer: Unable to open raster file " +
566  datasource_name);
567  }
568  if (i == 0) { // check for HDF5 data sources in first loop
569  if (datasource_requires_libhdf(datasource_handle.get()) && max_threads > 1) {
570  if (!max_threads_using_default) {
571  throw std::runtime_error("Raster Importer: Unable to import raster file " +
572  datasource_name + ": GDAL driver " +
573  get_datasource_driver_name(datasource_handle.get()) +
574  " requires use of HDF5 library which is "
575  "incompatible with multithreading.");
576  } else {
577  max_threads = 1; // force use of one thread
578  }
579  }
580  }
581  datasource_thread_handles.emplace_back(std::move(datasource_handle));
582  }
583  }
584 
585  for (auto const& datasource_name : datasource_names_) {
586  datasource_handles_.emplace_back(
587  std::move(shared::get_from_map(datasource_thread_handles_map, datasource_name)));
588  }
589 
590  // use handle for the first datasource from the first thread to read the globals
591  auto const& global_datasource_handle = datasource_handles_[0][0];
592 
593  // get the raster affine transform
594  // this converts the points from pixel space to the file coordinate system space
595  global_datasource_handle->GetGeoTransform(affine_transform_matrix_.data());
596 
597  // transform logic
598  // @TODO(se) discuss!
599  // if the raster_point_type is SMALLINT or INT, we just store pixel X/Y
600  // if the raster_point_type is FLOAT, DOUBLE, or POINT, we store projected X/Y
601  // this will either be in the file coordinate space (raster_point_transform='affine')
602  // or in world space (raster_point_transform='default')
603  // yeah, that's a mess, but I need to sleep on it
604 
605  // determine the final desired SRID
606  // the first column will either be a scalar of some type or a POINT
607  // FLOAT or DOUBLE can support world space coords (so assume 4326)
608  // POINT can be anything and can define an SRID (but assume 4326 for now)
609  int srid{0};
610  switch (point_type_) {
611  case PointType::kNone:
613  case PointType::kInt:
614  break;
615  case PointType::kFloat:
616  case PointType::kDouble:
617  case PointType::kPoint: {
618  srid = 4326;
619  } break;
620  case PointType::kAuto:
621  CHECK(false);
622  }
623 
624  // create a world-space coordinate transformation for the points?
625  if (srid != 0 && point_transform_ == PointTransform::kWorld) {
626  // get the file's spatial reference, if it has one (only geo files will)
627  // also determine if we need to do additional GCP transformations
628  bool need_gcp_transformers{false};
629  auto const* spatial_reference = global_datasource_handle->GetSpatialRef();
630  if (!spatial_reference) {
631  spatial_reference = global_datasource_handle->GetGCPSpatialRef();
632  if (spatial_reference) {
633  need_gcp_transformers = true;
634  }
635  }
636  if (spatial_reference) {
637  // if it's valid, create a transformation to use on the points
638  // make a spatial reference for the desired SRID
639  OGRSpatialReference sr_geometry;
640  auto import_err = sr_geometry.importFromEPSG(srid);
641  if (import_err != OGRERR_NONE) {
642  throw std::runtime_error("Failed to create spatial reference for EPSG " +
643  std::to_string(srid));
644  }
645 #if GDAL_VERSION_MAJOR >= 3
646  sr_geometry.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
647 #endif
648 
649  for (uint32_t i = 0; i < max_threads; i++) {
650  coordinate_transformations_.emplace_back(
651  OGRCreateCoordinateTransformation(spatial_reference, &sr_geometry));
652  if (!coordinate_transformations_.back()) {
653  throw std::runtime_error(
654  "Failed to create coordinate system transformation to EPSG " +
655  std::to_string(srid));
656  }
657  if (need_gcp_transformers) {
658  gcp_transformers_.emplace_back(
659  new GCPTransformer(global_datasource_handle.get()));
660  }
661  }
662  }
663  }
664 }
665 
666 const uint32_t RasterImporter::getNumBands() const {
667  return import_band_infos_.size();
668 }
669 
671  NamesAndSQLTypes names_and_sql_types;
672  if (point_type_ != PointType::kNone) {
673  auto sql_type = point_type_to_sql_type(point_type_);
676  names_and_sql_types.emplace_back("raster_point", sql_type);
677  } else {
678  names_and_sql_types.emplace_back("raster_lon", sql_type);
679  names_and_sql_types.emplace_back("raster_lat", sql_type);
680  }
681  if (point_compute_angle_) {
682  names_and_sql_types.emplace_back("raster_angle", kFLOAT);
683  }
684  } else {
685  names_and_sql_types.emplace_back("raster_x", sql_type);
686  names_and_sql_types.emplace_back("raster_y", sql_type);
687  }
688  }
689  return names_and_sql_types;
690 }
691 
693  return point_transform_;
694 }
695 
697  NamesAndSQLTypes names_and_sql_types;
698  // return alt names
699  for (auto const& info : import_band_infos_) {
700  names_and_sql_types.emplace_back(info.alt_name, info.sql_type);
701  }
702  return names_and_sql_types;
703 }
704 
706  const int band_idx) const {
707  CHECK_LT(static_cast<uint32_t>(band_idx), import_band_infos_.size());
708  auto const& info = import_band_infos_[band_idx];
709  return {info.null_value, info.null_value_valid};
710 }
711 
713  const uint32_t thread_idx,
714  const int y) const {
715  Coords coords(bands_width_);
716 
717  double prev_mx{0.0}, prev_my{0.0};
718 
719  for (int x = 0; x < bands_width_; x++) {
720  // start with the pixel coord
721  double dx = double(x);
722  double dy = double(y);
723 
724  // transforms
726  // affine transform to the file coordinate space
727  double fdx = affine_transform_matrix_[0] + (dx * affine_transform_matrix_[1]) +
728  (dy * affine_transform_matrix_[2]);
729  double fdy = affine_transform_matrix_[3] + (dx * affine_transform_matrix_[4]) +
730  (dy * affine_transform_matrix_[5]);
731  dx = fdx;
732  dy = fdy;
733 
734  // do geo-spatial transform if we can (otherwise leave alone)
736  // first any GCP transformation
737  if (thread_idx < gcp_transformers_.size()) {
738  gcp_transformers_[thread_idx]->transform(dx, dy);
739  }
740  // then any additional transformation to world space
741  if (thread_idx < coordinate_transformations_.size()) {
742  int success{0};
743  coordinate_transformations_[thread_idx]->Transform(
744  1, &dx, &dy, nullptr, &success);
745  if (!success) {
746  throw std::runtime_error("Failed OGRCoordinateTransform");
747  }
748  }
749  }
750  }
751 
752  // store
753  coords[x] = {dx, dy, 0.0f};
754 
755  // compute and overwrite angle?
756  if (point_compute_angle_) {
757  if (x == 0) {
758  // capture first pixel's Mercator coords
759  prev_mx = conv_4326_900913_x(dx);
760  prev_my = conv_4326_900913_y(dy);
761  } else {
762  // compute angle between this pixel's Mercator coords and the previous
763  // expressed as clockwise degrees for symbol rotation
764  auto const mx = conv_4326_900913_x(dx);
765  auto const my = conv_4326_900913_y(dy);
766  auto const angle = atan2f(my - prev_my, mx - prev_mx) * (-180.0f / M_PI);
767  prev_mx = mx;
768  prev_my = my;
769 
770  // overwrite this angle (and that of the first pixel, if this is the second)
771  std::get<2>(coords[x]) = angle;
772  if (x == 1) {
773  std::get<2>(coords[0]) = angle;
774  }
775  }
776  }
777  }
778 
779  // done
780  return coords;
781 }
782 
783 void RasterImporter::getRawPixels(const uint32_t thread_idx,
784  const uint32_t band_idx,
785  const int y_start,
786  const int num_rows,
787  const SQLTypes column_sql_type,
788  RawPixels& raw_pixel_bytes) {
789  // get the band info
790  CHECK_LT(band_idx, import_band_infos_.size());
791  auto const band_info = import_band_infos_[band_idx];
792 
793  // get the dataset handle for this dataset and thread
794  CHECK_LT(band_info.datasource_idx, datasource_handles_.size());
795  auto const& datasource_handles_per_thread =
796  datasource_handles_[band_info.datasource_idx];
797  CHECK_LT(thread_idx, datasource_handles_per_thread.size());
798  auto const& datasource_handle = datasource_handles_per_thread[thread_idx];
799  CHECK(datasource_handle);
800 
801  // get the band
802  auto* band = datasource_handle->GetRasterBand(band_info.band_idx);
803  CHECK(band);
804 
805  // translate the requested data type
806  auto const gdal_data_type = sql_type_to_gdal_data_type(column_sql_type);
807 
808  // read the scanlines
809  auto result = band->RasterIO(GF_Read,
810  0,
811  y_start,
812  bands_width_,
813  num_rows,
814  raw_pixel_bytes.data(),
815  bands_width_,
816  num_rows,
817  gdal_data_type,
818  0,
819  0,
820  nullptr);
821  if (result != CE_None) {
822  throw std::runtime_error("Failed to read raster pixels, rows " +
823  std::to_string(y_start) + " to " +
824  std::to_string(y_start + num_rows));
825  }
826 }
827 
828 //
829 // private
830 //
831 
833  const Geospatial::GDAL::DataSourceUqPtr& datasource) {
834  // get the name of the driver that GDAL picked to open the file
835  std::string driver_name = datasource->GetDriverName();
836 
837  LOG(INFO) << "Raster Importer: Using Raster Driver '" << driver_name << "'";
838 
839  // logic is different for each format
840  if (driver_name == "GTiff") {
841  //
842  // TIFF
843  // Could be an OME TIFF or a GeoTIFF
844  //
845  auto const tifftag_imagedescription = Geospatial::GDAL::getMetadataString(
846  datasource->GetMetadata(), "TIFFTAG_IMAGEDESCRIPTION");
847  if (tifftag_imagedescription.length()) {
848  //
849  // OME TIFF
850  // one datasource per band
851  // names are in a JSON blob
852  //
853  auto const names = get_ome_tiff_band_names(tifftag_imagedescription);
854  for (auto const& name : names) {
855  raw_band_names_.push_back({name});
856  }
857  } else {
858  //
859  // Some other GeoTIFF variant
860  // single datasource
861  // names in band descriptions?
862  //
863  std::vector<std::string> names;
864  auto const raster_count = datasource->GetRasterCount();
865  for (int i = 1; i <= raster_count; i++) {
866  auto* band = datasource->GetRasterBand(i);
867  CHECK(band);
868  auto const* description = band->GetDescription();
869  if (!description || strlen(description) == 0) {
870  raw_band_names_.clear();
871  return;
872  }
873  names.emplace_back(description);
874  }
875  raw_band_names_.emplace_back(std::move(names));
876  }
877  } else if (driver_name == "netCDF" || driver_name == "Zarr") {
878  //
879  // for these formats the band names are in the datasource names
880  // that we already obtained, of the format:
881  // <FORMAT>:"<filename>":<bandname>
882  // one band per datasource
883  //
884  for (auto const& datasource_name : datasource_names_) {
885  std::vector<std::string> tokens;
886  boost::split(tokens, datasource_name, boost::is_any_of(":"));
887  if (tokens.size() < 3 || tokens[2].length() == 0u) {
888  LOG(WARNING) << "Raster Importer: Failed to parse band name from datasource name";
889  raw_band_names_.clear();
890  return;
891  }
892  raw_band_names_.push_back({tokens[2]});
893  }
894  } else if (driver_name == "GRIB") {
895  //
896  // GRIB/GRIB2
897  // one datasource
898  // names are in the per-band metadata
899  //
900  std::vector<std::string> names;
901  auto const raster_count = datasource->GetRasterCount();
902  for (int i = 1; i <= raster_count; i++) {
903  auto* band = datasource->GetRasterBand(i);
904  CHECK(band);
905  auto const grib_comment =
906  Geospatial::GDAL::getMetadataString(band->GetMetadata(), "GRIB_COMMENT");
907  if (grib_comment.length() == 0) {
908  LOG(WARNING) << "Raster Importer: Failed to parse band name from GRIB_COMMENT";
909  raw_band_names_.clear();
910  return;
911  }
912  names.push_back(grib_comment);
913  }
914  raw_band_names_.emplace_back(std::move(names));
915  }
916 }
917 
919  const std::string& specified_band_names,
920  const std::string& specified_band_dimensions,
921  const MetadataColumnInfos& metadata_column_infos) {
922  // specified names?
923  if (specified_band_names.length()) {
924  // tokenize names
925  std::vector<std::string> names_and_alt_names;
926  boost::split(names_and_alt_names, specified_band_names, boost::is_any_of(","));
927 
928  // pre-populate infos (in given order) and found map
929  for (auto const& name_and_alt_name : names_and_alt_names) {
930  // parse for optional alt name
931  std::vector<std::string> tokens;
932  boost::split(tokens, name_and_alt_name, boost::is_any_of("="));
933  if (tokens.size() < 1 || tokens.size() > 2) {
934  throw std::runtime_error("Failed to parse specified name '" + name_and_alt_name +
935  "'");
936  }
937  auto const& name = strip(tokens[0]);
938  auto const& alt_name = strip(tokens[tokens.size() == 2 ? 1 : 0]);
939  import_band_infos_.push_back({name, alt_name, kNULLT, 0u, 0, 0.0, false});
940  specified_band_names_map_.emplace(name, false);
941  }
942  }
943 
944  column_name_repeats_map_.clear();
945 
946  // initialize repeats map with point column names(s)
947  // in case there are bands with the same name
948  auto const names_and_sql_types = getPointNamesAndSQLTypes();
949  for (auto const& name_and_sql_type : names_and_sql_types) {
950  column_name_repeats_map_.emplace(
951  boost::algorithm::to_lower_copy(name_and_sql_type.first), 1);
952  }
953 
954  // specified band dimensions?
955  if (specified_band_dimensions.length()) {
956  // tokenize dimension values
957  std::vector<std::string> values;
958  boost::split(values, specified_band_dimensions, boost::is_any_of(",x "));
959  if (values.size() != 2u) {
960  throw std::invalid_argument("failed to parse width/height values from '" +
961  specified_band_dimensions + "'");
962  }
963  try {
964  size_t num_chars_w{0u}, num_chars_h{0u};
965  specified_band_width_ = std::stoi(values[0], &num_chars_w);
966  specified_band_height_ = std::stoi(values[1], &num_chars_h);
967  if (num_chars_w == 0u || num_chars_h == 0u) {
968  throw std::invalid_argument("empty width/height value");
969  }
971  throw std::invalid_argument("negative width/height value");
972  }
973  } catch (std::invalid_argument& e) {
974  throw std::runtime_error("Raster Importer: Invalid specified dimensions (" +
975  std::string(e.what()) + ")");
976  } catch (std::out_of_range& e) {
977  throw std::runtime_error("Raster Importer: Out-of-range specified dimensions (" +
978  std::string(e.what()) + ")");
979  }
980  }
981 
982  // also any metadata column names
983  for (auto const& mci : metadata_column_infos) {
984  auto column_name_lower =
985  boost::algorithm::to_lower_copy(mci.column_descriptor.columnName);
986  if (column_name_repeats_map_.find(column_name_lower) !=
987  column_name_repeats_map_.end()) {
988  throw std::runtime_error("Invalid metadata column name '" +
989  mci.column_descriptor.columnName +
990  "' (clashes with existing column name)");
991  }
992  column_name_repeats_map_.emplace(column_name_lower, 1);
993  }
994 }
995 
997  // if no specified band names, import everything
998  if (specified_band_names_map_.size() == 0u) {
999  return true;
1000  }
1001  // find it, and mark as found
1002  auto itr = specified_band_names_map_.find(name);
1003  if (itr != specified_band_names_map_.end()) {
1004  itr->second = true;
1005  return true;
1006  }
1007  return false;
1008 }
1009 
1011  // if no specified dimensions, import everything
1013  return true;
1014  }
1015  // import only if dimensions match
1016  return (width == specified_band_width_) && (height == specified_band_height_);
1017 }
1018 
1019 std::string RasterImporter::getBandName(const uint32_t datasource_idx,
1020  const int band_idx) {
1021  std::string band_name;
1022 
1023  // format-specific name fetching
1024  if (datasource_idx < raw_band_names_.size()) {
1025  if (band_idx > 0 &&
1026  band_idx <= static_cast<int>(raw_band_names_[datasource_idx].size())) {
1027  band_name = raw_band_names_[datasource_idx][band_idx - 1];
1028  }
1029  }
1030 
1031  // if we didn't get a format-specific name, use a default name
1032  if (band_name.length() == 0) {
1033  band_name =
1034  "band_" + std::to_string(datasource_idx + 1) + "_" + std::to_string(band_idx);
1035  }
1036 
1037  // additional suffix if not unique
1038  auto band_name_lower = boost::algorithm::to_lower_copy(band_name);
1039  auto itr = column_name_repeats_map_.find(band_name_lower);
1040  if (itr != column_name_repeats_map_.end()) {
1041  auto const suffix = ++(itr->second);
1042  band_name += "_" + std::to_string(suffix);
1043  } else {
1044  column_name_repeats_map_.emplace(band_name_lower, 1);
1045  }
1046 
1047  // sanitize and return
1048  return ImportHelpers::sanitize_name(band_name, /*underscore=*/true);
1049 }
1050 
1052  for (auto const& itr : specified_band_names_map_) {
1053  if (!itr.second) {
1054  throw std::runtime_error("Specified import band name '" + itr.first +
1055  "' was not found in the input raster file");
1056  }
1057  }
1058 }
1059 
1060 } // namespace import_export
const NamesAndSQLTypes getBandNamesAndSQLTypes() const
SQLTypes
Definition: sqltypes.h:65
const NullValue getBandNullValue(const int band_idx) const
#define DEBUG_RASTER_IMPORT
static std::vector< std::string > unpackMetadata(char **metadata)
Definition: GDAL.cpp:246
void getRawBandNamesForFormat(const Geospatial::GDAL::DataSourceUqPtr &datasource)
static void logMetadata(GDALMajorObject *object)
Definition: GDAL.cpp:257
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::tuple< double, double, float >> Coords
GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type)
void transform(double &x, double &y)
#define CHECK_GE(x, y)
Definition: Logger.h:306
const uint32_t getNumBands() const
SQLTypes point_type_to_sql_type(const RasterImporter::PointType point_type)
const PointTransform getPointTransform() const
std::string get_datasource_driver_name(OGRDataSource *source)
std::vector< std::byte > RawPixels
bool shouldImportBandWithName(const std::string &name)
std::string suffix(SQLTypes type)
Definition: Codegen.cpp:69
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
#define M_PI
Definition: constants.h:25
std::vector< std::vector< std::string > > raw_band_names_
#define LOG_IF(severity, condition)
Definition: Logger.h:384
std::vector< Geospatial::GDAL::CoordinateTransformationUqPtr > coordinate_transformations_
bool shouldImportBandWithDimensions(const int width, const int height)
const NamesAndSQLTypes getPointNamesAndSQLTypes() const
std::vector< ImportBandInfo > import_band_infos_
std::vector< std::string > datasource_names_
std::vector< std::string > get_ome_tiff_band_names(const std::string &tifftag_imagedescription)
#define CHECK_LT(x, y)
Definition: Logger.h:303
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61
void getRawPixels(const uint32_t thread_idx, const uint32_t band_idx, const int y_start, const int num_rows, const SQLTypes column_sql_type, RawPixels &raw_pixel_bytes)
std::string sanitize_name(const std::string &name, const bool underscore=false)
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
Definition: GDAL.h:48
std::map< std::string, bool > specified_band_names_map_
std::vector< std::vector< Geospatial::GDAL::DataSourceUqPtr > > datasource_handles_
std::pair< double, bool > NullValue
const Coords getProjectedPixelCoords(const uint32_t thread_idx, const int y) const
std::vector< std::pair< std::string, SQLTypes >> NamesAndSQLTypes
std::map< std::string, int > column_name_repeats_map_
#define CHECK(condition)
Definition: Logger.h:291
std::array< double, 6 > affine_transform_matrix_
void initializeFiltering(const std::string &specified_band_names, const std::string &specified_band_dimensions, const MetadataColumnInfos &metadata_column_infos)
Definition: sqltypes.h:72
string name
Definition: setup.in.py:72
std::string getBandName(const uint32_t datasource_idx, const int band_idx)
static DataSourceUqPtr openDataSource(const std::string &name, const import_export::SourceType source_type)
Definition: GDAL.cpp:181
EXTENSION_NOINLINE double conv_4326_900913_y(const double y)
void detect(const std::string &file_name, const std::string &specified_band_names, const std::string &specified_band_dimensions, const PointType point_type, const PointTransform point_transform, const bool point_compute_angle, const bool throw_on_error, const MetadataColumnInfos &metadata_column_infos)
EXTENSION_NOINLINE double conv_4326_900913_x(const double x)
void import(size_t &max_threads, const bool max_threads_using_default)
static std::string getMetadataString(char **metadata, const std::string &key)
Definition: GDAL.cpp:276
SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type)
std::vector< std::unique_ptr< GCPTransformer > > gcp_transformers_
std::vector< MetadataColumnInfo > MetadataColumnInfos