25 #include <boost/algorithm/string.hpp>
26 #include <boost/filesystem.hpp>
27 #include <boost/tokenizer.hpp>
29 #include <xercesc/dom/DOMDocument.hpp>
30 #include <xercesc/dom/DOMElement.hpp>
31 #include <xercesc/dom/DOMNodeList.hpp>
32 #include <xercesc/framework/MemBufInputSource.hpp>
33 #include <xercesc/parsers/XercesDOMParser.hpp>
37 #include <ogrsf_frmts.h>
42 #define DEBUG_RASTER_IMPORT 0
44 namespace import_export {
47 : transform_arg_{
nullptr}, mode_{mode} {
49 auto const gcp_count = datasource->GetGCPCount();
50 auto const* gcp_list = datasource->GetGCPs();
52 case Mode::kPolynomial: {
53 static constexpr
int kPolynomialOrder = 2;
55 GDALCreateGCPTransformer(gcp_count, gcp_list, kPolynomialOrder,
false);
58 case Mode::kThinPlateSpline: {
59 transform_arg_ = GDALCreateTPSTransformer(gcp_count, gcp_list,
false);
63 CHECK(transform_arg_);
81 GDALGCPTransform(
transform_arg_,
false, 1, &x, &y,
nullptr, &success);
84 GDALTPSTransform(
transform_arg_,
false, 1, &x, &y,
nullptr, &success);
88 throw std::runtime_error(
"Failed GCP/TPS Transform");
98 switch (gdal_data_type) {
118 throw std::runtime_error(
"Unknown/unsupported GDAL data type: " +
138 throw std::runtime_error(
"Unknown/unsupported SQL type: " +
to_string(sql_type));
142 const std::string& tifftag_imagedescription) {
156 using Document = xercesc_3_2::DOMDocument;
157 using Element = xercesc_3_2::DOMElement;
158 using String = xercesc_3_2::XMLString;
159 using Utils = xercesc_3_2::XMLPlatformUtils;
160 using Parser = xercesc_3_2::XercesDOMParser;
161 using Source = xercesc_3_2::MemBufInputSource;
165 std::unordered_map<std::string, XMLCh*> tags;
168 for (
auto& tag : tags) {
169 String::release(&tag.second);
173 tags.emplace(
"ID", String::transcode(
"ID"));
174 tags.emplace(
"Name", String::transcode(
"Name"));
175 tags.emplace(
"Buffer", String::transcode(
"Buffer"));
176 tags.emplace(
"Image", String::transcode(
"Image"));
177 tags.emplace(
"Pixels", String::transcode(
"Pixels"));
178 tags.emplace(
"Channel", String::transcode(
"Channel"));
180 auto get_tag = [&](
const std::string&
name) ->
const XMLCh* {
181 return tags.find(
name)->second;
186 parser.setValidationScheme(Parser::Val_Never);
187 parser.setDoNamespaces(
false);
188 parser.setDoSchema(
false);
189 parser.setLoadExternalDTD(
false);
191 Source source(reinterpret_cast<const XMLByte*>(tifftag_imagedescription.c_str()),
192 tifftag_imagedescription.length(),
195 parser.parse(source);
197 std::vector<std::string> band_names;
199 auto const* document = parser.getDocument();
201 auto const* root_element = document->getDocumentElement();
203 auto const* image_list = root_element->getElementsByTagName(get_tag(
"Image"));
204 if (image_list && image_list->getLength() > 0) {
205 auto const* image_element =
dynamic_cast<const Element*
>(image_list->item(0));
206 auto const* pixels_list = image_element->getElementsByTagName(get_tag(
"Pixels"));
207 if (pixels_list && pixels_list->getLength() > 0) {
208 auto const* pixels_element =
dynamic_cast<const Element*
>(pixels_list->item(0));
209 auto const* channel_list =
210 pixels_element->getElementsByTagName(get_tag(
"Channel"));
211 for (XMLSize_t i = 0; i < channel_list->getLength(); i++) {
212 auto const* channel_element =
213 dynamic_cast<const Element*
>(channel_list->item(i));
214 auto const* name_attribute = channel_element->getAttribute(get_tag(
"Name"));
215 if (name_attribute) {
216 auto const*
name = String::transcode(name_attribute);
218 band_names.push_back(
name);
231 switch (point_type) {
249 return x * 111319.490778;
253 return 6378136.99911 * log(tan(.00872664626 * y + .785398163397));
263 const std::string& specified_band_names,
264 const std::string& specified_band_dimensions,
267 const bool point_compute_angle,
268 const bool throw_on_error,
271 bool has_spatial_reference{
false};
277 if (datasource ==
nullptr) {
278 throw std::runtime_error(
"Raster Importer: Unable to open raster file " +
282 #if DEBUG_RASTER_IMPORT
288 auto const subdatasources =
290 for (
auto const& subdatasource : subdatasources) {
291 auto const name_equals = subdatasource.find(
"_NAME=");
292 if (name_equals != std::string::npos) {
293 auto subdatasource_name =
294 subdatasource.substr(name_equals + 6, std::string::npos);
296 <<
"DEBUG: Found subdatasource '" << subdatasource_name <<
"'";
302 if (datasource->GetSpatialRef()) {
303 has_spatial_reference =
true;
305 }
else if (datasource->GetGCPSpatialRef()) {
306 auto const num_points = datasource->GetGCPCount();
308 <<
"DEBUG: Found GCP Spatial Reference with " << num_points <<
" GCPs";
309 has_spatial_reference = (num_points > 0);
319 <<
"DEBUG: No sub-datasources found, just using '" << file_name <<
"'";
332 bool optimize_to_smallint =
false;
336 optimize_to_smallint =
true;
344 std::set<std::pair<int, int>> found_dimensions;
348 const uint32_t datasource_idx) {
349 auto raster_count = datasource->GetRasterCount();
350 if (raster_count == 0) {
351 throw std::runtime_error(
"Raster Importer: Raster file " + file_name +
356 for (
int i = 1; i <= raster_count; i++) {
366 auto band = datasource->GetRasterBand(i);
370 auto const band_width = band->GetXSize();
371 auto const band_height = band->GetYSize();
372 int block_size_x, block_size_y;
373 band->GetBlockSize(&block_size_x, &block_size_y);
376 LOG(
INFO) <<
"Raster Importer: Found Band '" << band_name <<
"', with dimensions "
377 << band_width <<
"x" << band_height;
385 found_dimensions.insert({band_width, band_height});
391 int null_value_valid{0};
392 auto null_value = band->GetNoDataValue(&null_value_valid);
402 itr->datasource_idx = datasource_idx;
404 itr->sql_type = sql_type;
405 itr->null_value = null_value;
406 itr->null_value_valid = (null_value_valid != 0);
415 null_value_valid != 0});
422 specified_band_names, specified_band_dimensions, metadata_column_infos);
425 uint32_t datasource_idx{0u};
426 std::vector<std::string> valid_datasource_names;
434 if (datasource_handle ==
nullptr) {
437 valid_datasource_names.push_back(datasource_name);
441 process_datasource(datasource_handle, datasource_idx++);
445 if (found_dimensions.size() > 1u) {
447 LOG(
WARNING) <<
"Raster Importer: Dimensions found as follows:";
448 for (
auto const& dimension : found_dimensions) {
449 LOG(
WARNING) <<
"Raster Importer: " << dimension.first <<
"x" << dimension.second;
451 if (throw_on_error) {
452 throw std::runtime_error(
"Raster Importer: Raster file '" + file_name +
453 "' datasource/band dimensions are inconsistent. This file "
454 "cannot be imported into a single table.");
456 LOG(
WARNING) <<
"Raster Importer: Raster file '" << file_name
457 <<
"' datasource/band dimensions are inconsistent. This file "
458 "cannot be imported into a single table.";
460 }
else if (found_dimensions.size() == 1u) {
469 if (throw_on_error) {
470 throw std::runtime_error(
"Raster Importer: Raster file " + file_name +
471 " has no importable bands");
473 LOG(
ERROR) <<
"Raster Importer: Raster file " << file_name
474 <<
" has no importable bands";
480 auto const failed_datasource_count =
481 datasource_names_.size() - valid_datasource_names.size();
482 if (failed_datasource_count) {
483 LOG(
WARNING) <<
"Raster Importer: Failed to open " << failed_datasource_count
487 datasource_names_ = valid_datasource_names;
493 if (optimize_to_smallint && (
bands_width_ <= std::numeric_limits<int16_t>::max() &&
503 throw std::runtime_error(
504 "Raster Importer: Raster file has no geo-spatial reference metadata, unable to "
505 "transform points to World Space. Use raster_point_transform='none|file' "
510 throw std::runtime_error(
511 "Raster Importer: Cannot do World Transform with SMALLINT/INT Point type");
515 throw std::runtime_error(
516 "Raster Importer: Must do World Transform with POINT Point type");
522 throw std::runtime_error(
523 "Raster Importer: Raster file '" + file_name +
524 "' has band dimensions too large for 'SMALLINT' raster_point_type (" +
528 throw std::runtime_error(
529 "Raster Importer: Must do World Transform with raster_point_compute_angle");
536 throw std::runtime_error(
"Raster Import aborted. No bands to import.");
544 std::vector<Geospatial::GDAL::DataSourceUqPtr> datasource_thread_handles;
545 for (uint32_t i = 0; i < max_threads; i++) {
548 if (datasource_handle ==
nullptr) {
549 throw std::runtime_error(
"Raster Importer: Unable to open raster file " +
552 datasource_thread_handles.emplace_back(std::move(datasource_handle));
595 bool need_gcp_transformers{
false};
596 auto const* spatial_reference = global_datasource_handle->GetSpatialRef();
597 if (!spatial_reference) {
598 spatial_reference = global_datasource_handle->GetGCPSpatialRef();
599 if (spatial_reference) {
600 need_gcp_transformers =
true;
603 if (spatial_reference) {
606 OGRSpatialReference sr_geometry;
607 auto import_err = sr_geometry.importFromEPSG(srid);
608 if (import_err != OGRERR_NONE) {
609 throw std::runtime_error(
"Failed to create spatial reference for EPSG " +
612 #if GDAL_VERSION_MAJOR >= 3
613 sr_geometry.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
616 for (uint32_t i = 0; i < max_threads; i++) {
618 OGRCreateCoordinateTransformation(spatial_reference, &sr_geometry));
620 throw std::runtime_error(
621 "Failed to create coordinate system transformation to EPSG " +
624 if (need_gcp_transformers) {
643 names_and_sql_types.emplace_back(
"raster_point", sql_type);
645 names_and_sql_types.emplace_back(
"raster_lon", sql_type);
646 names_and_sql_types.emplace_back(
"raster_lat", sql_type);
649 names_and_sql_types.emplace_back(
"raster_angle",
kFLOAT);
652 names_and_sql_types.emplace_back(
"raster_x", sql_type);
653 names_and_sql_types.emplace_back(
"raster_y", sql_type);
656 return names_and_sql_types;
667 names_and_sql_types.emplace_back(info.alt_name, info.sql_type);
669 return names_and_sql_types;
673 const int band_idx)
const {
676 return {info.null_value, info.null_value_valid};
680 const uint32_t thread_idx,
684 double prev_mx{0.0}, prev_my{0.0};
688 double dx = double(x);
689 double dy = double(y);
696 double fdy = affine_transform_matrix_[3] + (dx * affine_transform_matrix_[4]) +
697 (dy * affine_transform_matrix_[5]);
711 1, &dx, &dy,
nullptr, &success);
713 throw std::runtime_error(
"Failed OGRCoordinateTransform");
720 coords[x] = {dx, dy, 0.0f};
733 auto const angle = atan2f(my - prev_my, mx - prev_mx) * (-180.0f /
M_PI);
738 std::get<2>(coords[x]) = angle;
740 std::get<2>(coords[0]) = angle;
751 const uint32_t band_idx,
762 auto const& datasource_handles_per_thread =
764 CHECK_LT(thread_idx, datasource_handles_per_thread.size());
765 auto const& datasource_handle = datasource_handles_per_thread[thread_idx];
766 CHECK(datasource_handle);
769 auto* band = datasource_handle->GetRasterBand(band_info.band_idx);
776 auto result = band->RasterIO(GF_Read,
781 raw_pixel_bytes.data(),
798 std::string driver_name = datasource->GetDriverName();
800 LOG(
INFO) <<
"Raster Importer: Using Raster Driver '" << driver_name <<
"'";
803 if (driver_name ==
"GTiff") {
809 datasource->GetMetadata(),
"TIFFTAG_IMAGEDESCRIPTION");
810 if (tifftag_imagedescription.length()) {
817 for (
auto const&
name : names) {
826 std::vector<std::string> names;
827 auto const raster_count = datasource->GetRasterCount();
828 for (
int i = 1; i <= raster_count; i++) {
829 auto* band = datasource->GetRasterBand(i);
831 auto const* description = band->GetDescription();
832 if (!description || strlen(description) == 0) {
836 names.push_back(description);
840 }
else if (driver_name ==
"netCDF" || driver_name ==
"Zarr") {
848 std::vector<std::string> tokens;
849 boost::split(tokens, datasource_name, boost::is_any_of(
":"));
850 if (tokens.size() < 3 || tokens[2].length() == 0u) {
851 LOG(
WARNING) <<
"Raster Importer: Failed to parse band name from datasource name";
857 }
else if (driver_name ==
"GRIB") {
863 std::vector<std::string> names;
864 auto const raster_count = datasource->GetRasterCount();
865 for (
int i = 1; i <= raster_count; i++) {
866 auto* band = datasource->GetRasterBand(i);
868 auto const grib_comment =
870 if (grib_comment.length() == 0) {
871 LOG(
WARNING) <<
"Raster Importer: Failed to parse band name from GRIB_COMMENT";
875 names.push_back(grib_comment);
882 const std::string& specified_band_names,
883 const std::string& specified_band_dimensions,
886 if (specified_band_names.length()) {
888 std::vector<std::string> names_and_alt_names;
889 boost::split(names_and_alt_names, specified_band_names, boost::is_any_of(
","));
892 for (
auto const& name_and_alt_name : names_and_alt_names) {
894 std::vector<std::string> tokens;
895 boost::split(tokens, name_and_alt_name, boost::is_any_of(
"="));
896 if (tokens.size() < 1 || tokens.size() > 2) {
897 throw std::runtime_error(
"Failed to parse specified name '" + name_and_alt_name +
901 auto const& alt_name =
strip(tokens[tokens.size() == 2 ? 1 : 0]);
912 for (
auto const& name_and_sql_type : names_and_sql_types) {
914 boost::algorithm::to_lower_copy(name_and_sql_type.first), 1);
918 if (specified_band_dimensions.length()) {
920 std::vector<std::string> values;
921 boost::split(values, specified_band_dimensions, boost::is_any_of(
",x "));
922 if (values.size() != 2u) {
923 throw std::invalid_argument(
"failed to parse width/height values from '" +
924 specified_band_dimensions +
"'");
927 size_t num_chars_w{0u}, num_chars_h{0u};
930 if (num_chars_w == 0u || num_chars_h == 0u) {
931 throw std::invalid_argument(
"empty width/height value");
934 throw std::invalid_argument(
"negative width/height value");
936 }
catch (std::invalid_argument& e) {
937 throw std::runtime_error(
"Raster Importer: Invalid specified dimensions (" +
938 std::string(e.what()) +
")");
939 }
catch (std::out_of_range& e) {
940 throw std::runtime_error(
"Raster Importer: Out-of-range specified dimensions (" +
941 std::string(e.what()) +
")");
946 for (
auto const& mci : metadata_column_infos) {
947 auto column_name_lower =
948 boost::algorithm::to_lower_copy(mci.column_descriptor.columnName);
951 throw std::runtime_error(
"Invalid metadata column name '" +
952 mci.column_descriptor.columnName +
953 "' (clashes with existing column name)");
983 const int band_idx) {
984 std::string band_name;
989 band_idx <= static_cast<int>(
raw_band_names_[datasource_idx].size())) {
995 if (band_name.length() == 0) {
1001 auto band_name_lower = boost::algorithm::to_lower_copy(band_name);
1004 auto const suffix = ++(itr->second);
1017 throw std::runtime_error(
"Specified import band name '" + itr.first +
1018 "' was not found in the input raster file");
PointTransform point_transform_
int specified_band_height_
const NamesAndSQLTypes getBandNamesAndSQLTypes() const
const NullValue getBandNullValue(const int band_idx) const
#define DEBUG_RASTER_IMPORT
static std::vector< std::string > unpackMetadata(char **metadata)
void getRawBandNamesForFormat(const Geospatial::GDAL::DataSourceUqPtr &datasource)
static void logMetadata(GDALMajorObject *object)
std::vector< std::tuple< double, double, float >> Coords
GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type)
const uint32_t getNumBands() const
SQLTypes point_type_to_sql_type(const RasterImporter::PointType point_type)
const PointTransform getPointTransform() const
std::vector< std::byte > RawPixels
bool shouldImportBandWithName(const std::string &name)
std::string suffix(SQLTypes type)
std::vector< std::vector< std::string > > raw_band_names_
#define LOG_IF(severity, condition)
std::vector< Geospatial::GDAL::CoordinateTransformationUqPtr > coordinate_transformations_
bool shouldImportBandWithDimensions(const int width, const int height)
void checkSpecifiedBandNamesFound() const
const NamesAndSQLTypes getPointNamesAndSQLTypes() const
std::vector< ImportBandInfo > import_band_infos_
std::vector< std::string > datasource_names_
void import(const uint32_t max_threads)
std::vector< std::string > get_ome_tiff_band_names(const std::string &tifftag_imagedescription)
void getRawPixels(const uint32_t thread_idx, const uint32_t band_idx, const int y_start, const int num_rows, const SQLTypes column_sql_type, RawPixels &raw_pixel_bytes)
int specified_band_width_
std::string sanitize_name(const std::string &name, const bool underscore=false)
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
std::map< std::string, bool > specified_band_names_map_
std::vector< std::vector< Geospatial::GDAL::DataSourceUqPtr > > datasource_handles_
bool point_compute_angle_
std::pair< double, bool > NullValue
const Coords getProjectedPixelCoords(const uint32_t thread_idx, const int y) const
std::vector< std::pair< std::string, SQLTypes >> NamesAndSQLTypes
std::map< std::string, int > column_name_repeats_map_
std::array< double, 6 > affine_transform_matrix_
void initializeFiltering(const std::string &specified_band_names, const std::string &specified_band_dimensions, const MetadataColumnInfos &metadata_column_infos)
std::string getBandName(const uint32_t datasource_idx, const int band_idx)
static DataSourceUqPtr openDataSource(const std::string &name, const import_export::SourceType source_type)
EXTENSION_NOINLINE double conv_4326_900913_y(const double y)
void detect(const std::string &file_name, const std::string &specified_band_names, const std::string &specified_band_dimensions, const PointType point_type, const PointTransform point_transform, const bool point_compute_angle, const bool throw_on_error, const MetadataColumnInfos &metadata_column_infos)
EXTENSION_NOINLINE double conv_4326_900913_x(const double x)
static std::string getMetadataString(char **metadata, const std::string &key)
SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type)
std::vector< std::unique_ptr< GCPTransformer > > gcp_transformers_
std::vector< MetadataColumnInfo > MetadataColumnInfos