OmniSciDB  dfae7c3b14
import_export::Importer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::Importer:
+ Collaboration diagram for import_export::Importer:

Classes

struct  GeoFileLayerInfo
 

Public Types

enum  GeoFileLayerContents { GeoFileLayerContents::EMPTY, GeoFileLayerContents::GEO, GeoFileLayerContents::NON_GEO, GeoFileLayerContents::UNSUPPORTED_GEO }
 

Public Member Functions

 Importer (Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
 
 Importer (Loader *providedLoader, const std::string &f, const CopyParams &p)
 
 ~Importer () override
 
ImportStatus import ()
 
ImportStatus importDelimited (const std::string &file_path, const bool decompressed) override
 
ImportStatus importGDAL (std::map< std::string, std::string > colname_to_src)
 
const CopyParams & get_copy_params () const
 
const std::list< const ColumnDescriptor * > & get_column_descs () const
 
void load (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count)
 
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > & get_import_buffers_vec ()
 
std::vector< std::unique_ptr< TypedImportBuffer > > & get_import_buffers (int i)
 
const bool * get_is_array () const
 
Catalog_Namespace::Catalog & getCatalog ()
 
void checkpoint (const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
 
auto getLoader () const
 
- Public Member Functions inherited from import_export::DataStreamSink
 DataStreamSink ()
 
 DataStreamSink (const CopyParams &copy_params, const std::string file_path)
 
virtual ~DataStreamSink ()
 
const CopyParams & get_copy_params () const
 
void import_compressed (std::vector< std::string > &file_paths)
 

Static Public Member Functions

static ImportStatus get_import_status (const std::string &id)
 
static void set_import_status (const std::string &id, const ImportStatus is)
 
static const std::list< ColumnDescriptor > gdalToColumnDescriptors (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
 
static void readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
 
static bool gdalFileExists (const std::string &path, const CopyParams &copy_params)
 
static bool gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params)
 
static std::vector< std::string > gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params)
 
static std::vector< GeoFileLayerInfo > gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params)
 
static void set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
 
static void set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, int render_group, const int64_t replicate_count=0)
 

Static Private Member Functions

static bool gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir)
 
static OGRDataSource * openGDALDataset (const std::string &fileName, const CopyParams &copy_params)
 
static void setGDALAuthorizationTokens (const CopyParams &copy_params)
 

Private Attributes

std::string import_id
 
size_t file_size
 
size_t max_threads
 
char * buffer [2]
 
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
 
std::unique_ptr< Loader > loader
 
std::unique_ptr< bool[]> is_array_a
 

Additional Inherited Members

- Protected Member Functions inherited from import_export::DataStreamSink
ImportStatus archivePlumber ()
 
- Protected Attributes inherited from import_export::DataStreamSink
CopyParams copy_params
 
const std::string file_path
 
FILE * p_file = nullptr
 
ImportStatus import_status
 
bool load_failed = false
 
size_t total_file_size {0}
 
std::vector< size_t > file_offsets
 
std::mutex file_offsets_mutex
 

Detailed Description

Definition at line 730 of file Importer.h.

Member Enumeration Documentation

◆ GeoFileLayerContents

Enumerator
EMPTY 
GEO 
NON_GEO 
UNSUPPORTED_GEO 

Definition at line 776 of file Importer.h.

776 { EMPTY, GEO, NON_GEO, UNSUPPORTED_GEO };

Constructor & Destructor Documentation

◆ Importer() [1/2]

import_export::Importer::Importer ( Catalog_Namespace::Catalog &  c,
const TableDescriptor *  t,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 149 of file Importer.cpp.

153  : Importer(new Loader(c, t), f, p) {}
Importer(Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
Definition: Importer.cpp:149

◆ Importer() [2/2]

import_export::Importer::Importer ( Loader *  providedLoader,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 155 of file Importer.cpp.

References buffer, import_export::DataStreamSink::file_path, file_size, import_id, is_array_a, kARRAY, loader, max_threads, and import_export::DataStreamSink::p_file.

156  : DataStreamSink(p, f), loader(providedLoader) {
157  import_id = boost::filesystem::path(file_path).filename().string();
158  file_size = 0;
159  max_threads = 0;
160  p_file = nullptr;
161  buffer[0] = nullptr;
162  buffer[1] = nullptr;
163  // we may be overallocating a little more memory here due to dropping phy cols.
164  // it shouldn't be an issue because iteration of it is not supposed to go OOB.
165  auto is_array = std::unique_ptr<bool[]>(new bool[loader->get_column_descs().size()]);
166  int i = 0;
167  bool has_array = false;
168  // TODO: replace this ugly way of skipping phy cols once if isPhyGeo is defined
169  int skip_physical_cols = 0;
170  for (auto& p : loader->get_column_descs()) {
171  // phy geo columns can't be in input file
172  if (skip_physical_cols-- > 0) {
173  continue;
174  }
175  // neither are rowid or $deleted$
176  // note: columns can be added after rowid/$deleted$
177  if (p->isVirtualCol || p->isDeletedCol) {
178  continue;
179  }
180  skip_physical_cols = p->columnType.get_physical_cols();
181  if (p->columnType.get_type() == kARRAY) {
182  is_array.get()[i] = true;
183  has_array = true;
184  } else {
185  is_array.get()[i] = false;
186  }
187  ++i;
188  }
189  if (has_array) {
190  is_array_a = std::unique_ptr<bool[]>(is_array.release());
191  } else {
192  is_array_a = std::unique_ptr<bool[]>(nullptr);
193  }
194 }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:825
std::string import_id
Definition: Importer.h:819
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648

◆ ~Importer()

import_export::Importer::~Importer ( )
override

Definition at line 196 of file Importer.cpp.

References buffer, and import_export::DataStreamSink::p_file.

196  {
197  if (p_file != nullptr) {
198  fclose(p_file);
199  }
200  if (buffer[0] != nullptr) {
201  free(buffer[0]);
202  }
203  if (buffer[1] != nullptr) {
204  free(buffer[1]);
205  }
206 }

Member Function Documentation

◆ checkpoint()

void import_export::Importer::checkpoint ( const std::vector< Catalog_Namespace::TableEpochInfo > &  table_epochs)

Definition at line 3268 of file Importer.cpp.

References DEBUG_TIMING, Data_Namespace::DISK_LEVEL, logger::ERROR, measure< TimeT >::execution(), StorageType::FOREIGN_TABLE, import_buffers_vec, logger::INFO, import_export::DataStreamSink::load_failed, loader, and LOG.

Referenced by importDelimited(), and importGDAL().

3269  {
3270  if (loader->getTableDesc()->storageType != StorageType::FOREIGN_TABLE) {
3271  if (load_failed) {
3272  // rollback to starting epoch - undo all the added records
3273  loader->setTableEpochs(table_epochs);
3274  } else {
3275  loader->checkpoint();
3276  }
3277  }
3278 
3279  if (loader->getTableDesc()->persistenceLevel ==
3280  Data_Namespace::MemoryLevel::DISK_LEVEL) { // only checkpoint disk-resident tables
3281  auto ms = measure<>::execution([&]() {
3282  if (!load_failed) {
3283  for (auto& p : import_buffers_vec[0]) {
3284  if (!p->stringDictCheckpoint()) {
3285  LOG(ERROR) << "Checkpointing Dictionary for Column "
3286  << p->getColumnDesc()->columnName << " failed.";
3287  load_failed = true;
3288  break;
3289  }
3290  }
3291  }
3292  });
3293  if (DEBUG_TIMING) {
3294  LOG(INFO) << "Dictionary Checkpointing took " << (double)ms / 1000.0 << " Seconds."
3295  << std::endl;
3296  }
3297  }
3298 }
#define LOG(tag)
Definition: Logger.h:188
#define DEBUG_TIMING
Definition: Importer.cpp:138
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
static constexpr char const * FOREIGN_TABLE
std::unique_ptr< Loader > loader
Definition: Importer.h:824
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalFileExists()

bool import_export::Importer::gdalFileExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4561 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::check_geospatial_files(), DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4561  {
4562  return gdalStatInternal(path, copy_params, false);
4563 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4528
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalFileOrDirectoryExists()

bool import_export::Importer::gdalFileOrDirectoryExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4566 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4567  {
4568  return gdalStatInternal(path, copy_params, true);
4569 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4528
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalGetAllFilesInArchive()

std::vector< std::string > import_export::Importer::gdalGetAllFilesInArchive ( const std::string &  archive_path,
const CopyParams &  copy_params 
)
static

Definition at line 4638 of file Importer.cpp.

References import_export::gdalGatherFilesInArchiveRecursive(), import_export::GDAL::init(), and setGDALAuthorizationTokens().

Referenced by anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive(), and DBHandler::get_all_files_in_archive().

4640  {
4641  // lazy init GDAL
4642  GDAL::init();
4643 
4644  // set authorization tokens
4645  setGDALAuthorizationTokens(copy_params);
4646 
4647  // prepare to gather files
4648  std::vector<std::string> files;
4649 
4650  // gather the files recursively
4651  gdalGatherFilesInArchiveRecursive(archive_path, files);
4652 
4653  // convert to relative paths inside archive
4654  for (auto& file : files) {
4655  file.erase(0, archive_path.size() + 1); // remove archive_path and the slash
4656  }
4657 
4658  // done
4659  return files;
4660 }
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4178
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4571
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalGetLayersInGeoFile()

std::vector< Importer::GeoFileLayerInfo > import_export::Importer::gdalGetLayersInGeoFile ( const std::string &  file_name,
const CopyParams &  copy_params 
)
static

Definition at line 4663 of file Importer.cpp.

References CHECK, EMPTY, GEO, import_export::CopyParams::geo_explode_collections, import_export::GDAL::init(), NON_GEO, openGDALDataset(), setGDALAuthorizationTokens(), and UNSUPPORTED_GEO.

Referenced by DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4665  {
4666  // lazy init GDAL
4667  GDAL::init();
4668 
4669  // set authorization tokens
4670  setGDALAuthorizationTokens(copy_params);
4671 
4672  // prepare to gather layer info
4673  std::vector<GeoFileLayerInfo> layer_info;
4674 
4675  // open the data set
4676  OGRDataSourceUqPtr poDS(openGDALDataset(file_name, copy_params));
4677  if (poDS == nullptr) {
4678  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4679  file_name);
4680  }
4681 
4682  // enumerate the layers
4683  for (auto&& poLayer : poDS->GetLayers()) {
4685  // prepare to read this layer
4686  poLayer->ResetReading();
4687  // skip layer if empty
4688  if (poLayer->GetFeatureCount() > 0) {
4689  // get first feature
4690  OGRFeatureUqPtr first_feature(poLayer->GetNextFeature());
4691  CHECK(first_feature);
4692  // check feature for geometry
4693  const OGRGeometry* geometry = first_feature->GetGeometryRef();
4694  if (!geometry) {
4695  // layer has no geometry
4696  contents = GeoFileLayerContents::NON_GEO;
4697  } else {
4698  // check the geometry type
4699  const OGRwkbGeometryType geometry_type = geometry->getGeometryType();
4700  switch (wkbFlatten(geometry_type)) {
4701  case wkbPoint:
4702  case wkbLineString:
4703  case wkbPolygon:
4704  case wkbMultiPolygon:
4705  // layer has supported geo
4706  contents = GeoFileLayerContents::GEO;
4707  break;
4708  case wkbMultiPoint:
4709  case wkbMultiLineString:
4710  // supported if geo_explode_collections is specified
4714  break;
4715  default:
4716  // layer has unsupported geometry
4718  break;
4719  }
4720  }
4721  }
4722  // store info for this layer
4723  layer_info.emplace_back(poLayer->GetName(), contents);
4724  }
4725 
4726  // done
4727  return layer_info;
4728 }
static void init()
Definition: GDAL.cpp:56
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:93
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:102
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4178
#define CHECK(condition)
Definition: Logger.h:197
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4248
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalStatInternal()

bool import_export::Importer::gdalStatInternal ( const std::string &  path,
const CopyParams &  copy_params,
bool  also_dir 
)
staticprivate

Definition at line 4528 of file Importer.cpp.

References import_export::GDAL::init(), run_benchmark_import::result, and setGDALAuthorizationTokens().

Referenced by gdalFileExists(), and gdalFileOrDirectoryExists().

4530  {
4531  // lazy init GDAL
4532  GDAL::init();
4533 
4534  // set authorization tokens
4535  setGDALAuthorizationTokens(copy_params);
4536 
4537 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4538  // clear GDAL stat cache
4539  // without this, file existence will be cached, even if authentication changes
4540  // supposed to be available from GDAL 2.2.1 but our CentOS build disagrees
4541  VSICurlClearCache();
4542 #endif
4543 
4544  // stat path
4545  VSIStatBufL sb;
4546  int result = VSIStatExL(path.c_str(), &sb, VSI_STAT_EXISTS_FLAG);
4547  if (result < 0) {
4548  return false;
4549  }
4550 
4551  // exists?
4552  if (also_dir && (VSI_ISREG(sb.st_mode) || VSI_ISDIR(sb.st_mode))) {
4553  return true;
4554  } else if (VSI_ISREG(sb.st_mode)) {
4555  return true;
4556  }
4557  return false;
4558 }
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4178
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalToColumnDescriptors()

const std::list< ColumnDescriptor > import_export::Importer::gdalToColumnDescriptors ( const std::string &  fileName,
const std::string &  geoColumnName,
const CopyParams &  copy_params 
)
static

Definition at line 4438 of file Importer.cpp.

References CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::CopyParams::geo_coords_comp_param, import_export::CopyParams::geo_coords_encoding, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_coords_type, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), kARRAY, kENCODING_DICT, kMULTIPOLYGON, kPOLYGON, kTEXT, import_export::ogr_to_type(), openGDALDataset(), SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), and ColumnDescriptor::sourceName.

Referenced by DBHandler::detect_column_types().

4441  {
4442  std::list<ColumnDescriptor> cds;
4443 
4444  OGRDataSourceUqPtr poDS(openGDALDataset(file_name, copy_params));
4445  if (poDS == nullptr) {
4446  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4447  file_name);
4448  }
4449 
4450  OGRLayer& layer =
4452 
4453  layer.ResetReading();
4454  // TODO(andrewseidl): support multiple features
4455  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
4456  if (poFeature == nullptr) {
4457  throw std::runtime_error("No features found in " + file_name);
4458  }
4459  // get fields as regular columns
4460  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4461  CHECK(poFDefn);
4462  int iField;
4463  for (iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4464  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4465  auto typePair = ogr_to_type(poFieldDefn->GetType());
4466  ColumnDescriptor cd;
4467  cd.columnName = poFieldDefn->GetNameRef();
4468  cd.sourceName = poFieldDefn->GetNameRef();
4469  SQLTypeInfo ti;
4470  if (typePair.second) {
4471  ti.set_type(kARRAY);
4472  ti.set_subtype(typePair.first);
4473  } else {
4474  ti.set_type(typePair.first);
4475  }
4476  if (typePair.first == kTEXT) {
4478  ti.set_comp_param(32);
4479  }
4480  ti.set_fixed_size();
4481  cd.columnType = ti;
4482  cds.push_back(cd);
4483  }
4484  // get geo column, if any
4485  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4486  if (poGeometry) {
4487  ColumnDescriptor cd;
4488  cd.columnName = geo_column_name;
4489  cd.sourceName = geo_column_name;
4490 
4491  // get GDAL type
4492  auto ogr_type = wkbFlatten(poGeometry->getGeometryType());
4493 
4494  // if exploding, override any collection type to child type
4496  if (ogr_type == wkbMultiPolygon) {
4497  ogr_type = wkbPolygon;
4498  } else if (ogr_type == wkbMultiLineString) {
4499  ogr_type = wkbLineString;
4500  } else if (ogr_type == wkbMultiPoint) {
4501  ogr_type = wkbPoint;
4502  }
4503  }
4504 
4505  // convert to internal type
4506  SQLTypes geoType = ogr_to_type(ogr_type);
4507 
4508  // for now, we promote POLYGON to MULTIPOLYGON (unless exploding)
4510  geoType = (geoType == kPOLYGON) ? kMULTIPOLYGON : geoType;
4511  }
4512 
4513  // build full internal type
4514  SQLTypeInfo ti;
4515  ti.set_type(geoType);
4521  cd.columnType = ti;
4522 
4523  cds.push_back(cd);
4524  }
4525  return cds;
4526 }
void set_compression(EncodingType c)
Definition: sqltypes.h:359
SQLTypes
Definition: sqltypes.h:40
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4281
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:350
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:93
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:102
std::string sourceName
void set_input_srid(int d)
Definition: sqltypes.h:353
void set_fixed_size()
Definition: sqltypes.h:358
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
specifies the content in-memory of a row in the column metadata table
void set_output_srid(int s)
Definition: sqltypes.h:355
void set_comp_param(int p)
Definition: sqltypes.h:360
std::string geo_layer_name
Definition: CopyParams.h:78
Definition: sqltypes.h:54
#define CHECK(condition)
Definition: Logger.h:197
SQLTypeInfo columnType
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4248
std::string columnName
std::pair< SQLTypes, bool > ogr_to_type(const OGRFieldType &ogr_type)
Definition: Importer.cpp:4383
EncodingType geo_coords_encoding
Definition: CopyParams.h:73
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:349
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_column_descs()

const std::list<const ColumnDescriptor*>& import_export::Importer::get_column_descs ( ) const
inline

Definition at line 743 of file Importer.h.

Referenced by import_export::DataStreamSink::archivePlumber(), import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

743  {
744  return loader->get_column_descs();
745  }
std::unique_ptr< Loader > loader
Definition: Importer.h:824
+ Here is the caller graph for this function:

◆ get_copy_params()

const CopyParams& import_export::Importer::get_copy_params ( ) const
inline

Definition at line 742 of file Importer.h.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

742 { return copy_params; }
+ Here is the caller graph for this function:

◆ get_import_buffers()

std::vector<std::unique_ptr<TypedImportBuffer> >& import_export::Importer::get_import_buffers ( int  i)
inline

Definition at line 751 of file Importer.h.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

751  {
752  return import_buffers_vec[i];
753  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
+ Here is the caller graph for this function:

◆ get_import_buffers_vec()

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > >& import_export::Importer::get_import_buffers_vec ( )
inline

Definition at line 748 of file Importer.h.

748  {
749  return import_buffers_vec;
750  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823

◆ get_import_status()

ImportStatus import_export::Importer::get_import_status ( const std::string &  id)
static

Definition at line 208 of file Importer.cpp.

Referenced by DBHandler::import_table_status().

208  {
209  mapd_shared_lock<mapd_shared_mutex> read_lock(status_mutex);
210  return import_status_map.at(import_id);
211 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:147
std::string import_id
Definition: Importer.h:819
mapd_shared_lock< mapd_shared_mutex > read_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:146
+ Here is the caller graph for this function:

◆ get_is_array()

const bool* import_export::Importer::get_is_array ( ) const
inline

Definition at line 754 of file Importer.h.

Referenced by import_export::import_thread_delimited().

754 { return is_array_a.get(); }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:825
+ Here is the caller graph for this function:

◆ getCatalog()

Catalog_Namespace::Catalog& import_export::Importer::getCatalog ( )
inline

Definition at line 786 of file Importer.h.

Referenced by import_export::Loader::checkpoint(), import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), import_export::Loader::getTableEpochs(), import_export::import_thread_delimited(), and import_export::Loader::setTableEpochs().

786 { return loader->getCatalog(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:824
+ Here is the caller graph for this function:

◆ getLoader()

auto import_export::Importer::getLoader ( ) const
inline

Definition at line 810 of file Importer.h.

Referenced by import_export::DataStreamSink::archivePlumber().

810 { return loader.get(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:824
+ Here is the caller graph for this function:

◆ import()

ImportStatus import_export::Importer::import ( )

Definition at line 3945 of file Importer.cpp.

References import_export::DataStreamSink::archivePlumber().

3945  {
3946  return DataStreamSink::archivePlumber();
3947 }
+ Here is the call graph for this function:

◆ importDelimited()

ImportStatus import_export::Importer::importDelimited ( const std::string &  file_path,
const bool  decompressed 
)
overridevirtual

Implements import_export::DataStreamSink.

Definition at line 3949 of file Importer.cpp.

References import_export::CopyParams::buffer_size, CHECK, checkpoint(), import_export::DataStreamSink::copy_params, logger::ERROR, import_export::DataStreamSink::file_offsets, import_export::DataStreamSink::file_offsets_mutex, file_size, import_export::delimited_parser::find_row_end_pos(), import_export::CopyParams::geo_assign_render_groups, import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_delimited(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, import_export::DataStreamSink::p_file, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), logger::thread_id(), import_export::CopyParams::threads, import_export::DataStreamSink::total_file_size, and VLOG.

Referenced by import_export::DataStreamSink::import_compressed().

3950  {
3951  bool load_truncated = false;
3953 
3954  if (!p_file) {
3955  p_file = fopen(file_path.c_str(), "rb");
3956  }
3957  if (!p_file) {
3958  throw std::runtime_error("failed to open file '" + file_path +
3959  "': " + strerror(errno));
3960  }
3961 
3962  if (!decompressed) {
3963  (void)fseek(p_file, 0, SEEK_END);
3964  file_size = ftell(p_file);
3965  }
3966 
3967  if (copy_params.threads == 0) {
3968  max_threads = static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF));
3969  } else {
3970  max_threads = static_cast<size_t>(copy_params.threads);
3971  }
3972 
3973  // deal with small files
3974  size_t alloc_size = copy_params.buffer_size;
3975  if (!decompressed && file_size < alloc_size) {
3976  alloc_size = file_size;
3977  }
3978 
3979  for (size_t i = 0; i < max_threads; i++) {
3980  import_buffers_vec.emplace_back();
3981  for (const auto cd : loader->get_column_descs()) {
3982  import_buffers_vec[i].emplace_back(
3983  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
3984  }
3985  }
3986 
3987  auto scratch_buffer = std::make_unique<char[]>(alloc_size);
3988  size_t current_pos = 0;
3989  size_t end_pos;
3990  size_t begin_pos = 0;
3991 
3992  (void)fseek(p_file, current_pos, SEEK_SET);
3993  size_t size =
3994  fread(reinterpret_cast<void*>(scratch_buffer.get()), 1, alloc_size, p_file);
3995 
3996  // make render group analyzers for each poly column
3997  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
3999  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
4000  loader->getTableDesc()->tableId, false, false, false);
4001  for (auto cd : columnDescriptors) {
4002  SQLTypes ct = cd->columnType.get_type();
4003  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4004  auto rga = std::make_shared<RenderGroupAnalyzer>();
4005  rga->seedFromExistingTableContents(loader, cd->columnName);
4006  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4007  }
4008  }
4009  }
4010 
4011  ChunkKey chunkKey = {loader->getCatalog().getCurrentDB().dbId,
4012  loader->getTableDesc()->tableId};
4013  auto table_epochs = loader->getTableEpochs();
4014  {
4015  std::list<std::future<ImportStatus>> threads;
4016 
4017  // use a stack to track thread_ids which must not overlap among threads
4018  // because thread_id is used to index import_buffers_vec[]
4019  std::stack<size_t> stack_thread_ids;
4020  for (size_t i = 0; i < max_threads; i++) {
4021  stack_thread_ids.push(i);
4022  }
4023  // added for true row index on error
4024  size_t first_row_index_this_buffer = 0;
4025 
4026  while (size > 0) {
4027  unsigned int num_rows_this_buffer = 0;
4028  CHECK(scratch_buffer);
4029  end_pos = delimited_parser::find_row_end_pos(alloc_size,
4030  scratch_buffer,
4031  size,
4032  copy_params,
4033  first_row_index_this_buffer,
4034  num_rows_this_buffer,
4035  p_file);
4036 
4037  // unput residual
4038  int nresidual = size - end_pos;
4039  std::unique_ptr<char[]> unbuf;
4040  if (nresidual > 0) {
4041  unbuf = std::make_unique<char[]>(nresidual);
4042  memcpy(unbuf.get(), scratch_buffer.get() + end_pos, nresidual);
4043  }
4044 
4045  // get a thread_id not in use
4046  auto thread_id = stack_thread_ids.top();
4047  stack_thread_ids.pop();
4048  // LOG(INFO) << " stack_thread_ids.pop " << thread_id << std::endl;
4049 
4050  threads.push_back(std::async(std::launch::async,
4051  import_thread_delimited,
4052  thread_id,
4053  this,
4054  std::move(scratch_buffer),
4055  begin_pos,
4056  end_pos,
4057  end_pos,
4058  columnIdToRenderGroupAnalyzerMap,
4059  first_row_index_this_buffer));
4060 
4061  first_row_index_this_buffer += num_rows_this_buffer;
4062 
4063  current_pos += end_pos;
4064  scratch_buffer = std::make_unique<char[]>(alloc_size);
4065  CHECK(scratch_buffer);
4066  memcpy(scratch_buffer.get(), unbuf.get(), nresidual);
4067  size = nresidual +
4068  fread(scratch_buffer.get() + nresidual, 1, alloc_size - nresidual, p_file);
4069 
4070  begin_pos = 0;
4071 
4072  while (threads.size() > 0) {
4073  int nready = 0;
4074  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4075  it != threads.end();) {
4076  auto& p = *it;
4077  std::chrono::milliseconds span(
4078  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4079  if (p.wait_for(span) == std::future_status::ready) {
4080  auto ret_import_status = p.get();
4081  import_status += ret_import_status;
4082  // sum up current total file offsets
4083  size_t total_file_offset{0};
4084  if (decompressed) {
4085  std::unique_lock<std::mutex> lock(file_offsets_mutex);
4086  for (const auto file_offset : file_offsets) {
4087  total_file_offset += file_offset;
4088  }
4089  }
4090  // estimate number of rows per current total file offset
4091  if (decompressed ? total_file_offset : current_pos) {
4093  (decompressed ? (float)total_file_size / total_file_offset
4094  : (float)file_size / current_pos) *
4096  }
4097  VLOG(3) << "rows_completed " << import_status.rows_completed
4098  << ", rows_estimated " << import_status.rows_estimated
4099  << ", total_file_size " << total_file_size << ", total_file_offset "
4100  << total_file_offset;
4102  // recall thread_id for reuse
4103  stack_thread_ids.push(ret_import_status.thread_id);
4104  threads.erase(it++);
4105  ++nready;
4106  } else {
4107  ++it;
4108  }
4109  }
4110 
4111  if (nready == 0) {
4112  std::this_thread::yield();
4113  }
4114 
4115  // on eof, wait all threads to finish
4116  if (0 == size) {
4117  continue;
4118  }
4119 
4120  // keep reading if any free thread slot
4121  // this is one of the major difference from old threading model !!
4122  if (threads.size() < max_threads) {
4123  break;
4124  }
4125  }
4126 
4128  load_truncated = true;
4129  load_failed = true;
4130  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4131  break;
4132  }
4133  if (load_failed) {
4134  load_truncated = true;
4135  LOG(ERROR) << "A call to the Loader::load failed, Please review the logs for "
4136  "more details";
4137  break;
4138  }
4139  }
4140 
4141  // join dangling threads in case of LOG(ERROR) above
4142  for (auto& p : threads) {
4143  p.wait();
4144  }
4145  }
4146 
4147  checkpoint(table_epochs);
4148 
4149  // must set import_status.load_truncated before closing this end of pipe
4150  // otherwise, the thread on the other end would throw an unwanted 'write()'
4151  // exception
4152  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
4153  import_status.load_truncated = load_truncated;
4154 
4155  fclose(p_file);
4156  p_file = nullptr;
4157  return import_status;
4158 }
SQLTypes
Definition: sqltypes.h:40
#define LOG(tag)
Definition: Logger.h:188
std::map< int, std::shared_ptr< RenderGroupAnalyzer > > ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:135
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:213
static ImportStatus import_thread_delimited(int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
Definition: Importer.cpp:1796
std::string import_id
Definition: Importer.h:819
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3268
ThreadId thread_id()
Definition: Logger.cpp:731
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int > ChunkKey
Definition: types.h:37
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:146
std::vector< size_t > file_offsets
Definition: Importer.h:653
size_t find_row_end_pos(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FILE *file, foreign_storage::CsvReader *csv_reader)
Finds the closest possible row ending to the end of the given buffer. The buffer is resized as needed...
#define VLOG(n)
Definition: Logger.h:291
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ importGDAL()

ImportStatus import_export::Importer::importGDAL ( std::map< std::string, std::string >  colname_to_src)

Definition at line 4730 of file Importer.cpp.

References CHECK, CHECK_EQ, checkpoint(), import_export::DataStreamSink::copy_params, logger::ERROR, import_export::CopyParams::geo_assign_render_groups, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_shapefile(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, openGDALDataset(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, set_import_status(), logger::thread_id(), and import_export::CopyParams::threads.

Referenced by QueryRunner::ImportDriver::importGeoTable().

4731  {
      // Imports the features of one layer of the geo file at file_path in chunks of at
      // most MAX_FEATURES_PER_CHUNK features, handing each chunk to
      // import_thread_shapefile(), then checkpoints and returns the accumulated
      // import_status with load_truncated filled in.
      // Throws std::runtime_error if the dataset pointer poDS is null.
      // NOTE(review): this extract omits the signature line and listing lines 4734,
      // 4736, 4743, 4795 and 4870; comments touching those gaps are hedged.
4732  // initial status
4733  bool load_truncated = false;
4735 
      // NOTE(review): the declaration of poDS (listing line 4736) is not shown here;
      // the References list names openGDALDataset(), so presumably it comes from
      // there -- confirm against Importer.cpp.
4737  if (poDS == nullptr) {
4738  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4739  file_path);
4740  }
4741 
      // NOTE(review): the initializer of `layer` (listing line 4743) is not shown;
      // the References list names getLayerWithSpecifiedName() -- confirm.
4742  OGRLayer& layer =
4744 
4745  // get the number of features in this layer
      // NOTE(review): the count is stored in a size_t without clamping, whereas
      // readMetadataSampleGDAL clamps the same call to >= 0 -- confirm a negative
      // count cannot reach this point.
4746  size_t numFeatures = layer.GetFeatureCount();
4747 
4748  // build map of metadata field (additional columns) name to index
4749  // use shared_ptr since we need to pass it to the worker
4750  FieldNameToIndexMapType fieldNameToIndexMap;
4751  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4752  CHECK(poFDefn);
4753  size_t numFields = poFDefn->GetFieldCount();
4754  for (size_t iField = 0; iField < numFields; iField++) {
4755  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4756  fieldNameToIndexMap.emplace(std::make_pair(poFieldDefn->GetNameRef(), iField));
4757  }
4758 
4759  // the geographic spatial reference we want to put everything in
4760  OGRSpatialReferenceUqPtr poGeographicSR(new OGRSpatialReference());
4761  poGeographicSR->importFromEPSG(copy_params.geo_coords_srid);
4762 
4763 #if GDAL_VERSION_MAJOR >= 3
4764  // GDAL 3.x (really Proj.4 6.x) now enforces lat, lon order
4765  // this results in X and Y being transposed for angle-based
4766  // coordinate systems. This restores the previous behavior.
4767  poGeographicSR->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
4768 #endif
4769 
4770 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4771  // just one "thread"
4772  size_t max_threads = 1;
4773 #else
4774  // how many threads to use
      // copy_params.threads == 0 means "use sysconf(_SC_NPROCESSORS_CONF)".
4775  size_t max_threads = 0;
4776  if (copy_params.threads == 0) {
4777  max_threads = sysconf(_SC_NPROCESSORS_CONF);
4778  } else {
4779  max_threads = copy_params.threads;
4780  }
4781 #endif
4782 
4783  // make an import buffer for each thread
      // import_buffers_vec must be empty on entry; it gets one TypedImportBuffer per
      // loader column descriptor for each of the max_threads slots.
4784  CHECK_EQ(import_buffers_vec.size(), 0u);
4785  import_buffers_vec.resize(max_threads);
4786  for (size_t i = 0; i < max_threads; i++) {
4787  for (const auto cd : loader->get_column_descs()) {
4788  import_buffers_vec[i].emplace_back(
4789  new TypedImportBuffer(cd, loader->getStringDict(cd)));
4790  }
4791  }
4792 
4793  // make render group analyzers for each poly column
      // NOTE(review): the line opening this block (listing line 4795) is not shown;
      // the References list names CopyParams::geo_assign_render_groups, so it is
      // presumably a condition on that flag -- confirm.
4794  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
4796  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
4797  loader->getTableDesc()->tableId, false, false, false);
4798  for (auto cd : columnDescriptors) {
4799  SQLTypes ct = cd->columnType.get_type();
4800  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4801  auto rga = std::make_shared<RenderGroupAnalyzer>();
4802  rga->seedFromExistingTableContents(loader, cd->columnName);
4803  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4804  }
4805  }
4806  }
4807 
4808 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4809  // threads
4810  std::list<std::future<ImportStatus>> threads;
4811 
4812  // use a stack to track thread_ids which must not overlap among threads
4813  // because thread_id is used to index import_buffers_vec[]
4814  std::stack<size_t> stack_thread_ids;
4815  for (size_t i = 0; i < max_threads; i++) {
4816  stack_thread_ids.push(i);
4817  }
4818 #endif
4819 
4820  // checkpoint the table
      // The epochs captured here are passed to checkpoint() at the end of the function.
4821  auto table_epochs = loader->getTableEpochs();
4822 
4823  // reset the layer
4824  layer.ResetReading();
4825 
4826  static const size_t MAX_FEATURES_PER_CHUNK = 1000;
4827 
4828  // make a features buffer for each thread
4829  std::vector<FeaturePtrVector> features(max_threads);
4830 
4831  // for each feature...
4832  size_t firstFeatureThisChunk = 0;
4833  while (firstFeatureThisChunk < numFeatures) {
4834  // how many features this chunk
4835  size_t numFeaturesThisChunk =
4836  std::min(MAX_FEATURES_PER_CHUNK, numFeatures - firstFeatureThisChunk);
4837 
4838 // get a thread_id not in use
4839 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4840  size_t thread_id = 0;
4841 #else
4842  auto thread_id = stack_thread_ids.top();
4843  stack_thread_ids.pop();
4844  CHECK(thread_id < max_threads);
4845 #endif
4846 
4847  // fill features buffer for new thread
      // NOTE(review): the GetNextFeature() result is appended without a null check
      // here -- confirm the worker tolerates null entries.
4848  for (size_t i = 0; i < numFeaturesThisChunk; i++) {
4849  features[thread_id].emplace_back(layer.GetNextFeature());
4850  }
4851 
4852 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4853  // call worker function directly
      // NOTE(review): import_thread_shapefile is declared taking `const
      // FeaturePtrVector &features`, so std::move in this direct call does not by
      // itself empty features[thread_id] -- confirm the buffer is cleared elsewhere.
4854  auto ret_import_status = import_thread_shapefile(0,
4855  this,
4856  poGeographicSR.get(),
4857  std::move(features[thread_id]),
4858  firstFeatureThisChunk,
4859  numFeaturesThisChunk,
4860  fieldNameToIndexMap,
4861  columnNameToSourceNameMap,
4862  columnIdToRenderGroupAnalyzerMap);
4863  import_status += ret_import_status;
      // NOTE(review): this sets rows_estimated to (fraction of features dispatched) *
      // rows_completed, which is 0 for the first chunk -- confirm this is the
      // intended estimate.
4864  import_status.rows_estimated = ((float)firstFeatureThisChunk / (float)numFeatures) *
4865  import_status.rows_completed;
4866  set_import_status(import_id, import_status);
4867 #else
4868  // fire up that thread to import this geometry
      // NOTE(review): the callable argument (listing line 4870) is not shown; the
      // References list names import_thread_shapefile() -- confirm.
4869  threads.push_back(std::async(std::launch::async,
4871  thread_id,
4872  this,
4873  poGeographicSR.get(),
4874  std::move(features[thread_id]),
4875  firstFeatureThisChunk,
4876  numFeaturesThisChunk,
4877  fieldNameToIndexMap,
4878  columnNameToSourceNameMap,
4879  columnIdToRenderGroupAnalyzerMap));
4880 
4881  // let the threads run
      // Polls every outstanding future with a zero timeout, folds finished results
      // into import_status, and returns their thread_id to the stack. The loop exits
      // as soon as fewer than max_threads futures remain, so the outer loop can
      // dispatch the next chunk.
4882  while (threads.size() > 0) {
4883  int nready = 0;
4884  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4885  it != threads.end();) {
4886  auto& p = *it;
4887  std::chrono::milliseconds span(
4888  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4889  if (p.wait_for(span) == std::future_status::ready) {
4890  auto ret_import_status = p.get();
4891  import_status += ret_import_status;
4892  import_status.rows_estimated =
4893  ((float)firstFeatureThisChunk / (float)numFeatures) *
4894  import_status.rows_completed;
4895  set_import_status(import_id, import_status);
4896 
4897  // recall thread_id for reuse
4898  stack_thread_ids.push(ret_import_status.thread_id);
4899 
4900  threads.erase(it++);
4901  ++nready;
4902  } else {
4903  ++it;
4904  }
4905  }
4906 
4907  if (nready == 0) {
4908  std::this_thread::yield();
4909  }
4910 
4911  // keep reading if any free thread slot
4912  // this is one of the major difference from old threading model !!
4913  if (threads.size() < max_threads) {
4914  break;
4915  }
4916  }
4917 #endif
4918 
4919  // out of rows?
      // Exceeding copy_params.max_reject marks the load both failed and truncated
      // and stops dispatching further chunks.
4920  if (import_status.rows_rejected > copy_params.max_reject) {
4921  load_truncated = true;
4922  load_failed = true;
4923  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4924  break;
4925  }
4926 
4927  // failed?
4928  if (load_failed) {
4929  load_truncated = true;
4930  LOG(ERROR)
4931  << "A call to the Loader::load failed, Please review the logs for more details";
4932  break;
4933  }
4934 
4935  firstFeatureThisChunk += numFeaturesThisChunk;
4936  }
4937 
4938 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4939  // wait for any remaining threads
      // This also runs after an early break above, so still-running workers are
      // waited on and their results folded in before checkpoint().
4940  if (threads.size()) {
4941  for (auto& p : threads) {
4942  // wait for the thread
4943  p.wait();
4944  // get the result and update the final import status
4945  auto ret_import_status = p.get();
4946  import_status += ret_import_status;
4947  import_status.rows_estimated = import_status.rows_completed;
4948  set_import_status(import_id, import_status);
4949  }
4950  }
4951 #endif
4952 
4953  checkpoint(table_epochs);
4954 
4955  // must set import_status.load_truncated before closing this end of pipe
4956  // otherwise, the thread on the other end would throw an unwanted 'write()'
4957  // exception
4958  import_status.load_truncated = load_truncated;
4959  return import_status;
4960 }
std::map< std::string, size_t > FieldNameToIndexMapType
Definition: Importer.cpp:132
#define CHECK_EQ(x, y)
Definition: Logger.h:205
SQLTypes
Definition: sqltypes.h:40
std::unique_ptr< OGRSpatialReference, OGRSpatialReferenceDeleter > OGRSpatialReferenceUqPtr
Definition: Importer.cpp:112
#define LOG(tag)
Definition: Logger.h:188
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4281
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:93
std::map< int, std::shared_ptr< RenderGroupAnalyzer > > ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:135
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
std::string geo_layer_name
Definition: CopyParams.h:78
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:213
std::string import_id
Definition: Importer.h:819
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3268
ThreadId thread_id()
Definition: Logger.cpp:731
#define CHECK(condition)
Definition: Logger.h:197
static ImportStatus import_thread_shapefile(int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
Definition: Importer.cpp:2120
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4248
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ load()

void import_export::Importer::load ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t  row_count 
)

Definition at line 3261 of file Importer.cpp.

References import_export::DataStreamSink::load_failed, and loader.

Referenced by import_export::DataStreamSink::archivePlumber(), import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

3262  {
      // Passes import_buffers and row_count to loader->loadNoCheckpoint(). A false
      // return is recorded by setting load_failed; nothing is thrown or returned here.
3263  if (!loader->loadNoCheckpoint(import_buffers, row_count)) {
3264  load_failed = true;
3265  }
3266 }
std::unique_ptr< Loader > loader
Definition: Importer.h:824
+ Here is the caller graph for this function:

◆ openGDALDataset()

OGRDataSource * import_export::Importer::openGDALDataset ( const std::string &  fileName,
const CopyParams &  copy_params 
)
static private

Definition at line 4248 of file Importer.cpp.

References logger::ERROR, logger::INFO, import_export::GDAL::init(), LOG, and setGDALAuthorizationTokens().

Referenced by gdalGetLayersInGeoFile(), gdalToColumnDescriptors(), importGDAL(), and readMetadataSampleGDAL().

4249  {
      // Opens file_name as a GDAL/OGR data source and returns the raw pointer.
      // On failure the GDAL error message is logged and nullptr is returned; this
      // function does not throw for an open failure.
4250  // lazy init GDAL
4251  GDAL::init();
4252 
      // NOTE(review): the statement under the next comment (listing line 4254) is not
      // shown in this extract; the References list names setGDALAuthorizationTokens()
      // -- confirm against Importer.cpp.
4253  // set authorization tokens
4255 
4256  // open the file
4257  OGRDataSource* poDS;
4258 #if GDAL_VERSION_MAJOR == 1
4259  poDS = (OGRDataSource*)OGRSFDriverRegistrar::Open(file_name.c_str(), false);
4260 #else
      // First attempt uses GDAL_OF_VECTOR only; if that yields nullptr, retry with
      // GDAL_OF_READONLY | GDAL_OF_VECTOR and log at INFO when the retry succeeds.
      // NOTE(review): GDAL's headers appear to define GDAL_OF_READONLY as 0x00, which
      // would make the retry flags identical to the first attempt -- confirm.
4261  poDS = (OGRDataSource*)GDALOpenEx(
4262  file_name.c_str(), GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
4263  if (poDS == nullptr) {
4264  poDS = (OGRDataSource*)GDALOpenEx(
4265  file_name.c_str(), GDAL_OF_READONLY | GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
4266  if (poDS) {
4267  LOG(INFO) << "openGDALDataset had to open as read-only";
4268  }
4269  }
4270 #endif
4271  if (poDS == nullptr) {
4272  LOG(ERROR) << "openGDALDataset Error: " << CPLGetLastErrorMsg();
4273  }
4274  // NOTE(adb): If extending this function, refactor to ensure any errors will not result
4275  // in a memory leak if GDAL successfully opened the input dataset.
      // The pointer is returned raw. NOTE(review): callers presumably wrap it in
      // OGRDataSourceUqPtr to release it -- confirm at each call site.
4276  return poDS;
4277 }
#define LOG(tag)
Definition: Logger.h:188
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4178
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ readMetadataSampleGDAL()

void import_export::Importer::readMetadataSampleGDAL ( const std::string &  fileName,
const std::string &  geoColumnName,
std::map< std::string, std::vector< std::string >> &  metadata,
int  rowLimit,
const CopyParams copy_params 
)
static

Definition at line 4304 of file Importer.cpp.

References CHECK, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), and openGDALDataset().

Referenced by DBHandler::detect_column_types().

4309  {
      // Fills `metadata` with a sample of at most rowLimit features from one layer:
      // one vector of strings per attribute field, plus one for geo_column_name
      // holding each feature's geometry as WKT.
      // Throws std::runtime_error if poDS is null or a geometry type is unsupported.
      // NOTE(review): this extract omits the signature line and listing lines 4310,
      // 4317 and 4353; the declaration of poDS, the initializer of `layer` and the
      // condition guarding the multi-geometry throw are therefore not shown.
4311  if (poDS == nullptr) {
4312  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4313  file_name);
4314  }
4315 
4316  OGRLayer& layer =
4318 
4319  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4320  CHECK(poFDefn);
4321 
4322  // typeof GetFeatureCount() is different between GDAL 1.x (int32_t) and 2.x (int64_t)
      // numFeatures = clamp(nFeats, 0, rowLimit): the max() guards against a
      // negative count before the conversion to size_t.
4323  auto nFeats = layer.GetFeatureCount();
4324  size_t numFeatures =
4325  std::max(static_cast<decltype(nFeats)>(0),
4326  std::min(static_cast<decltype(nFeats)>(rowLimit), nFeats));
      // Pre-size every column to numFeatures empty strings; slots that are never
      // assigned below stay empty.
4327  for (auto iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4328  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4329  // FIXME(andrewseidl): change this to the faster one used by readVerticesFromGDAL
4330  metadata.emplace(poFieldDefn->GetNameRef(), std::vector<std::string>(numFeatures));
4331  }
4332  metadata.emplace(geo_column_name, std::vector<std::string>(numFeatures));
4333  layer.ResetReading();
4334  size_t iFeature = 0;
4335  while (iFeature < numFeatures) {
4336  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
      // Stop early if the layer yields fewer features than numFeatures.
4337  if (!poFeature) {
4338  break;
4339  }
4340 
      // Features without a geometry skip the whole block below, including the
      // attribute fields, so their row stays all-empty.
4341  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4342  if (poGeometry != nullptr) {
4343  // validate geom type (again?)
4344  switch (wkbFlatten(poGeometry->getGeometryType())) {
4345  case wkbPoint:
4346  case wkbLineString:
4347  case wkbPolygon:
4348  case wkbMultiPolygon:
4349  break;
4350  case wkbMultiPoint:
4351  case wkbMultiLineString:
4352  // supported if geo_explode_collections is specified
4354  throw std::runtime_error("Unsupported geometry type: " +
4355  std::string(poGeometry->getGeometryName()));
4356  }
4357  break;
4358  default:
4359  throw std::runtime_error("Unsupported geometry type: " +
4360  std::string(poGeometry->getGeometryName()));
4361  }
4362 
4363  // populate metadata for regular fields
      // NOTE(review): `auto i` iterates by value, so each (name, vector) entry of
      // metadata is copied for every feature; only i.first is used.
4364  for (auto i : metadata) {
4365  auto iField = poFeature->GetFieldIndex(i.first.c_str());
4366  if (iField >= 0) { // geom is -1
4367  metadata[i.first].at(iFeature) =
4368  std::string(poFeature->GetFieldAsString(iField));
4369  }
4370  }
4371 
4372  // populate metadata for geo column with WKT string
      // The buffer written by exportToWkt is copied into the std::string and then
      // released with CPLFree.
4373  char* wkts = nullptr;
4374  poGeometry->exportToWkt(&wkts);
4375  CHECK(wkts);
4376  metadata[geo_column_name].at(iFeature) = wkts;
4377  CPLFree(wkts);
4378  }
4379  iFeature++;
4380  }
4381 }
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4281
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:93
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:102
std::string geo_layer_name
Definition: CopyParams.h:78
#define CHECK(condition)
Definition: Logger.h:197
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4248
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_geo_physical_import_buffer()

void import_export::Importer::set_geo_physical_import_buffer ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< double > &  coords,
std::vector< double > &  bounds,
std::vector< int > &  ring_sizes,
std::vector< int > &  poly_rings,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1421 of file Importer.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), Parser::AddColumnStmt::execute(), import_export::import_thread_delimited(), DBHandler::load_table(), and foreign_storage::csv_file_buffer_parser::process_geo_column().

1431  {
      // Appends one row of physical geo column values for the geo column `cd` to
      // import_buffers, starting at col_idx and advancing col_idx once per value.
      // Physical column descriptors are looked up by incrementing cd->columnId:
      //   coords        - always
      //   ring_sizes    - kPOLYGON, kMULTIPOLYGON
      //   poly_rings    - kMULTIPOLYGON
      //   bounds        - kLINESTRING, kPOLYGON, kMULTIPOLYGON
      //   render_group  - kPOLYGON, kMULTIPOLYGON
      // `coords` may be modified in place (see the NULL POINT handling below).
1432  const auto col_ti = cd->columnType;
1433  const auto col_type = col_ti.get_type();
1434  auto columnId = cd->columnId;
1435  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1436  bool is_null_geo = false;
1437  bool is_null_point = false;
      // Null detection only applies to nullable columns; for NOT NULL columns
      // is_null_geo stays false even when coords is empty.
1438  if (!col_ti.get_notnull()) {
1439  // Check for NULL geo
1440  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1441  is_null_point = true;
1442  coords.clear();
1443  }
1444  is_null_geo = coords.empty();
1445  if (is_null_point) {
1446  coords.push_back(NULL_ARRAY_DOUBLE);
1447  coords.push_back(NULL_DOUBLE);
1448  // Treating POINT coords as notnull, need to store actual encoding
1449  // [un]compressed+[not]null
1450  is_null_geo = false;
1451  }
1452  }
1453  TDatum tdd_coords;
1454  // Get the raw data representing [optionally compressed] non-NULL geo's coords.
1455  // One exception - NULL POINT geo: coords need to be processed to encode nullness
1456  // in a fixlen array, compressed and uncompressed.
1457  if (!is_null_geo) {
1458  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(coords, col_ti);
1459  tdd_coords.val.arr_val.reserve(compressed_coords.size());
      // Each byte returned by compress_coords becomes one int_val array element.
1460  for (auto cc : compressed_coords) {
1461  tdd_coords.val.arr_val.emplace_back();
1462  tdd_coords.val.arr_val.back().val.int_val = cc;
1463  }
1464  }
1465  tdd_coords.is_null = is_null_geo;
1466  import_buffers[col_idx++]->add_value(cd_coords, tdd_coords, false, replicate_count);
1467 
1468  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1469  // Create ring_sizes array value and add it to the physical column
1470  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1471  TDatum tdd_ring_sizes;
1472  tdd_ring_sizes.val.arr_val.reserve(ring_sizes.size());
1473  if (!is_null_geo) {
1474  for (auto ring_size : ring_sizes) {
1475  tdd_ring_sizes.val.arr_val.emplace_back();
1476  tdd_ring_sizes.val.arr_val.back().val.int_val = ring_size;
1477  }
1478  }
1479  tdd_ring_sizes.is_null = is_null_geo;
1480  import_buffers[col_idx++]->add_value(
1481  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1482  }
1483 
1484  if (col_type == kMULTIPOLYGON) {
1485  // Create poly_rings array value and add it to the physical column
1486  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1487  TDatum tdd_poly_rings;
1488  tdd_poly_rings.val.arr_val.reserve(poly_rings.size());
1489  if (!is_null_geo) {
1490  for (auto num_rings : poly_rings) {
1491  tdd_poly_rings.val.arr_val.emplace_back();
1492  tdd_poly_rings.val.arr_val.back().val.int_val = num_rings;
1493  }
1494  }
1495  tdd_poly_rings.is_null = is_null_geo;
1496  import_buffers[col_idx++]->add_value(
1497  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1498  }
1499 
      // Bounds are stored as real_val elements, one per entry of `bounds`.
1500  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1501  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1502  TDatum tdd_bounds;
1503  tdd_bounds.val.arr_val.reserve(bounds.size());
1504  if (!is_null_geo) {
1505  for (auto b : bounds) {
1506  tdd_bounds.val.arr_val.emplace_back();
1507  tdd_bounds.val.arr_val.back().val.real_val = b;
1508  }
1509  }
1510  tdd_bounds.is_null = is_null_geo;
1511  import_buffers[col_idx++]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1512  }
1513 
1514  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1515  // Create render_group value and add it to the physical column
      // The render_group value is marked null whenever the geo itself is null.
1516  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1517  TDatum td_render_group;
1518  td_render_group.val.int_val = render_group;
1519  td_render_group.is_null = is_null_geo;
1520  import_buffers[col_idx++]->add_value(
1521  cd_render_group, td_render_group, false, replicate_count);
1522  }
1523 }
#define NULL_DOUBLE
Definition: sqltypes.h:186
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
SQLTypeInfo columnType
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_geo_physical_import_buffer_columnar()

void import_export::Importer::set_geo_physical_import_buffer_columnar ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< std::vector< double >> &  coords_column,
std::vector< std::vector< double >> &  bounds_column,
std::vector< std::vector< int >> &  ring_sizes_column,
std::vector< std::vector< int >> &  poly_rings_column,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1525 of file Importer.cpp.

References CHECK, ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by DBHandler::load_table_binary_columnar().

1535  {
      // Columnar counterpart of set_geo_physical_import_buffer: for the geo column
      // `cd`, appends every row of each physical column (coords, then ring_sizes /
      // poly_rings / bounds / render_group depending on the geo type) to
      // import_buffers[col_idx], advancing col_idx once per physical column.
      // The ring_sizes, poly_rings and bounds columns must have the same row count
      // as coords_column; a mismatch trips CHECK(false).
1536  const auto col_ti = cd->columnType;
1537  const auto col_type = col_ti.get_type();
1538  auto columnId = cd->columnId;
1539 
1540  auto coords_row_count = coords_column.size();
1541  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
      // `coords` is a per-row copy, so the clear()/push_back() below do not modify
      // the caller's coords_column.
1542  for (auto coords : coords_column) {
1543  bool is_null_geo = false;
1544  bool is_null_point = false;
1545  if (!col_ti.get_notnull()) {
1546  // Check for NULL geo
1547  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1548  is_null_point = true;
1549  coords.clear();
1550  }
1551  is_null_geo = coords.empty();
1552  if (is_null_point) {
1553  coords.push_back(NULL_ARRAY_DOUBLE);
1554  coords.push_back(NULL_DOUBLE);
1555  // Treating POINT coords as notnull, need to store actual encoding
1556  // [un]compressed+[not]null
1557  is_null_geo = false;
1558  }
1559  }
1560  std::vector<TDatum> td_coords_data;
1561  if (!is_null_geo) {
1562  std::vector<uint8_t> compressed_coords =
1563  Geospatial::compress_coords(coords, col_ti);
      // Each byte returned by compress_coords becomes one int_val TDatum.
1564  for (auto cc : compressed_coords) {
1565  TDatum td_byte;
1566  td_byte.val.int_val = cc;
1567  td_coords_data.push_back(td_byte);
1568  }
1569  }
1570  TDatum tdd_coords;
1571  tdd_coords.val.arr_val = td_coords_data;
1572  tdd_coords.is_null = is_null_geo;
1573  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false, replicate_count);
1574  }
1575  col_idx++;
1576 
1577  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1578  if (ring_sizes_column.size() != coords_row_count) {
1579  CHECK(false) << "Geometry import columnar: ring sizes column size mismatch";
1580  }
1581  // Create ring_sizes array value and add it to the physical column
      // Nullness is decided per physical column: a nullable row is null here when
      // its ring_sizes vector is empty, independent of the coords check above.
1582  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1583  for (auto ring_sizes : ring_sizes_column) {
1584  bool is_null_geo = false;
1585  if (!col_ti.get_notnull()) {
1586  // Check for NULL geo
1587  is_null_geo = ring_sizes.empty();
1588  }
1589  std::vector<TDatum> td_ring_sizes;
1590  for (auto ring_size : ring_sizes) {
1591  TDatum td_ring_size;
1592  td_ring_size.val.int_val = ring_size;
1593  td_ring_sizes.push_back(td_ring_size);
1594  }
1595  TDatum tdd_ring_sizes;
1596  tdd_ring_sizes.val.arr_val = td_ring_sizes;
1597  tdd_ring_sizes.is_null = is_null_geo;
1598  import_buffers[col_idx]->add_value(
1599  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1600  }
1601  col_idx++;
1602  }
1603 
1604  if (col_type == kMULTIPOLYGON) {
1605  if (poly_rings_column.size() != coords_row_count) {
1606  CHECK(false) << "Geometry import columnar: poly rings column size mismatch";
1607  }
1608  // Create poly_rings array value and add it to the physical column
1609  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1610  for (auto poly_rings : poly_rings_column) {
1611  bool is_null_geo = false;
1612  if (!col_ti.get_notnull()) {
1613  // Check for NULL geo
1614  is_null_geo = poly_rings.empty();
1615  }
1616  std::vector<TDatum> td_poly_rings;
1617  for (auto num_rings : poly_rings) {
1618  TDatum td_num_rings;
1619  td_num_rings.val.int_val = num_rings;
1620  td_poly_rings.push_back(td_num_rings);
1621  }
1622  TDatum tdd_poly_rings;
1623  tdd_poly_rings.val.arr_val = td_poly_rings;
1624  tdd_poly_rings.is_null = is_null_geo;
1625  import_buffers[col_idx]->add_value(
1626  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1627  }
1628  col_idx++;
1629  }
1630 
1631  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1632  if (bounds_column.size() != coords_row_count) {
1633  CHECK(false) << "Geometry import columnar: bounds column size mismatch";
1634  }
1635  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
      // A nullable row is null here when bounds is empty or starts with
      // NULL_ARRAY_DOUBLE. The values are still copied into the array in the
      // latter case; only is_null marks the row as null.
1636  for (auto bounds : bounds_column) {
1637  bool is_null_geo = false;
1638  if (!col_ti.get_notnull()) {
1639  // Check for NULL geo
1640  is_null_geo = (bounds.empty() || bounds[0] == NULL_ARRAY_DOUBLE);
1641  }
1642  std::vector<TDatum> td_bounds_data;
1643  for (auto b : bounds) {
1644  TDatum td_double;
1645  td_double.val.real_val = b;
1646  td_bounds_data.push_back(td_double);
1647  }
1648  TDatum tdd_bounds;
1649  tdd_bounds.val.arr_val = td_bounds_data;
1650  tdd_bounds.is_null = is_null_geo;
1651  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1652  }
1653  col_idx++;
1654  }
1655 
1656  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1657  // Create render_group value and add it to the physical column
      // The same render_group value is added once per coords row and is never
      // marked null. NOTE(review): the row-wise set_geo_physical_import_buffer
      // sets is_null from is_null_geo instead -- confirm the difference is intended.
1658  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1659  TDatum td_render_group;
1660  td_render_group.val.int_val = render_group;
1661  td_render_group.is_null = false;
1662  for (decltype(coords_row_count) i = 0; i < coords_row_count; i++) {
1663  import_buffers[col_idx]->add_value(
1664  cd_render_group, td_render_group, false, replicate_count);
1665  }
1666  col_idx++;
1667  }
1668 }
#define NULL_DOUBLE
Definition: sqltypes.h:186
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
SQLTypeInfo columnType
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ set_import_status()

void import_export::Importer::set_import_status ( const std::string &  id,
const ImportStatus  is 
)
static

Definition at line 213 of file Importer.cpp.

References import_export::ImportStatus::elapsed, import_export::ImportStatus::end, import_id, and import_export::ImportStatus::start.

Referenced by importDelimited(), and importGDAL().

213  {
      // Takes a lock on status_mutex, then stamps `is` with the current
      // steady_clock time as its end and with end - start (in milliseconds) as its
      // elapsed duration.
      // NOTE(review): listing line 217 is not shown in this extract; the
      // cross-references below list import_status_map, so presumably the stamped
      // status is stored there under the id -- confirm against Importer.cpp.
214  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
215  is.end = std::chrono::steady_clock::now();
216  is.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(is.end - is.start);
218 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:147
std::string import_id
Definition: Importer.h:819
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:146
+ Here is the caller graph for this function:

◆ setGDALAuthorizationTokens()

void import_export::Importer::setGDALAuthorizationTokens ( const CopyParams &  copy_params)
staticprivate

Definition at line 4178 of file Importer.cpp.

References logger::INFO, LOG, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, and import_export::CopyParams::s3_secret_key.

Referenced by gdalGetAllFilesInArchive(), gdalGetLayersInGeoFile(), gdalStatInternal(), and openGDALDataset().

4178  {
4179  // for now we only support S3
4180  // @TODO generalize CopyParams to have a dictionary of GDAL tokens
4181  // only set if non-empty to allow GDAL defaults to persist
4182  // explicitly clear if empty to revert to default and not reuse a previous session's
4183  // keys
4184  if (copy_params.s3_region.size()) {
4185 #if DEBUG_AWS_AUTHENTICATION
4186  LOG(INFO) << "GDAL: Setting AWS_REGION to '" << copy_params.s3_region << "'";
4187 #endif
4188  CPLSetConfigOption("AWS_REGION", copy_params.s3_region.c_str());
4189  } else {
4190 #if DEBUG_AWS_AUTHENTICATION
4191  LOG(INFO) << "GDAL: Clearing AWS_REGION";
4192 #endif
4193  CPLSetConfigOption("AWS_REGION", nullptr);
4194  }
4195  if (copy_params.s3_endpoint.size()) {
4196 #if DEBUG_AWS_AUTHENTICATION
4197  LOG(INFO) << "GDAL: Setting AWS_S3_ENDPOINT to '" << copy_params.s3_endpoint << "'";
4198 #endif
4199  CPLSetConfigOption("AWS_S3_ENDPOINT", copy_params.s3_endpoint.c_str());
4200  } else {
4201 #if DEBUG_AWS_AUTHENTICATION
4202  LOG(INFO) << "GDAL: Clearing AWS_S3_ENDPOINT";
4203 #endif
4204  CPLSetConfigOption("AWS_S3_ENDPOINT", nullptr);
4205  }
4206  if (copy_params.s3_access_key.size()) {
4207 #if DEBUG_AWS_AUTHENTICATION
4208  LOG(INFO) << "GDAL: Setting AWS_ACCESS_KEY_ID to '" << copy_params.s3_access_key
4209  << "'";
4210 #endif
4211  CPLSetConfigOption("AWS_ACCESS_KEY_ID", copy_params.s3_access_key.c_str());
4212  } else {
4213 #if DEBUG_AWS_AUTHENTICATION
4214  LOG(INFO) << "GDAL: Clearing AWS_ACCESS_KEY_ID";
4215 #endif
4216  CPLSetConfigOption("AWS_ACCESS_KEY_ID", nullptr);
4217  }
4218  if (copy_params.s3_secret_key.size()) {
4219 #if DEBUG_AWS_AUTHENTICATION
4220  LOG(INFO) << "GDAL: Setting AWS_SECRET_ACCESS_KEY to '" << copy_params.s3_secret_key
4221  << "'";
4222 #endif
4223  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", copy_params.s3_secret_key.c_str());
4224  } else {
4225 #if DEBUG_AWS_AUTHENTICATION
4226  LOG(INFO) << "GDAL: Clearing AWS_SECRET_ACCESS_KEY";
4227 #endif
4228  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", nullptr);
4229  }
4230 
4231 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4232  // if we haven't set keys, we need to disable signed access
4233  if (copy_params.s3_access_key.size() || copy_params.s3_secret_key.size()) {
4234 #if DEBUG_AWS_AUTHENTICATION
4235  LOG(INFO) << "GDAL: Clearing AWS_NO_SIGN_REQUEST";
4236 #endif
4237  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", nullptr);
4238  } else {
4239 #if DEBUG_AWS_AUTHENTICATION
4240  LOG(INFO) << "GDAL: Setting AWS_NO_SIGN_REQUEST to 'YES'";
4241 #endif
4242  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", "YES");
4243  }
4244 #endif
4245 }
std::string s3_secret_key
Definition: CopyParams.h:63
#define LOG(tag)
Definition: Logger.h:188
std::string s3_access_key
Definition: CopyParams.h:62
+ Here is the caller graph for this function:

Member Data Documentation

◆ buffer

◆ file_size

size_t import_export::Importer::file_size
private

Definition at line 820 of file Importer.h.

Referenced by importDelimited(), and Importer().

◆ import_buffers_vec

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > > import_export::Importer::import_buffers_vec
private

◆ import_id

◆ is_array_a

std::unique_ptr<bool[]> import_export::Importer::is_array_a
private

Definition at line 825 of file Importer.h.

Referenced by Importer().

◆ loader

std::unique_ptr<Loader> import_export::Importer::loader
private

◆ max_threads

size_t import_export::Importer::max_threads
private

The documentation for this class was generated from the following files: