OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export::Importer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::Importer:
+ Collaboration diagram for import_export::Importer:

Classes

struct  GeoFileLayerInfo
 

Public Types

enum  GeoFileLayerContents { GeoFileLayerContents::EMPTY, GeoFileLayerContents::GEO, GeoFileLayerContents::NON_GEO, GeoFileLayerContents::UNSUPPORTED_GEO }
 

Public Member Functions

 Importer (Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
 
 Importer (Loader *providedLoader, const std::string &f, const CopyParams &p)
 
 ~Importer () override
 
ImportStatus import ()
 
ImportStatus importDelimited (const std::string &file_path, const bool decompressed) override
 
ImportStatus importGDAL (std::map< std::string, std::string > colname_to_src)
 
const CopyParams & get_copy_params () const
 
const std::list< const
ColumnDescriptor * > & 
get_column_descs () const
 
void load (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count)
 
std::vector< std::vector
< std::unique_ptr
< TypedImportBuffer > > > & 
get_import_buffers_vec ()
 
std::vector< std::unique_ptr
< TypedImportBuffer > > & 
get_import_buffers (int i)
 
const bool * get_is_array () const
 
Catalog_Namespace::CataloggetCatalog ()
 
void checkpoint (const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
 
auto getLoader () const
 
- Public Member Functions inherited from import_export::DataStreamSink
 DataStreamSink ()
 
 DataStreamSink (const CopyParams &copy_params, const std::string file_path)
 
virtual ~DataStreamSink ()
 
const CopyParams & get_copy_params () const
 
void import_compressed (std::vector< std::string > &file_paths)
 

Static Public Member Functions

static ImportStatus get_import_status (const std::string &id)
 
static void set_import_status (const std::string &id, const ImportStatus is)
 
static const std::list
< ColumnDescriptor > 
gdalToColumnDescriptors (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
 
static void readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
 
static bool gdalFileExists (const std::string &path, const CopyParams &copy_params)
 
static bool gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params)
 
static std::vector< std::string > gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params)
 
static std::vector
< GeoFileLayerInfo > 
gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params)
 
static void set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
 
static void set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, int render_group, const int64_t replicate_count=0)
 

Static Private Member Functions

static bool gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir)
 
static OGRDataSource * openGDALDataset (const std::string &fileName, const CopyParams &copy_params)
 
static void setGDALAuthorizationTokens (const CopyParams &copy_params)
 

Private Attributes

std::string import_id
 
size_t file_size
 
size_t max_threads
 
char * buffer [2]
 
std::vector< std::vector
< std::unique_ptr
< TypedImportBuffer > > > 
import_buffers_vec
 
std::unique_ptr< Loader > loader
 
std::unique_ptr< bool[]> is_array_a
 

Additional Inherited Members

- Protected Member Functions inherited from import_export::DataStreamSink
ImportStatus archivePlumber ()
 
- Protected Attributes inherited from import_export::DataStreamSink
CopyParams copy_params
 
const std::string file_path
 
FILE * p_file = nullptr
 
ImportStatus import_status
 
bool load_failed = false
 
size_t total_file_size {0}
 
std::vector< size_t > file_offsets
 
std::mutex file_offsets_mutex
 

Detailed Description

Definition at line 730 of file Importer.h.

Member Enumeration Documentation

Enumerator
EMPTY 
GEO 
NON_GEO 
UNSUPPORTED_GEO 

Definition at line 776 of file Importer.h.

776  enum class GeoFileLayerContents { EMPTY, GEO, NON_GEO, UNSUPPORTED_GEO };

Constructor & Destructor Documentation

import_export::Importer::Importer ( Catalog_Namespace::Catalog &  c,
const TableDescriptor *  t,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 153 of file Importer.cpp.

157  : Importer(new Loader(c, t), f, p) {}
Importer(Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
Definition: Importer.cpp:153
import_export::Importer::Importer ( Loader *  providedLoader,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 159 of file Importer.cpp.

References buffer, import_export::DataStreamSink::file_path, file_size, import_id, is_array_a, kARRAY, loader, max_threads, and import_export::DataStreamSink::p_file.

160  : DataStreamSink(p, f), loader(providedLoader) {
161  import_id = boost::filesystem::path(file_path).filename().string();
162  file_size = 0;
163  max_threads = 0;
164  p_file = nullptr;
165  buffer[0] = nullptr;
166  buffer[1] = nullptr;
167  // we may be overallocating a little more memory here due to dropping phy cols.
168  // it shouldn't be an issue because iteration of it is not supposed to go OOB.
169  auto is_array = std::unique_ptr<bool[]>(new bool[loader->get_column_descs().size()]);
170  int i = 0;
171  bool has_array = false;
172  // TODO: replace this ugly way of skipping phy cols once if isPhyGeo is defined
173  int skip_physical_cols = 0;
174  for (auto& p : loader->get_column_descs()) {
175  // phy geo columns can't be in input file
176  if (skip_physical_cols-- > 0) {
177  continue;
178  }
179  // neither are rowid or $deleted$
180  // note: columns can be added after rowid/$deleted$
181  if (p->isVirtualCol || p->isDeletedCol) {
182  continue;
183  }
184  skip_physical_cols = p->columnType.get_physical_cols();
185  if (p->columnType.get_type() == kARRAY) {
186  is_array.get()[i] = true;
187  has_array = true;
188  } else {
189  is_array.get()[i] = false;
190  }
191  ++i;
192  }
193  if (has_array) {
194  is_array_a = std::unique_ptr<bool[]>(is_array.release());
195  } else {
196  is_array_a = std::unique_ptr<bool[]>(nullptr);
197  }
198 }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:825
std::string import_id
Definition: Importer.h:819
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648
import_export::Importer::~Importer ( )
override

Definition at line 200 of file Importer.cpp.

References buffer, and import_export::DataStreamSink::p_file.

200  {
201  if (p_file != nullptr) {
202  fclose(p_file);
203  }
204  if (buffer[0] != nullptr) {
205  free(buffer[0]);
206  }
207  if (buffer[1] != nullptr) {
208  free(buffer[1]);
209  }
210 }

Member Function Documentation

void import_export::Importer::checkpoint ( const std::vector< Catalog_Namespace::TableEpochInfo > &  table_epochs)

Definition at line 3297 of file Importer.cpp.

References DEBUG_TIMING, Data_Namespace::DISK_LEVEL, logger::ERROR, measure< TimeT >::execution(), StorageType::FOREIGN_TABLE, import_buffers_vec, logger::INFO, import_export::DataStreamSink::load_failed, loader, and LOG.

Referenced by importDelimited(), and importGDAL().

3298  {
3299  if (loader->getTableDesc()->storageType != StorageType::FOREIGN_TABLE) {
3300  if (load_failed) {
3301  // rollback to starting epoch - undo all the added records
3302  loader->setTableEpochs(table_epochs);
3303  } else {
3304  loader->checkpoint();
3305  }
3306  }
3307 
3308  if (loader->getTableDesc()->persistenceLevel ==
3309  Data_Namespace::MemoryLevel::DISK_LEVEL) { // only checkpoint disk-resident tables
3310  auto ms = measure<>::execution([&]() {
3311  if (!load_failed) {
3312  for (auto& p : import_buffers_vec[0]) {
3313  if (!p->stringDictCheckpoint()) {
3314  LOG(ERROR) << "Checkpointing Dictionary for Column "
3315  << p->getColumnDesc()->columnName << " failed.";
3316  load_failed = true;
3317  break;
3318  }
3319  }
3320  }
3321  });
3322  if (DEBUG_TIMING) {
3323  LOG(INFO) << "Dictionary Checkpointing took " << (double)ms / 1000.0 << " Seconds."
3324  << std::endl;
3325  }
3326  }
3327 }
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
#define LOG(tag)
Definition: Logger.h:188
#define DEBUG_TIMING
Definition: Importer.cpp:142
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
static constexpr char const * FOREIGN_TABLE
std::unique_ptr< Loader > loader
Definition: Importer.h:824

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalFileExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4599 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::check_geospatial_files(), DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4599  {
4600  return gdalStatInternal(path, copy_params, false);
4601 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4566

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalFileOrDirectoryExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4604 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4605  {
4606  return gdalStatInternal(path, copy_params, true);
4607 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4566

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::string > import_export::Importer::gdalGetAllFilesInArchive ( const std::string &  archive_path,
const CopyParams copy_params 
)
static

Definition at line 4676 of file Importer.cpp.

References import_export::gdalGatherFilesInArchiveRecursive(), Geospatial::GDAL::init(), and setGDALAuthorizationTokens().

Referenced by anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive(), and DBHandler::get_all_files_in_archive().

4678  {
4679  // lazy init GDAL
4680  Geospatial::GDAL::init();
4681 
4682  // set authorization tokens
4683  setGDALAuthorizationTokens(copy_params);
4684 
4685  // prepare to gather files
4686  std::vector<std::string> files;
4687 
4688  // gather the files recursively
4689  gdalGatherFilesInArchiveRecursive(archive_path, files);
4690 
4691  // convert to relative paths inside archive
4692  for (auto& file : files) {
4693  file.erase(0, archive_path.size() + 1); // remove archive_path and the slash
4694  }
4695 
4696  // done
4697  return files;
4698 }
static void init()
Definition: GDAL.cpp:59
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4216
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4609

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< Importer::GeoFileLayerInfo > import_export::Importer::gdalGetLayersInGeoFile ( const std::string &  file_name,
const CopyParams copy_params 
)
static

Definition at line 4701 of file Importer.cpp.

References CHECK, EMPTY, GEO, import_export::CopyParams::geo_explode_collections, Geospatial::GDAL::init(), NON_GEO, openGDALDataset(), setGDALAuthorizationTokens(), and UNSUPPORTED_GEO.

Referenced by DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4703  {
4704  // lazy init GDAL
4705  Geospatial::GDAL::init();
4706 
4707  // set authorization tokens
4708  setGDALAuthorizationTokens(copy_params);
4709 
4710  // prepare to gather layer info
4711  std::vector<GeoFileLayerInfo> layer_info;
4712 
4713  // open the data set
4715  if (poDS == nullptr) {
4716  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4717  file_name);
4718  }
4719 
4720  // enumerate the layers
4721  for (auto&& poLayer : poDS->GetLayers()) {
4723  // prepare to read this layer
4724  poLayer->ResetReading();
4725  // skip layer if empty
4726  if (poLayer->GetFeatureCount() > 0) {
4727  // get first feature
4728  OGRFeatureUqPtr first_feature(poLayer->GetNextFeature());
4729  CHECK(first_feature);
4730  // check feature for geometry
4731  const OGRGeometry* geometry = first_feature->GetGeometryRef();
4732  if (!geometry) {
4733  // layer has no geometry
4734  contents = GeoFileLayerContents::NON_GEO;
4735  } else {
4736  // check the geometry type
4737  const OGRwkbGeometryType geometry_type = geometry->getGeometryType();
4738  switch (wkbFlatten(geometry_type)) {
4739  case wkbPoint:
4740  case wkbLineString:
4741  case wkbPolygon:
4742  case wkbMultiPolygon:
4743  // layer has supported geo
4744  contents = GeoFileLayerContents::GEO;
4745  break;
4746  case wkbMultiPoint:
4747  case wkbMultiLineString:
4748  // supported if geo_explode_collections is specified
4752  break;
4753  default:
4754  // layer has unsupported geometry
4756  break;
4757  }
4758  }
4759  }
4760  // store info for this layer
4761  layer_info.emplace_back(poLayer->GetName(), contents);
4762  }
4763 
4764  // done
4765  return layer_info;
4766 }
static void init()
Definition: GDAL.cpp:59
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:97
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:106
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4216
#define CHECK(condition)
Definition: Logger.h:197
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4286

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalStatInternal ( const std::string &  path,
const CopyParams &  copy_params,
bool  also_dir 
)
staticprivate

Definition at line 4566 of file Importer.cpp.

References Geospatial::GDAL::init(), run_benchmark_import::result, and setGDALAuthorizationTokens().

Referenced by gdalFileExists(), and gdalFileOrDirectoryExists().

4568  {
4569  // lazy init GDAL
4570  Geospatial::GDAL::init();
4571 
4572  // set authorization tokens
4573  setGDALAuthorizationTokens(copy_params);
4574 
4575 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4576  // clear GDAL stat cache
4577  // without this, file existence will be cached, even if authentication changes
4578  // supposed to be available from GDAL 2.2.1 but our CentOS build disagrees
4579  VSICurlClearCache();
4580 #endif
4581 
4582  // stat path
4583  VSIStatBufL sb;
4584  int result = VSIStatExL(path.c_str(), &sb, VSI_STAT_EXISTS_FLAG);
4585  if (result < 0) {
4586  return false;
4587  }
4588 
4589  // exists?
4590  if (also_dir && (VSI_ISREG(sb.st_mode) || VSI_ISDIR(sb.st_mode))) {
4591  return true;
4592  } else if (VSI_ISREG(sb.st_mode)) {
4593  return true;
4594  }
4595  return false;
4596 }
static void init()
Definition: GDAL.cpp:59
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4216

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::list< ColumnDescriptor > import_export::Importer::gdalToColumnDescriptors ( const std::string &  fileName,
const std::string &  geoColumnName,
const CopyParams &  copy_params 
)
static

Definition at line 4476 of file Importer.cpp.

References CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::CopyParams::geo_coords_comp_param, import_export::CopyParams::geo_coords_encoding, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_coords_type, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), kARRAY, kENCODING_DICT, kMULTIPOLYGON, kPOLYGON, kTEXT, import_export::ogr_to_type(), openGDALDataset(), import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), and ColumnDescriptor::sourceName.

Referenced by DBHandler::detect_column_types().

4479  {
4480  std::list<ColumnDescriptor> cds;
4481 
4483  if (poDS == nullptr) {
4484  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4485  file_name);
4486  }
4487 
4488  OGRLayer& layer =
4490 
4491  layer.ResetReading();
4492  // TODO(andrewseidl): support multiple features
4493  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
4494  if (poFeature == nullptr) {
4495  throw std::runtime_error("No features found in " + file_name);
4496  }
4497  // get fields as regular columns
4498  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4499  CHECK(poFDefn);
4500  int iField;
4501  for (iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4502  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4503  auto typePair = ogr_to_type(poFieldDefn->GetType());
4504  ColumnDescriptor cd;
4505  cd.columnName = poFieldDefn->GetNameRef();
4506  cd.sourceName = poFieldDefn->GetNameRef();
4507  SQLTypeInfo ti;
4508  if (typePair.second) {
4509  ti.set_type(kARRAY);
4510  ti.set_subtype(typePair.first);
4511  } else {
4512  ti.set_type(typePair.first);
4513  }
4514  if (typePair.first == kTEXT) {
4516  ti.set_comp_param(32);
4517  }
4518  ti.set_fixed_size();
4519  cd.columnType = ti;
4520  cds.push_back(cd);
4521  }
4522  // get geo column, if any
4523  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4524  if (poGeometry) {
4525  ColumnDescriptor cd;
4526  cd.columnName = geo_column_name;
4527  cd.sourceName = geo_column_name;
4528 
4529  // get GDAL type
4530  auto ogr_type = wkbFlatten(poGeometry->getGeometryType());
4531 
4532  // if exploding, override any collection type to child type
4534  if (ogr_type == wkbMultiPolygon) {
4535  ogr_type = wkbPolygon;
4536  } else if (ogr_type == wkbMultiLineString) {
4537  ogr_type = wkbLineString;
4538  } else if (ogr_type == wkbMultiPoint) {
4539  ogr_type = wkbPoint;
4540  }
4541  }
4542 
4543  // convert to internal type
4544  SQLTypes geoType = ogr_to_type(ogr_type);
4545 
4546  // for now, we promote POLYGON to MULTIPOLYGON (unless exploding)
4548  geoType = (geoType == kPOLYGON) ? kMULTIPOLYGON : geoType;
4549  }
4550 
4551  // build full internal type
4552  SQLTypeInfo ti;
4553  ti.set_type(geoType);
4559  cd.columnType = ti;
4560 
4561  cds.push_back(cd);
4562  }
4563  return cds;
4564 }
void set_compression(EncodingType c)
Definition: sqltypes.h:411
SQLTypes
Definition: sqltypes.h:37
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4319
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:402
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:97
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:106
std::string sourceName
void set_input_srid(int d)
Definition: sqltypes.h:405
void set_fixed_size()
Definition: sqltypes.h:410
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:148
specifies the content in-memory of a row in the column metadata table
void set_output_srid(int s)
Definition: sqltypes.h:407
void set_comp_param(int p)
Definition: sqltypes.h:412
std::string geo_layer_name
Definition: CopyParams.h:78
Definition: sqltypes.h:51
#define CHECK(condition)
Definition: Logger.h:197
SQLTypeInfo columnType
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4286
std::string columnName
std::pair< SQLTypes, bool > ogr_to_type(const OGRFieldType &ogr_type)
Definition: Importer.cpp:4421
EncodingType geo_coords_encoding
Definition: CopyParams.h:73
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:401

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::list<const ColumnDescriptor*>& import_export::Importer::get_column_descs ( ) const
inline

Definition at line 743 of file Importer.h.

References loader.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

743  {
744  return loader->get_column_descs();
745  }
std::unique_ptr< Loader > loader
Definition: Importer.h:824

+ Here is the caller graph for this function:

const CopyParams& import_export::Importer::get_copy_params ( ) const
inline

Definition at line 742 of file Importer.h.

References import_export::DataStreamSink::copy_params.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

742 { return copy_params; }

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<TypedImportBuffer> >& import_export::Importer::get_import_buffers ( int  i)
inline

Definition at line 751 of file Importer.h.

References import_buffers_vec.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

751  {
752  return import_buffers_vec[i];
753  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823

+ Here is the caller graph for this function:

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > >& import_export::Importer::get_import_buffers_vec ( )
inline

Definition at line 748 of file Importer.h.

References import_buffers_vec.

748  {
749  return import_buffers_vec;
750  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
ImportStatus import_export::Importer::get_import_status ( const std::string &  id)
static

Definition at line 212 of file Importer.cpp.

References import_export::import_status_map, and import_export::status_mutex.

Referenced by DBHandler::import_table_status().

212  {
213  mapd_shared_lock<mapd_shared_mutex> read_lock(status_mutex);
214  return import_status_map.at(import_id);
215 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:151
std::string import_id
Definition: Importer.h:819
mapd_shared_lock< mapd_shared_mutex > read_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:150

+ Here is the caller graph for this function:

const bool* import_export::Importer::get_is_array ( ) const
inline

Definition at line 754 of file Importer.h.

References is_array_a.

Referenced by import_export::import_thread_delimited().

754 { return is_array_a.get(); }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:825

+ Here is the caller graph for this function:

Catalog_Namespace::Catalog& import_export::Importer::getCatalog ( )
inline

Definition at line 786 of file Importer.h.

References loader.

Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), and import_export::import_thread_delimited().

786 { return loader->getCatalog(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:824

+ Here is the caller graph for this function:

auto import_export::Importer::getLoader ( ) const
inline

Definition at line 810 of file Importer.h.

References loader.

810 { return loader.get(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:824
ImportStatus import_export::Importer::import ( )

Definition at line 3981 of file Importer.cpp.

References import_export::DataStreamSink::archivePlumber().

3981  {
3982  return DataStreamSink::archivePlumber();
3983 }

+ Here is the call graph for this function:

ImportStatus import_export::Importer::importDelimited ( const std::string &  file_path,
const bool  decompressed 
)
overridevirtual

Implements import_export::DataStreamSink.

Definition at line 3985 of file Importer.cpp.

References import_export::CopyParams::buffer_size, CHECK, checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, import_export::DataStreamSink::file_offsets, import_export::DataStreamSink::file_offsets_mutex, file_size, import_export::delimited_parser::find_row_end_pos(), omnisci::fopen(), g_max_import_threads, import_export::CopyParams::geo_assign_render_groups, SQLTypeInfo::get_type(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_delimited(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, import_export::DataStreamSink::p_file, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), import_export::status_mutex, logger::thread_id(), import_export::CopyParams::threads, import_export::DataStreamSink::total_file_size, and VLOG.

3986  {
3987  bool load_truncated = false;
3989 
3990  if (!p_file) {
3991  p_file = fopen(file_path.c_str(), "rb");
3992  }
3993  if (!p_file) {
3994  throw std::runtime_error("failed to open file '" + file_path +
3995  "': " + strerror(errno));
3996  }
3997 
3998  if (!decompressed) {
3999  (void)fseek(p_file, 0, SEEK_END);
4000  file_size = ftell(p_file);
4001  }
4002 
4003  if (copy_params.threads == 0) {
4004  max_threads = std::min(static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF)),
4006  } else {
4007  max_threads = static_cast<size_t>(copy_params.threads);
4008  }
4009  VLOG(1) << "Delimited import # threads: " << max_threads;
4010 
4011  // deal with small files
4012  size_t alloc_size = copy_params.buffer_size;
4013  if (!decompressed && file_size < alloc_size) {
4014  alloc_size = file_size;
4015  }
4016 
4017  for (size_t i = 0; i < max_threads; i++) {
4018  import_buffers_vec.emplace_back();
4019  for (const auto cd : loader->get_column_descs()) {
4020  import_buffers_vec[i].emplace_back(
4021  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
4022  }
4023  }
4024 
4025  auto scratch_buffer = std::make_unique<char[]>(alloc_size);
4026  size_t current_pos = 0;
4027  size_t end_pos;
4028  size_t begin_pos = 0;
4029 
4030  (void)fseek(p_file, current_pos, SEEK_SET);
4031  size_t size =
4032  fread(reinterpret_cast<void*>(scratch_buffer.get()), 1, alloc_size, p_file);
4033 
4034  // make render group analyzers for each poly column
4035  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
4036  if (copy_params.geo_assign_render_groups) {
4037  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
4038  loader->getTableDesc()->tableId, false, false, false);
4039  for (auto cd : columnDescriptors) {
4040  SQLTypes ct = cd->columnType.get_type();
4041  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4042  auto rga = std::make_shared<RenderGroupAnalyzer>();
4043  rga->seedFromExistingTableContents(loader, cd->columnName);
4044  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4045  }
4046  }
4047  }
4048 
4049  ChunkKey chunkKey = {loader->getCatalog().getCurrentDB().dbId,
4050  loader->getTableDesc()->tableId};
4051  auto table_epochs = loader->getTableEpochs();
4052  {
4053  std::list<std::future<ImportStatus>> threads;
4054 
4055  // use a stack to track thread_ids which must not overlap among threads
4056  // because thread_id is used to index import_buffers_vec[]
4057  std::stack<size_t> stack_thread_ids;
4058  for (size_t i = 0; i < max_threads; i++) {
4059  stack_thread_ids.push(i);
4060  }
4061  // added for true row index on error
4062  size_t first_row_index_this_buffer = 0;
4063 
4064  while (size > 0) {
4065  unsigned int num_rows_this_buffer = 0;
4066  CHECK(scratch_buffer);
4067  end_pos = delimited_parser::find_row_end_pos(alloc_size,
4068  scratch_buffer,
4069  size,
4070  copy_params,
4071  first_row_index_this_buffer,
4072  num_rows_this_buffer,
4073  p_file);
4074 
4075  // unput residual
4076  int nresidual = size - end_pos;
4077  std::unique_ptr<char[]> unbuf;
4078  if (nresidual > 0) {
4079  unbuf = std::make_unique<char[]>(nresidual);
4080  memcpy(unbuf.get(), scratch_buffer.get() + end_pos, nresidual);
4081  }
4082 
4083  // get a thread_id not in use
4084  auto thread_id = stack_thread_ids.top();
4085  stack_thread_ids.pop();
4086  // LOG(INFO) << " stack_thread_ids.pop " << thread_id << std::endl;
4087 
4088  threads.push_back(std::async(std::launch::async,
4089  import_thread_delimited,
4090  thread_id,
4091  this,
4092  std::move(scratch_buffer),
4093  begin_pos,
4094  end_pos,
4095  end_pos,
4096  columnIdToRenderGroupAnalyzerMap,
4097  first_row_index_this_buffer));
4098 
4099  first_row_index_this_buffer += num_rows_this_buffer;
4100 
4101  current_pos += end_pos;
4102  scratch_buffer = std::make_unique<char[]>(alloc_size);
4103  CHECK(scratch_buffer);
4104  memcpy(scratch_buffer.get(), unbuf.get(), nresidual);
4105  size = nresidual +
4106  fread(scratch_buffer.get() + nresidual, 1, alloc_size - nresidual, p_file);
4107 
4108  begin_pos = 0;
4109 
4110  while (threads.size() > 0) {
4111  int nready = 0;
4112  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4113  it != threads.end();) {
4114  auto& p = *it;
4115  std::chrono::milliseconds span(
4116  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4117  if (p.wait_for(span) == std::future_status::ready) {
4118  auto ret_import_status = p.get();
4119  import_status += ret_import_status;
4120  // sum up current total file offsets
4121  size_t total_file_offset{0};
4122  if (decompressed) {
4123  std::unique_lock<std::mutex> lock(file_offsets_mutex);
4124  for (const auto file_offset : file_offsets) {
4125  total_file_offset += file_offset;
4126  }
4127  }
4128  // estimate number of rows per current total file offset
4129  if (decompressed ? total_file_offset : current_pos) {
4131  (decompressed ? (float)total_file_size / total_file_offset
4132  : (float)file_size / current_pos) *
4133  import_status.rows_completed;
4134  }
4135  VLOG(3) << "rows_completed " << import_status.rows_completed
4136  << ", rows_estimated " << import_status.rows_estimated
4137  << ", total_file_size " << total_file_size << ", total_file_offset "
4138  << total_file_offset;
4140  // recall thread_id for reuse
4141  stack_thread_ids.push(ret_import_status.thread_id);
4142  threads.erase(it++);
4143  ++nready;
4144  } else {
4145  ++it;
4146  }
4147  }
4148 
4149  if (nready == 0) {
4150  std::this_thread::yield();
4151  }
4152 
4153  // on eof, wait all threads to finish
4154  if (0 == size) {
4155  continue;
4156  }
4157 
4158  // keep reading if any free thread slot
4159  // this is one of the major difference from old threading model !!
4160  if (threads.size() < max_threads) {
4161  break;
4162  }
4163  }
4164 
4166  load_truncated = true;
4167  load_failed = true;
4168  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4169  break;
4170  }
4171  if (load_failed) {
4172  load_truncated = true;
4173  LOG(ERROR) << "A call to the Loader::load failed, Please review the logs for "
4174  "more details";
4175  break;
4176  }
4177  }
4178 
4179  // join dangling threads in case of LOG(ERROR) above
4180  for (auto& p : threads) {
4181  p.wait();
4182  }
4183  }
4184 
4185  checkpoint(table_epochs);
4186 
4187  // must set import_status.load_truncated before closing this end of pipe
4188  // otherwise, the thread on the other end would throw an unwanted 'write()'
4189  // exception
4190  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
4191  import_status.load_truncated = load_truncated;
4192 
4193  fclose(p_file);
4194  p_file = nullptr;
4195  return import_status;
4196 }
std::vector< int > ChunkKey
Definition: types.h:37
SQLTypes
Definition: sqltypes.h:37
::FILE * fopen(const char *filename, const char *mode)
Definition: omnisci_fs.cpp:72
#define LOG(tag)
Definition: Logger.h:188
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:217
static ImportStatus import_thread_delimited(int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
Definition: Importer.cpp:1808
std::string import_id
Definition: Importer.h:819
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3297
ThreadId thread_id()
Definition: Logger.cpp:732
#define CHECK(condition)
Definition: Logger.h:197
std::map< int, std::shared_ptr< RenderGroupAnalyzer >> ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:139
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:150
size_t g_max_import_threads
Definition: Importer.cpp:75
std::vector< size_t > file_offsets
Definition: Importer.h:653
size_t find_row_end_pos(size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FILE *file, foreign_storage::CsvReader *csv_reader)
Finds the closest possible row ending to the end of the given buffer. The buffer is resized as needed...
#define VLOG(n)
Definition: Logger.h:291
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648

+ Here is the call graph for this function:

ImportStatus import_export::Importer::importGDAL ( std::map< std::string, std::string >  colname_to_src)

Definition at line 4768 of file Importer.cpp.

References CHECK, CHECK_EQ, checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, g_max_import_threads, import_export::CopyParams::geo_assign_render_groups, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_layer_name, SQLTypeInfo::get_type(), import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_shapefile(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, openGDALDataset(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, set_import_status(), logger::thread_id(), import_export::CopyParams::threads, and VLOG.

Referenced by QueryRunner::ImportDriver::importGeoTable().

// Imports the features of one layer of the geo file at file_path via GDAL/OGR.
// Features are read from the layer in chunks of at most MAX_FEATURES_PER_CHUNK and
// handed to a worker together with the field-name-to-index map, the caller-supplied
// column-name-to-source-name map and the render group analyzers. Depending on
// DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT the worker is called inline or launched
// through std::async, with at most max_threads tasks outstanding.
// Each worker's ImportStatus is accumulated into import_status and published with
// set_import_status(). Loading stops early (load_truncated = true) when
// rows_rejected exceeds copy_params.max_reject or when load_failed is set.
// Throws std::runtime_error when poDS is null (dataset could not be opened).
// Returns the accumulated import_status.
4769  {
4770  // initial status
4771  bool load_truncated = false;
4773 
4775  if (poDS == nullptr) {
4776  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4777  file_path);
4778  }
4779 
4780  OGRLayer& layer =
4782 
4783  // get the number of features in this layer
4784  size_t numFeatures = layer.GetFeatureCount();
4785 
4786  // build map of metadata field (additional columns) name to index
4787  // NOTE(review): an earlier comment mentioned a shared_ptr; the map is a plain value
// that is passed to the worker as an argument
4788  FieldNameToIndexMapType fieldNameToIndexMap;
4789  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4790  CHECK(poFDefn);
4791  size_t numFields = poFDefn->GetFieldCount();
4792  for (size_t iField = 0; iField < numFields; iField++) {
4793  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4794  fieldNameToIndexMap.emplace(std::make_pair(poFieldDefn->GetNameRef(), iField));
4795  }
4796 
4797  // the geographic spatial reference we want to put everything in
4798  OGRSpatialReferenceUqPtr poGeographicSR(new OGRSpatialReference());
4799  poGeographicSR->importFromEPSG(copy_params.geo_coords_srid);
4800 
4801 #if GDAL_VERSION_MAJOR >= 3
4802  // GDAL 3.x (really Proj.4 6.x) now enforces lat, lon order
4803  // this results in X and Y being transposed for angle-based
4804  // coordinate systems. This restores the previous behavior.
4805  poGeographicSR->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
4806 #endif
4807 
4808 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4809  // just one "thread"
4810  max_threads = 1;
4811 #else
4812  // how many threads to use
4813  if (copy_params.threads == 0) {
4814  max_threads = std::min(static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF)),
4816  } else {
4818  }
4819 #endif
4820 
4821  VLOG(1) << "GDAL import # threads: " << max_threads;
4822 
4823  // make an import buffer for each thread
4824  CHECK_EQ(import_buffers_vec.size(), 0u);
4826  for (size_t i = 0; i < max_threads; i++) {
4827  for (const auto cd : loader->get_column_descs()) {
4828  import_buffers_vec[i].emplace_back(
4829  new TypedImportBuffer(cd, loader->getStringDict(cd)));
4830  }
4831  }
4832 
4833  // make render group analyzers for each poly column
// (one analyzer per POLYGON / MULTIPOLYGON column, keyed by column id and seeded
// from the existing table contents)
4834  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
4836  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
4837  loader->getTableDesc()->tableId, false, false, false);
4838  for (auto cd : columnDescriptors) {
4839  SQLTypes ct = cd->columnType.get_type();
4840  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4841  auto rga = std::make_shared<RenderGroupAnalyzer>();
4842  rga->seedFromExistingTableContents(loader, cd->columnName);
4843  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4844  }
4845  }
4846  }
4847 
4848 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4849  // threads
4850  std::list<std::future<ImportStatus>> threads;
4851 
4852  // use a stack to track thread_ids which must not overlap among threads
4853  // because thread_id is used to index import_buffers_vec[]
4854  std::stack<size_t> stack_thread_ids;
4855  for (size_t i = 0; i < max_threads; i++) {
4856  stack_thread_ids.push(i);
4857  }
4858 #endif
4859 
4860  // capture the current table epochs; they are handed to checkpoint() after loading
4861  auto table_epochs = loader->getTableEpochs();
4862 
4863  // reset the layer
4864  layer.ResetReading();
4865 
4866  static const size_t MAX_FEATURES_PER_CHUNK = 1000;
4867 
4868  // make a features buffer for each thread
4869  std::vector<FeaturePtrVector> features(max_threads);
4870 
4871  // for each feature...
4872  size_t firstFeatureThisChunk = 0;
4873  while (firstFeatureThisChunk < numFeatures) {
4874  // how many features this chunk
4875  size_t numFeaturesThisChunk =
4876  std::min(MAX_FEATURES_PER_CHUNK, numFeatures - firstFeatureThisChunk);
4877 
4878 // get a thread_id not in use
4879 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4880  size_t thread_id = 0;
4881 #else
4882  auto thread_id = stack_thread_ids.top();
4883  stack_thread_ids.pop();
4884  CHECK(thread_id < max_threads);
4885 #endif
4886 
4887  // fill features buffer for new thread
4888  for (size_t i = 0; i < numFeaturesThisChunk; i++) {
4889  features[thread_id].emplace_back(layer.GetNextFeature());
4890  }
4891 
4892 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4893  // call worker function directly
// NOTE(review): import_thread_shapefile takes the features as a const reference, so
// the std::move below does not empty features[thread_id]; confirm the buffer is
// cleared elsewhere before the next chunk is appended to it
4894  auto ret_import_status = import_thread_shapefile(0,
4895  this,
4896  poGeographicSR.get(),
4897  std::move(features[thread_id]),
4898  firstFeatureThisChunk,
4899  numFeaturesThisChunk,
4900  fieldNameToIndexMap,
4901  columnNameToSourceNameMap,
4902  columnIdToRenderGroupAnalyzerMap);
4903  import_status += ret_import_status;
// NOTE(review): the estimate scales rows_completed by
// firstFeatureThisChunk / numFeatures (0 for the first chunk), whereas importDelimited
// scales by total size / current offset; confirm the intended orientation of the ratio
4904  import_status.rows_estimated = ((float)firstFeatureThisChunk / (float)numFeatures) *
4905  import_status.rows_completed;
4906  set_import_status(import_id, import_status);
4907 #else
4908  // fire up that thread to import this geometry
4909  threads.push_back(std::async(std::launch::async,
4911  thread_id,
4912  this,
4913  poGeographicSR.get(),
4914  std::move(features[thread_id]),
4915  firstFeatureThisChunk,
4916  numFeaturesThisChunk,
4917  fieldNameToIndexMap,
4918  columnNameToSourceNameMap,
4919  columnIdToRenderGroupAnalyzerMap));
4920 
4921  // let the threads run
// poll every outstanding future without blocking (zero timeout); finished ones are
// harvested and removed from the list
4922  while (threads.size() > 0) {
4923  int nready = 0;
4924  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4925  it != threads.end();) {
4926  auto& p = *it;
4927  std::chrono::milliseconds span(
4928  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4929  if (p.wait_for(span) == std::future_status::ready) {
4930  auto ret_import_status = p.get();
4931  import_status += ret_import_status;
// NOTE(review): same ratio as in the single-threaded branch above; confirm orientation
4932  import_status.rows_estimated =
4933  ((float)firstFeatureThisChunk / (float)numFeatures) *
4934  import_status.rows_completed;
4935  set_import_status(import_id, import_status);
4936 
4937  // recall thread_id for reuse
4938  stack_thread_ids.push(ret_import_status.thread_id);
4939 
4940  threads.erase(it++);
4941  ++nready;
4942  } else {
4943  ++it;
4944  }
4945  }
4946 
4947  if (nready == 0) {
4948  std::this_thread::yield();
4949  }
4950 
4951  // keep reading if any free thread slot
4952  // this is one of the major differences from the old threading model
4953  if (threads.size() < max_threads) {
4954  break;
4955  }
4956  }
4957 #endif
4958 
4959  // too many rejected rows?
4960  if (import_status.rows_rejected > copy_params.max_reject) {
4961  load_truncated = true;
4962  load_failed = true;
4963  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4964  break;
4965  }
4966 
4967  // failed?
4968  if (load_failed) {
4969  load_truncated = true;
4970  LOG(ERROR)
4971  << "A call to the Loader::load failed, Please review the logs for more details";
4972  break;
4973  }
4974 
4975  firstFeatureThisChunk += numFeaturesThisChunk;
4976  }
4977 
4978 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4979  // wait for any remaining threads
4980  if (threads.size()) {
4981  for (auto& p : threads) {
4982  // wait for the thread
4983  p.wait();
4984  // get the result and update the final import status
4985  auto ret_import_status = p.get();
4986  import_status += ret_import_status;
4987  import_status.rows_estimated = import_status.rows_completed;
4988  set_import_status(import_id, import_status);
4989  }
4990  }
4991 #endif
4992 
4993  checkpoint(table_epochs);
4994 
4995  // must set import_status.load_truncated before closing this end of pipe
4996  // otherwise, the thread on the other end would throw an unwanted 'write()'
4997  // exception
// NOTE(review): the comment above matches the one in importDelimited, which closes a
// file afterwards; nothing is closed in this function, so it may be a leftover
4998  import_status.load_truncated = load_truncated;
4999  return import_status;
5000 }
std::map< std::string, size_t > FieldNameToIndexMapType
Definition: Importer.cpp:136
#define CHECK_EQ(x, y)
Definition: Logger.h:205
SQLTypes
Definition: sqltypes.h:37
std::unique_ptr< OGRSpatialReference, OGRSpatialReferenceDeleter > OGRSpatialReferenceUqPtr
Definition: Importer.cpp:116
#define LOG(tag)
Definition: Logger.h:188
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4319
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:97
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:823
std::string geo_layer_name
Definition: CopyParams.h:78
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:217
std::string import_id
Definition: Importer.h:819
void checkpoint(const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs)
Definition: Importer.cpp:3297
ThreadId thread_id()
Definition: Logger.cpp:732
#define CHECK(condition)
Definition: Logger.h:197
std::map< int, std::shared_ptr< RenderGroupAnalyzer >> ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:139
static ImportStatus import_thread_shapefile(int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
Definition: Importer.cpp:2149
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4286
size_t g_max_import_threads
Definition: Importer.cpp:75
#define VLOG(n)
Definition: Logger.h:291
std::unique_ptr< Loader > loader
Definition: Importer.h:824
const std::string file_path
Definition: Importer.h:648

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::load ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t  row_count 
)

Definition at line 3290 of file Importer.cpp.

References import_export::DataStreamSink::load_failed, and loader.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

// Passes import_buffers and row_count to loader->loadNoCheckpoint().
// The function returns nothing: a failed load is only recorded by setting the
// load_failed member, which the import loops check after each chunk.
3291  {
3292  if (!loader->loadNoCheckpoint(import_buffers, row_count)) {
3293  load_failed = true;
3294  }
3295 }
std::unique_ptr< Loader > loader
Definition: Importer.h:824

+ Here is the caller graph for this function:

OGRDataSource * import_export::Importer::openGDALDataset ( const std::string &  fileName,
const CopyParams copy_params 
)
staticprivate

Definition at line 4286 of file Importer.cpp.

References logger::ERROR, logger::INFO, Geospatial::GDAL::init(), LOG, and setGDALAuthorizationTokens().

Referenced by gdalGetLayersInGeoFile(), gdalToColumnDescriptors(), importGDAL(), and readMetadataSampleGDAL().

// Opens file_name as a GDAL vector data source and returns the raw pointer.
// With GDAL major version 1 the file is opened through OGRSFDriverRegistrar::Open;
// otherwise GDALOpenEx is tried with GDAL_OF_VECTOR and, if that yields nullptr,
// retried with GDAL_OF_READONLY | GDAL_OF_VECTOR.
// Returns nullptr on failure after logging CPLGetLastErrorMsg(); this function itself
// does not throw on a failed open.
// NOTE(review): the result is a raw pointer; presumably the caller takes ownership
// (see the leak warning at the end of the function) - confirm at the call sites.
4287  {
4288  // lazy init GDAL
4290 
4291  // set authorization tokens
4293 
4294  // open the file
4295  OGRDataSource* poDS;
4296 #if GDAL_VERSION_MAJOR == 1
4297  poDS = (OGRDataSource*)OGRSFDriverRegistrar::Open(file_name.c_str(), false);
4298 #else
4299  poDS = (OGRDataSource*)GDALOpenEx(
4300  file_name.c_str(), GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
4301  if (poDS == nullptr) {
// second attempt, explicitly requesting read-only access
4302  poDS = (OGRDataSource*)GDALOpenEx(
4303  file_name.c_str(), GDAL_OF_READONLY | GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
4304  if (poDS) {
4305  LOG(INFO) << "openGDALDataset had to open as read-only";
4306  }
4307  }
4308 #endif
4309  if (poDS == nullptr) {
4310  LOG(ERROR) << "openGDALDataset Error: " << CPLGetLastErrorMsg();
4311  }
4312  // NOTE(adb): If extending this function, refactor to ensure any errors will not result
4313  // in a memory leak if GDAL successfully opened the input dataset.
4314  return poDS;
4315 }
#define LOG(tag)
Definition: Logger.h:188
static void init()
Definition: GDAL.cpp:59
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4216

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::readMetadataSampleGDAL ( const std::string &  fileName,
const std::string &  geoColumnName,
std::map< std::string, std::vector< std::string >> &  metadata,
int  rowLimit,
const CopyParams copy_params 
)
static

Definition at line 4342 of file Importer.cpp.

References CHECK, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), and openGDALDataset().

Referenced by DBHandler::detect_column_types().

// Samples up to rowLimit features from a layer of the geo file and fills `metadata`
// with one string vector per layer field plus one for the geo column (keyed by
// geo_column_name), each sized to the number of sampled features.
// Field values are stored via GetFieldAsString(); the geometry is stored as WKT.
// Throws std::runtime_error when poDS is null or when a feature carries a geometry
// type outside the cases accepted by the switch below.
4347  {
4349  if (poDS == nullptr) {
4350  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4351  file_name);
4352  }
4353 
4354  OGRLayer& layer =
4356 
4357  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4358  CHECK(poFDefn);
4359 
4360  // typeof GetFeatureCount() is different between GDAL 1.x (int32_t) and 2.x (int64_t)
4361  auto nFeats = layer.GetFeatureCount();
// clamp the sample size to [0, min(rowLimit, nFeats)]
4362  size_t numFeatures =
4363  std::max(static_cast<decltype(nFeats)>(0),
4364  std::min(static_cast<decltype(nFeats)>(rowLimit), nFeats));
// pre-size one column of empty strings per field so rows can be assigned by index
4365  for (auto iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4366  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4367  // FIXME(andrewseidl): change this to the faster one used by readVerticesFromGDAL
4368  metadata.emplace(poFieldDefn->GetNameRef(), std::vector<std::string>(numFeatures));
4369  }
4370  metadata.emplace(geo_column_name, std::vector<std::string>(numFeatures));
4371  layer.ResetReading();
4372  size_t iFeature = 0;
4373  while (iFeature < numFeatures) {
4374  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
// the layer ran out of features early; remaining rows keep their empty strings
4375  if (!poFeature) {
4376  break;
4377  }
4378 
// features without a geometry are skipped entirely: neither their fields nor the
// geo column are populated, so that row stays as empty strings
4379  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4380  if (poGeometry != nullptr) {
4381  // validate geom type (again?)
4382  switch (wkbFlatten(poGeometry->getGeometryType())) {
4383  case wkbPoint:
4384  case wkbLineString:
4385  case wkbPolygon:
4386  case wkbMultiPolygon:
4387  break;
4388  case wkbMultiPoint:
4389  case wkbMultiLineString:
4390  // supported if geo_explode_collections is specified
4392  throw std::runtime_error("Unsupported geometry type: " +
4393  std::string(poGeometry->getGeometryName()));
4394  }
4395  break;
4396  default:
4397  throw std::runtime_error("Unsupported geometry type: " +
4398  std::string(poGeometry->getGeometryName()));
4399  }
4400 
4401  // populate metadata for regular fields
// NOTE(review): `auto i` iterates by value, copying each (name, vector) entry of the
// map for every feature; only i.first is used
4402  for (auto i : metadata) {
4403  auto iField = poFeature->GetFieldIndex(i.first.c_str());
4404  if (iField >= 0) { // geom is -1
4405  metadata[i.first].at(iFeature) =
4406  std::string(poFeature->GetFieldAsString(iField));
4407  }
4408  }
4409 
4410  // populate metadata for geo column with WKT string
4411  char* wkts = nullptr;
4412  poGeometry->exportToWkt(&wkts);
4413  CHECK(wkts);
4414  metadata[geo_column_name].at(iFeature) = wkts;
// the WKT buffer is released with CPLFree once it has been copied into the string
4415  CPLFree(wkts);
4416  }
4417  iFeature++;
4418  }
4419 }
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4319
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:97
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:106
std::string geo_layer_name
Definition: CopyParams.h:78
#define CHECK(condition)
Definition: Logger.h:197
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4286

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_geo_physical_import_buffer ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< double > &  coords,
std::vector< double > &  bounds,
std::vector< int > &  ring_sizes,
std::vector< int > &  poly_rings,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1433 of file Importer.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), Parser::AddColumnStmt::execute(), import_export::import_thread_delimited(), DBHandler::load_table(), and foreign_storage::csv_file_buffer_parser::process_geo_column().

// Appends one row to each physical column that backs the geo column `cd`.
// The physical column descriptors are looked up at the column ids that directly
// follow cd->columnId, in this order:
//   coords       - every geo type handled here
//   ring_sizes   - POLYGON and MULTIPOLYGON
//   poly_rings   - MULTIPOLYGON
//   bounds       - LINESTRING, POLYGON and MULTIPOLYGON
//   render_group - POLYGON and MULTIPOLYGON
// col_idx is advanced by one for every physical column written, so on return it
// indexes the import buffer after the last one used.
// For a nullable column, empty coords mark the whole geo as NULL. A NULL POINT is
// instead encoded as the coords pair {NULL_ARRAY_DOUBLE, NULL_DOUBLE}, which means
// `coords` may be modified by this call.
1443  {
1444  const auto col_ti = cd->columnType;
1445  const auto col_type = col_ti.get_type();
1446  auto columnId = cd->columnId;
1447  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1448  bool is_null_geo = false;
1449  bool is_null_point = false;
1450  if (!col_ti.get_notnull()) {
1451  // Check for NULL geo
1452  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1453  is_null_point = true;
1454  coords.clear();
1455  }
1456  is_null_geo = coords.empty();
1457  if (is_null_point) {
1458  coords.push_back(NULL_ARRAY_DOUBLE);
1459  coords.push_back(NULL_DOUBLE);
1460  // Treating POINT coords as notnull, need to store actual encoding
1461  // [un]compressed+[not]null
1462  is_null_geo = false;
1463  }
1464  }
1465  TDatum tdd_coords;
1466  // Get the raw data representing [optionally compressed] non-NULL geo's coords.
1467  // One exception - NULL POINT geo: coords need to be processed to encode nullness
1468  // in a fixlen array, compressed and uncompressed.
1469  if (!is_null_geo) {
1470  std::vector<uint8_t> compressed_coords = Geospatial::compress_coords(coords, col_ti);
1471  tdd_coords.val.arr_val.reserve(compressed_coords.size());
// each compressed byte becomes one integer element of the array datum
1472  for (auto cc : compressed_coords) {
1473  tdd_coords.val.arr_val.emplace_back();
1474  tdd_coords.val.arr_val.back().val.int_val = cc;
1475  }
1476  }
1477  tdd_coords.is_null = is_null_geo;
1478  import_buffers[col_idx++]->add_value(cd_coords, tdd_coords, false, replicate_count);
1479 
1480  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1481  // Create ring_sizes array value and add it to the physical column
1482  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1483  TDatum tdd_ring_sizes;
1484  tdd_ring_sizes.val.arr_val.reserve(ring_sizes.size());
1485  if (!is_null_geo) {
1486  for (auto ring_size : ring_sizes) {
1487  tdd_ring_sizes.val.arr_val.emplace_back();
1488  tdd_ring_sizes.val.arr_val.back().val.int_val = ring_size;
1489  }
1490  }
1491  tdd_ring_sizes.is_null = is_null_geo;
1492  import_buffers[col_idx++]->add_value(
1493  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1494  }
1495 
1496  if (col_type == kMULTIPOLYGON) {
1497  // Create poly_rings array value and add it to the physical column
1498  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1499  TDatum tdd_poly_rings;
1500  tdd_poly_rings.val.arr_val.reserve(poly_rings.size());
1501  if (!is_null_geo) {
1502  for (auto num_rings : poly_rings) {
1503  tdd_poly_rings.val.arr_val.emplace_back();
1504  tdd_poly_rings.val.arr_val.back().val.int_val = num_rings;
1505  }
1506  }
1507  tdd_poly_rings.is_null = is_null_geo;
1508  import_buffers[col_idx++]->add_value(
1509  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1510  }
1511 
1512  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
// Create bounds array value (doubles) and add it to the physical column
1513  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1514  TDatum tdd_bounds;
1515  tdd_bounds.val.arr_val.reserve(bounds.size());
1516  if (!is_null_geo) {
1517  for (auto b : bounds) {
1518  tdd_bounds.val.arr_val.emplace_back();
1519  tdd_bounds.val.arr_val.back().val.real_val = b;
1520  }
1521  }
1522  tdd_bounds.is_null = is_null_geo;
1523  import_buffers[col_idx++]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1524  }
1525 
1526  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1527  // Create render_group value and add it to the physical column
1528  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1529  TDatum td_render_group;
1530  td_render_group.val.int_val = render_group;
1531  td_render_group.is_null = is_null_geo;
1532  import_buffers[col_idx++]->add_value(
1533  cd_render_group, td_render_group, false, replicate_count);
1534  }
1535 }
#define NULL_DOUBLE
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
#define NULL_ARRAY_DOUBLE
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_geo_physical_import_buffer_columnar ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< std::vector< double >> &  coords_column,
std::vector< std::vector< double >> &  bounds_column,
std::vector< std::vector< int >> &  ring_sizes_column,
std::vector< std::vector< int >> &  poly_rings_column,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1537 of file Importer.cpp.

References CHECK, ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by DBHandler::load_table_binary_columnar().

// Columnar counterpart of set_geo_physical_import_buffer: appends every row of the
// given per-column vectors to the physical columns that back the geo column `cd`.
// Physical columns are looked up at the column ids following cd->columnId, in the
// order coords, ring_sizes (POLYGON/MULTIPOLYGON), poly_rings (MULTIPOLYGON),
// bounds (LINESTRING/POLYGON/MULTIPOLYGON), render_group (POLYGON/MULTIPOLYGON).
// col_idx is advanced once per physical column, after all of its rows were added.
// The ring_sizes, poly_rings and bounds columns must have as many rows as
// coords_column; a mismatch fails a CHECK.
// Nullness is decided independently per physical column from that column's own row
// (empty vector, or for bounds also a leading NULL_ARRAY_DOUBLE).
1547  {
1548  const auto col_ti = cd->columnType;
1549  const auto col_type = col_ti.get_type();
1550  auto columnId = cd->columnId;
1551 
1552  auto coords_row_count = coords_column.size();
1553  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
// each row is iterated by value, so the NULL POINT rewrite below works on a copy and
// leaves the caller's coords_column untouched
1554  for (auto coords : coords_column) {
1555  bool is_null_geo = false;
1556  bool is_null_point = false;
1557  if (!col_ti.get_notnull()) {
1558  // Check for NULL geo
1559  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1560  is_null_point = true;
1561  coords.clear();
1562  }
1563  is_null_geo = coords.empty();
1564  if (is_null_point) {
1565  coords.push_back(NULL_ARRAY_DOUBLE);
1566  coords.push_back(NULL_DOUBLE);
1567  // Treating POINT coords as notnull, need to store actual encoding
1568  // [un]compressed+[not]null
1569  is_null_geo = false;
1570  }
1571  }
1572  std::vector<TDatum> td_coords_data;
1573  if (!is_null_geo) {
1574  std::vector<uint8_t> compressed_coords =
1575  Geospatial::compress_coords(coords, col_ti);
1576  for (auto cc : compressed_coords) {
1577  TDatum td_byte;
1578  td_byte.val.int_val = cc;
1579  td_coords_data.push_back(td_byte);
1580  }
1581  }
1582  TDatum tdd_coords;
1583  tdd_coords.val.arr_val = td_coords_data;
1584  tdd_coords.is_null = is_null_geo;
1585  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false, replicate_count);
1586  }
1587  col_idx++;
1588 
1589  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1590  if (ring_sizes_column.size() != coords_row_count) {
1591  CHECK(false) << "Geometry import columnar: ring sizes column size mismatch";
1592  }
1593  // Create ring_sizes array value and add it to the physical column
1594  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1595  for (auto ring_sizes : ring_sizes_column) {
1596  bool is_null_geo = false;
1597  if (!col_ti.get_notnull()) {
1598  // Check for NULL geo
1599  is_null_geo = ring_sizes.empty();
1600  }
1601  std::vector<TDatum> td_ring_sizes;
1602  for (auto ring_size : ring_sizes) {
1603  TDatum td_ring_size;
1604  td_ring_size.val.int_val = ring_size;
1605  td_ring_sizes.push_back(td_ring_size);
1606  }
1607  TDatum tdd_ring_sizes;
1608  tdd_ring_sizes.val.arr_val = td_ring_sizes;
1609  tdd_ring_sizes.is_null = is_null_geo;
1610  import_buffers[col_idx]->add_value(
1611  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1612  }
1613  col_idx++;
1614  }
1615 
1616  if (col_type == kMULTIPOLYGON) {
1617  if (poly_rings_column.size() != coords_row_count) {
1618  CHECK(false) << "Geometry import columnar: poly rings column size mismatch";
1619  }
1620  // Create poly_rings array value and add it to the physical column
1621  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1622  for (auto poly_rings : poly_rings_column) {
1623  bool is_null_geo = false;
1624  if (!col_ti.get_notnull()) {
1625  // Check for NULL geo
1626  is_null_geo = poly_rings.empty();
1627  }
1628  std::vector<TDatum> td_poly_rings;
1629  for (auto num_rings : poly_rings) {
1630  TDatum td_num_rings;
1631  td_num_rings.val.int_val = num_rings;
1632  td_poly_rings.push_back(td_num_rings);
1633  }
1634  TDatum tdd_poly_rings;
1635  tdd_poly_rings.val.arr_val = td_poly_rings;
1636  tdd_poly_rings.is_null = is_null_geo;
1637  import_buffers[col_idx]->add_value(
1638  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1639  }
1640  col_idx++;
1641  }
1642 
1643  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1644  if (bounds_column.size() != coords_row_count) {
1645  CHECK(false) << "Geometry import columnar: bounds column size mismatch";
1646  }
// Create bounds array value (doubles) and add it to the physical column
1647  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1648  for (auto bounds : bounds_column) {
1649  bool is_null_geo = false;
1650  if (!col_ti.get_notnull()) {
1651  // Check for NULL geo
1652  is_null_geo = (bounds.empty() || bounds[0] == NULL_ARRAY_DOUBLE);
1653  }
1654  std::vector<TDatum> td_bounds_data;
1655  for (auto b : bounds) {
1656  TDatum td_double;
1657  td_double.val.real_val = b;
1658  td_bounds_data.push_back(td_double);
1659  }
1660  TDatum tdd_bounds;
1661  tdd_bounds.val.arr_val = td_bounds_data;
1662  tdd_bounds.is_null = is_null_geo;
1663  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1664  }
1665  col_idx++;
1666  }
1667 
1668  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1669  // Create render_group value and add it to the physical column
// the same render_group value is appended once per coords row
// NOTE(review): is_null is always false here, whereas the row-wise
// set_geo_physical_import_buffer sets it from is_null_geo; confirm this is intended
1670  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1671  TDatum td_render_group;
1672  td_render_group.val.int_val = render_group;
1673  td_render_group.is_null = false;
1674  for (decltype(coords_row_count) i = 0; i < coords_row_count; i++) {
1675  import_buffers[col_idx]->add_value(
1676  cd_render_group, td_render_group, false, replicate_count);
1677  }
1678  col_idx++;
1679  }
1680 }
#define NULL_DOUBLE
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
#define NULL_ARRAY_DOUBLE
#define CHECK(condition)
Definition: Logger.h:197
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_import_status ( const std::string &  id,
const ImportStatus  is 
)
static

Definition at line 217 of file Importer.cpp.

References import_export::ImportStatus::elapsed, import_export::ImportStatus::end, import_id, import_export::import_status_map, import_export::ImportStatus::start, and import_export::status_mutex.

Referenced by importDelimited(), and importGDAL().

// Finalizes the timing fields of `is` while holding a write lock on status_mutex:
// is.end is set to the current steady_clock time and is.elapsed to the number of
// milliseconds between is.start and is.end.
// NOTE(review): this listing skips source line 221; presumably that line stores `is`
// in import_status_map under `id` - confirm against Importer.cpp.
217  {
218  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
219  is.end = std::chrono::steady_clock::now();
220  is.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(is.end - is.start);
222 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:151
std::string import_id
Definition: Importer.h:819
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:150

+ Here is the caller graph for this function:

void import_export::Importer::setGDALAuthorizationTokens ( const CopyParams &  copy_params)
staticprivate

Definition at line 4216 of file Importer.cpp.

References logger::INFO, LOG, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, and import_export::CopyParams::s3_secret_key.

Referenced by gdalGetAllFilesInArchive(), gdalGetLayersInGeoFile(), gdalStatInternal(), and openGDALDataset().

4216  {
4217  // for now we only support S3
4218  // @TODO generalize CopyParams to have a dictionary of GDAL tokens
4219  // only set if non-empty to allow GDAL defaults to persist
4220  // explicitly clear if empty to revert to default and not reuse a previous session's
4221  // keys
4222  if (copy_params.s3_region.size()) {
4223 #if DEBUG_AWS_AUTHENTICATION
4224  LOG(INFO) << "GDAL: Setting AWS_REGION to '" << copy_params.s3_region << "'";
4225 #endif
4226  CPLSetConfigOption("AWS_REGION", copy_params.s3_region.c_str());
4227  } else {
4228 #if DEBUG_AWS_AUTHENTICATION
4229  LOG(INFO) << "GDAL: Clearing AWS_REGION";
4230 #endif
4231  CPLSetConfigOption("AWS_REGION", nullptr);
4232  }
4233  if (copy_params.s3_endpoint.size()) {
4234 #if DEBUG_AWS_AUTHENTICATION
4235  LOG(INFO) << "GDAL: Setting AWS_S3_ENDPOINT to '" << copy_params.s3_endpoint << "'";
4236 #endif
4237  CPLSetConfigOption("AWS_S3_ENDPOINT", copy_params.s3_endpoint.c_str());
4238  } else {
4239 #if DEBUG_AWS_AUTHENTICATION
4240  LOG(INFO) << "GDAL: Clearing AWS_S3_ENDPOINT";
4241 #endif
4242  CPLSetConfigOption("AWS_S3_ENDPOINT", nullptr);
4243  }
4244  if (copy_params.s3_access_key.size()) {
4245 #if DEBUG_AWS_AUTHENTICATION
4246  LOG(INFO) << "GDAL: Setting AWS_ACCESS_KEY_ID to '" << copy_params.s3_access_key
4247  << "'";
4248 #endif
4249  CPLSetConfigOption("AWS_ACCESS_KEY_ID", copy_params.s3_access_key.c_str());
4250  } else {
4251 #if DEBUG_AWS_AUTHENTICATION
4252  LOG(INFO) << "GDAL: Clearing AWS_ACCESS_KEY_ID";
4253 #endif
4254  CPLSetConfigOption("AWS_ACCESS_KEY_ID", nullptr);
4255  }
4256  if (copy_params.s3_secret_key.size()) {
4257 #if DEBUG_AWS_AUTHENTICATION
4258  LOG(INFO) << "GDAL: Setting AWS_SECRET_ACCESS_KEY to '" << copy_params.s3_secret_key
4259  << "'";
4260 #endif
4261  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", copy_params.s3_secret_key.c_str());
4262  } else {
4263 #if DEBUG_AWS_AUTHENTICATION
4264  LOG(INFO) << "GDAL: Clearing AWS_SECRET_ACCESS_KEY";
4265 #endif
4266  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", nullptr);
4267  }
4268 
4269 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4270  // if we haven't set keys, we need to disable signed access
4271  if (copy_params.s3_access_key.size() || copy_params.s3_secret_key.size()) {
4272 #if DEBUG_AWS_AUTHENTICATION
4273  LOG(INFO) << "GDAL: Clearing AWS_NO_SIGN_REQUEST";
4274 #endif
4275  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", nullptr);
4276  } else {
4277 #if DEBUG_AWS_AUTHENTICATION
4278  LOG(INFO) << "GDAL: Setting AWS_NO_SIGN_REQUEST to 'YES'";
4279 #endif
4280  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", "YES");
4281  }
4282 #endif
4283 }
std::string s3_secret_key
Definition: CopyParams.h:63
#define LOG(tag)
Definition: Logger.h:188
std::string s3_access_key
Definition: CopyParams.h:62

+ Here is the caller graph for this function:

Member Data Documentation

char* import_export::Importer::buffer[2]
private

Definition at line 822 of file Importer.h.

Referenced by Importer(), and ~Importer().

size_t import_export::Importer::file_size
private

Definition at line 820 of file Importer.h.

Referenced by importDelimited(), and Importer().

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > > import_export::Importer::import_buffers_vec
private
std::unique_ptr<bool[]> import_export::Importer::is_array_a
private

Definition at line 825 of file Importer.h.

Referenced by get_is_array(), and Importer().

std::unique_ptr<Loader> import_export::Importer::loader
private
size_t import_export::Importer::max_threads
private

Definition at line 821 of file Importer.h.

Referenced by importDelimited(), Importer(), and importGDAL().


The documentation for this class was generated from the following files: