OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export::Importer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::Importer:
+ Collaboration diagram for import_export::Importer:

Classes

struct  GeoFileLayerInfo
 

Public Types

enum  GeoFileLayerContents { GeoFileLayerContents::EMPTY, GeoFileLayerContents::GEO, GeoFileLayerContents::NON_GEO, GeoFileLayerContents::UNSUPPORTED_GEO }
 

Public Member Functions

 Importer (Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
 
 Importer (Loader *providedLoader, const std::string &f, const CopyParams &p)
 
 ~Importer () override
 
ImportStatus import ()
 
ImportStatus importDelimited (const std::string &file_path, const bool decompressed) override
 
ImportStatus importGDAL (std::map< std::string, std::string > colname_to_src)
 
const CopyParams & get_copy_params () const
 
const std::list< const
ColumnDescriptor * > & 
get_column_descs () const
 
void load (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count)
 
std::vector< std::vector
< std::unique_ptr
< TypedImportBuffer > > > & 
get_import_buffers_vec ()
 
std::vector< std::unique_ptr
< TypedImportBuffer > > & 
get_import_buffers (int i)
 
const bool * get_is_array () const
 
Catalog_Namespace::Catalog & getCatalog ()
 
void checkpoint (const int32_t start_epoch)
 
auto getLoader () const
 
- Public Member Functions inherited from import_export::DataStreamSink
 DataStreamSink ()
 
 DataStreamSink (const CopyParams &copy_params, const std::string file_path)
 
virtual ~DataStreamSink ()
 
const CopyParams & get_copy_params () const
 
void import_compressed (std::vector< std::string > &file_paths)
 

Static Public Member Functions

static ImportStatus get_import_status (const std::string &id)
 
static void set_import_status (const std::string &id, const ImportStatus is)
 
static const std::list
< ColumnDescriptor > 
gdalToColumnDescriptors (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params)
 
static void readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
 
static bool gdalFileExists (const std::string &path, const CopyParams &copy_params)
 
static bool gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params)
 
static std::vector< std::string > gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params)
 
static std::vector
< GeoFileLayerInfo > 
gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params)
 
static void set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
 
static void set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, int render_group, const int64_t replicate_count=0)
 

Static Private Member Functions

static bool gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir)
 
static OGRDataSource * openGDALDataset (const std::string &fileName, const CopyParams &copy_params)
 
static void setGDALAuthorizationTokens (const CopyParams &copy_params)
 

Private Attributes

std::string import_id
 
size_t file_size
 
size_t max_threads
 
char * buffer [2]
 
std::vector< std::vector
< std::unique_ptr
< TypedImportBuffer > > > 
import_buffers_vec
 
std::unique_ptr< Loader > loader
 
std::unique_ptr< bool[]> is_array_a
 

Additional Inherited Members

- Protected Member Functions inherited from import_export::DataStreamSink
ImportStatus archivePlumber ()
 
- Protected Attributes inherited from import_export::DataStreamSink
CopyParams copy_params
 
const std::string file_path
 
FILE * p_file = nullptr
 
ImportStatus import_status
 
bool load_failed = false
 
size_t total_file_size {0}
 
std::vector< size_t > file_offsets
 
std::mutex file_offsets_mutex
 

Detailed Description

Definition at line 729 of file Importer.h.

Member Enumeration Documentation

Enumerator
EMPTY 
GEO 
NON_GEO 
UNSUPPORTED_GEO 

Definition at line 775 of file Importer.h.

775 { EMPTY, GEO, NON_GEO, UNSUPPORTED_GEO };

Constructor & Destructor Documentation

import_export::Importer::Importer ( Catalog_Namespace::Catalog &  c,
const TableDescriptor *  t,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 150 of file Importer.cpp.

154  : Importer(new Loader(c, t), f, p) {}
Importer(Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p)
Definition: Importer.cpp:150
import_export::Importer::Importer ( Loader *  providedLoader,
const std::string &  f,
const CopyParams &  p 
)

Definition at line 156 of file Importer.cpp.

References buffer, import_export::DataStreamSink::file_path, file_size, import_id, is_array_a, kARRAY, loader, max_threads, and import_export::DataStreamSink::p_file.

157  : DataStreamSink(p, f), loader(providedLoader) {
158  import_id = boost::filesystem::path(file_path).filename().string();
159  file_size = 0;
160  max_threads = 0;
161  p_file = nullptr;
162  buffer[0] = nullptr;
163  buffer[1] = nullptr;
164  // we may be overallocating a little more memory here due to dropping phy cols.
165  // it shouldn't be an issue because iteration of it is not supposed to go OOB.
166  auto is_array = std::unique_ptr<bool[]>(new bool[loader->get_column_descs().size()]);
167  int i = 0;
168  bool has_array = false;
169  // TODO: replace this ugly way of skipping phy cols once if isPhyGeo is defined
170  int skip_physical_cols = 0;
171  for (auto& p : loader->get_column_descs()) {
172  // phy geo columns can't be in input file
173  if (skip_physical_cols-- > 0) {
174  continue;
175  }
176  // neither are rowid or $deleted$
177  // note: columns can be added after rowid/$deleted$
178  if (p->isVirtualCol || p->isDeletedCol) {
179  continue;
180  }
181  skip_physical_cols = p->columnType.get_physical_cols();
182  if (p->columnType.get_type() == kARRAY) {
183  is_array.get()[i] = true;
184  has_array = true;
185  } else {
186  is_array.get()[i] = false;
187  }
188  ++i;
189  }
190  if (has_array) {
191  is_array_a = std::unique_ptr<bool[]>(is_array.release());
192  } else {
193  is_array_a = std::unique_ptr<bool[]>(nullptr);
194  }
195 }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:824
std::string import_id
Definition: Importer.h:818
std::unique_ptr< Loader > loader
Definition: Importer.h:823
const std::string file_path
Definition: Importer.h:647
import_export::Importer::~Importer ( )
override

Definition at line 197 of file Importer.cpp.

References buffer, and import_export::DataStreamSink::p_file.

197  {
198  if (p_file != nullptr) {
199  fclose(p_file);
200  }
201  if (buffer[0] != nullptr) {
202  free(buffer[0]);
203  }
204  if (buffer[1] != nullptr) {
205  free(buffer[1]);
206  }
207 }

Member Function Documentation

void import_export::Importer::checkpoint ( const int32_t  start_epoch)

Definition at line 3266 of file Importer.cpp.

References DEBUG_TIMING, Data_Namespace::DISK_LEVEL, logger::ERROR, measure< TimeT >::execution(), StorageType::FOREIGN_TABLE, import_buffers_vec, logger::INFO, import_export::DataStreamSink::load_failed, loader, and LOG.

Referenced by importDelimited(), and importGDAL().

3266  {
3267  if (loader->getTableDesc()->storageType != StorageType::FOREIGN_TABLE) {
3268  if (load_failed) {
3269  // rollback to starting epoch - undo all the added records
3270  loader->setTableEpoch(start_epoch);
3271  } else {
3272  loader->checkpoint();
3273  }
3274  }
3275 
3276  if (loader->getTableDesc()->persistenceLevel ==
3277  Data_Namespace::MemoryLevel::DISK_LEVEL) { // only checkpoint disk-resident tables
3278  auto ms = measure<>::execution([&]() {
3279  if (!load_failed) {
3280  for (auto& p : import_buffers_vec[0]) {
3281  if (!p->stringDictCheckpoint()) {
3282  LOG(ERROR) << "Checkpointing Dictionary for Column "
3283  << p->getColumnDesc()->columnName << " failed.";
3284  load_failed = true;
3285  break;
3286  }
3287  }
3288  }
3289  });
3290  if (DEBUG_TIMING) {
3291  LOG(INFO) << "Dictionary Checkpointing took " << (double)ms / 1000.0 << " Seconds."
3292  << std::endl;
3293  }
3294  }
3295 }
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
#define LOG(tag)
Definition: Logger.h:188
#define DEBUG_TIMING
Definition: Importer.cpp:139
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:822
static constexpr char const * FOREIGN_TABLE
std::unique_ptr< Loader > loader
Definition: Importer.h:823

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalFileExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4554 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::check_geospatial_files(), DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4554  {
4555  return gdalStatInternal(path, copy_params, false);
4556 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4521

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalFileOrDirectoryExists ( const std::string &  path,
const CopyParams &  copy_params 
)
static

Definition at line 4559 of file Importer.cpp.

References gdalStatInternal().

Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4560  {
4561  return gdalStatInternal(path, copy_params, true);
4562 }
static bool gdalStatInternal(const std::string &path, const CopyParams &copy_params, bool also_dir)
Definition: Importer.cpp:4521

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::string > import_export::Importer::gdalGetAllFilesInArchive ( const std::string &  archive_path,
const CopyParams &  copy_params 
)
static

Definition at line 4631 of file Importer.cpp.

References import_export::gdalGatherFilesInArchiveRecursive(), import_export::GDAL::init(), and setGDALAuthorizationTokens().

Referenced by find_first_geo_file_in_archive(), and DBHandler::get_all_files_in_archive().

4633  {
4634  // lazy init GDAL
4635  GDAL::init();
4636 
4637  // set authorization tokens
4639 
4640  // prepare to gather files
4641  std::vector<std::string> files;
4642 
4643  // gather the files recursively
4644  gdalGatherFilesInArchiveRecursive(archive_path, files);
4645 
4646  // convert to relative paths inside archive
4647  for (auto& file : files) {
4648  file.erase(0, archive_path.size() + 1); // remove archive_path and the slash
4649  }
4650 
4651  // done
4652  return files;
4653 }
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4171
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4564

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< Importer::GeoFileLayerInfo > import_export::Importer::gdalGetLayersInGeoFile ( const std::string &  file_name,
const CopyParams &  copy_params 
)
static

Definition at line 4656 of file Importer.cpp.

References CHECK(), EMPTY, GEO, import_export::CopyParams::geo_explode_collections, import_export::GDAL::init(), NON_GEO, openGDALDataset(), setGDALAuthorizationTokens(), and UNSUPPORTED_GEO.

Referenced by DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().

4658  {
4659  // lazy init GDAL
4660  GDAL::init();
4661 
4662  // set authorization tokens
4664 
4665  // prepare to gather layer info
4666  std::vector<GeoFileLayerInfo> layer_info;
4667 
4668  // open the data set
4670  if (poDS == nullptr) {
4671  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4672  file_name);
4673  }
4674 
4675  // enumerate the layers
4676  for (auto&& poLayer : poDS->GetLayers()) {
4678  // prepare to read this layer
4679  poLayer->ResetReading();
4680  // skip layer if empty
4681  if (poLayer->GetFeatureCount() > 0) {
4682  // get first feature
4683  OGRFeatureUqPtr first_feature(poLayer->GetNextFeature());
4684  CHECK(first_feature);
4685  // check feature for geometry
4686  const OGRGeometry* geometry = first_feature->GetGeometryRef();
4687  if (!geometry) {
4688  // layer has no geometry
4689  contents = GeoFileLayerContents::NON_GEO;
4690  } else {
4691  // check the geometry type
4692  const OGRwkbGeometryType geometry_type = geometry->getGeometryType();
4693  switch (wkbFlatten(geometry_type)) {
4694  case wkbPoint:
4695  case wkbLineString:
4696  case wkbPolygon:
4697  case wkbMultiPolygon:
4698  // layer has supported geo
4699  contents = GeoFileLayerContents::GEO;
4700  break;
4701  case wkbMultiPoint:
4702  case wkbMultiLineString:
4703  // supported if geo_explode_collections is specified
4707  break;
4708  default:
4709  // layer has unsupported geometry
4711  break;
4712  }
4713  }
4714  }
4715  // store info for this layer
4716  layer_info.emplace_back(poLayer->GetName(), contents);
4717  }
4718 
4719  // done
4720  return layer_info;
4721 }
static void init()
Definition: GDAL.cpp:56
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:94
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:103
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4171
CHECK(cgen_state)
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4241

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Importer::gdalStatInternal ( const std::string &  path,
const CopyParams &  copy_params,
bool  also_dir 
)
static private

Definition at line 4521 of file Importer.cpp.

References import_export::GDAL::init(), run_benchmark_import::result, and setGDALAuthorizationTokens().

Referenced by gdalFileExists(), and gdalFileOrDirectoryExists().

4523  {
4524  // lazy init GDAL
4525  GDAL::init();
4526 
4527  // set authorization tokens
4529 
4530 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4531  // clear GDAL stat cache
4532  // without this, file existence will be cached, even if authentication changes
4533  // supposed to be available from GDAL 2.2.1 but our CentOS build disagrees
4534  VSICurlClearCache();
4535 #endif
4536 
4537  // stat path
4538  VSIStatBufL sb;
4539  int result = VSIStatExL(path.c_str(), &sb, VSI_STAT_EXISTS_FLAG);
4540  if (result < 0) {
4541  return false;
4542  }
4543 
4544  // exists?
4545  if (also_dir && (VSI_ISREG(sb.st_mode) || VSI_ISDIR(sb.st_mode))) {
4546  return true;
4547  } else if (VSI_ISREG(sb.st_mode)) {
4548  return true;
4549  }
4550  return false;
4551 }
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4171

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::list< ColumnDescriptor > import_export::Importer::gdalToColumnDescriptors ( const std::string &  fileName,
const std::string &  geoColumnName,
const CopyParams &  copy_params 
)
static

Definition at line 4431 of file Importer.cpp.

References CHECK(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::CopyParams::geo_coords_comp_param, import_export::CopyParams::geo_coords_encoding, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_coords_type, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), kARRAY, kENCODING_DICT, kMULTIPOLYGON, kPOLYGON, kTEXT, import_export::ogr_to_type(), openGDALDataset(), import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), and ColumnDescriptor::sourceName.

Referenced by DBHandler::detect_column_types().

4434  {
4435  std::list<ColumnDescriptor> cds;
4436 
4438  if (poDS == nullptr) {
4439  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4440  file_name);
4441  }
4442 
4443  OGRLayer& layer =
4445 
4446  layer.ResetReading();
4447  // TODO(andrewseidl): support multiple features
4448  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
4449  if (poFeature == nullptr) {
4450  throw std::runtime_error("No features found in " + file_name);
4451  }
4452  // get fields as regular columns
4453  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4454  CHECK(poFDefn);
4455  int iField;
4456  for (iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4457  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4458  auto typePair = ogr_to_type(poFieldDefn->GetType());
4459  ColumnDescriptor cd;
4460  cd.columnName = poFieldDefn->GetNameRef();
4461  cd.sourceName = poFieldDefn->GetNameRef();
4462  SQLTypeInfo ti;
4463  if (typePair.second) {
4464  ti.set_type(kARRAY);
4465  ti.set_subtype(typePair.first);
4466  } else {
4467  ti.set_type(typePair.first);
4468  }
4469  if (typePair.first == kTEXT) {
4471  ti.set_comp_param(32);
4472  }
4473  ti.set_fixed_size();
4474  cd.columnType = ti;
4475  cds.push_back(cd);
4476  }
4477  // get geo column, if any
4478  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4479  if (poGeometry) {
4480  ColumnDescriptor cd;
4481  cd.columnName = geo_column_name;
4482  cd.sourceName = geo_column_name;
4483 
4484  // get GDAL type
4485  auto ogr_type = wkbFlatten(poGeometry->getGeometryType());
4486 
4487  // if exploding, override any collection type to child type
4489  if (ogr_type == wkbMultiPolygon) {
4490  ogr_type = wkbPolygon;
4491  } else if (ogr_type == wkbMultiLineString) {
4492  ogr_type = wkbLineString;
4493  } else if (ogr_type == wkbMultiPoint) {
4494  ogr_type = wkbPoint;
4495  }
4496  }
4497 
4498  // convert to internal type
4499  SQLTypes geoType = ogr_to_type(ogr_type);
4500 
4501  // for now, we promote POLYGON to MULTIPOLYGON (unless exploding)
4503  geoType = (geoType == kPOLYGON) ? kMULTIPOLYGON : geoType;
4504  }
4505 
4506  // build full internal type
4507  SQLTypeInfo ti;
4508  ti.set_type(geoType);
4514  cd.columnType = ti;
4515 
4516  cds.push_back(cd);
4517  }
4518  return cds;
4519 }
void set_compression(EncodingType c)
Definition: sqltypes.h:358
SQLTypes
Definition: sqltypes.h:39
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4274
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:349
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:94
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:103
std::string sourceName
void set_input_srid(int d)
Definition: sqltypes.h:352
CHECK(cgen_state)
void set_fixed_size()
Definition: sqltypes.h:357
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:145
specifies the content in-memory of a row in the column metadata table
void set_output_srid(int s)
Definition: sqltypes.h:354
void set_comp_param(int p)
Definition: sqltypes.h:359
std::string geo_layer_name
Definition: CopyParams.h:78
Definition: sqltypes.h:53
SQLTypeInfo columnType
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4241
std::string columnName
std::pair< SQLTypes, bool > ogr_to_type(const OGRFieldType &ogr_type)
Definition: Importer.cpp:4376
EncodingType geo_coords_encoding
Definition: CopyParams.h:73
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:348

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::list<const ColumnDescriptor*>& import_export::Importer::get_column_descs ( ) const
inline

Definition at line 742 of file Importer.h.

References loader.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

742  {
743  return loader->get_column_descs();
744  }
std::unique_ptr< Loader > loader
Definition: Importer.h:823

+ Here is the caller graph for this function:

const CopyParams& import_export::Importer::get_copy_params ( ) const
inline

Definition at line 741 of file Importer.h.

References import_export::DataStreamSink::copy_params.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

741 { return copy_params; }

+ Here is the caller graph for this function:

std::vector<std::unique_ptr<TypedImportBuffer> >& import_export::Importer::get_import_buffers ( int  i)
inline

Definition at line 750 of file Importer.h.

References import_buffers_vec.

Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

750  {
751  return import_buffers_vec[i];
752  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:822

+ Here is the caller graph for this function:

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > >& import_export::Importer::get_import_buffers_vec ( )
inline

Definition at line 747 of file Importer.h.

References import_buffers_vec.

Referenced by foreign_storage::LazyParquetImporter::partialImport().

747  {
748  return import_buffers_vec;
749  }
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:822

+ Here is the caller graph for this function:

ImportStatus import_export::Importer::get_import_status ( const std::string &  id)
static

Definition at line 209 of file Importer.cpp.

References import_export::import_status_map, and import_export::status_mutex.

Referenced by DBHandler::import_table_status().

209  {
210  mapd_shared_lock<mapd_shared_mutex> read_lock(status_mutex);
211  return import_status_map.at(import_id);
212 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:148
std::string import_id
Definition: Importer.h:818
mapd_shared_lock< mapd_shared_mutex > read_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:147

+ Here is the caller graph for this function:

const bool* import_export::Importer::get_is_array ( ) const
inline

Definition at line 753 of file Importer.h.

References is_array_a.

Referenced by import_export::import_thread_delimited().

753 { return is_array_a.get(); }
std::unique_ptr< bool[]> is_array_a
Definition: Importer.h:824

+ Here is the caller graph for this function:

Catalog_Namespace::Catalog& import_export::Importer::getCatalog ( )
inline

Definition at line 785 of file Importer.h.

References loader.

Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), and import_export::import_thread_delimited().

785 { return loader->getCatalog(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:823

+ Here is the caller graph for this function:

auto import_export::Importer::getLoader ( ) const
inline

Definition at line 809 of file Importer.h.

References loader.

Referenced by foreign_storage::anonymous_namespace{LazyParquetImporter.cpp}::import_row_group(), and foreign_storage::LazyParquetImporter::partialImport().

809 { return loader.get(); }
std::unique_ptr< Loader > loader
Definition: Importer.h:823

+ Here is the caller graph for this function:

ImportStatus import_export::Importer::import ( )

Definition at line 3941 of file Importer.cpp.

References import_export::DataStreamSink::archivePlumber().

3941  {
3942  return DataStreamSink::archivePlumber();
3943 }

+ Here is the call graph for this function:

ImportStatus import_export::Importer::importDelimited ( const std::string &  file_path,
const bool  decompressed 
)
override virtual

Implements import_export::DataStreamSink.

Definition at line 3945 of file Importer.cpp.

References import_export::CopyParams::buffer_size, CHECK(), checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, import_export::DataStreamSink::file_offsets, import_export::DataStreamSink::file_offsets_mutex, file_size, import_export::delimited_parser::find_end(), import_export::CopyParams::geo_assign_render_groups, SQLTypeInfo::get_type(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_delimited(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, max_threads, import_export::DataStreamSink::p_file, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), import_export::status_mutex, logger::thread_id(), import_export::CopyParams::threads, import_export::DataStreamSink::total_file_size, and VLOG.

3946  {
3947  bool load_truncated = false;
3949 
3950  if (!p_file) {
3951  p_file = fopen(file_path.c_str(), "rb");
3952  }
3953  if (!p_file) {
3954  throw std::runtime_error("failed to open file '" + file_path +
3955  "': " + strerror(errno));
3956  }
3957 
3958  if (!decompressed) {
3959  (void)fseek(p_file, 0, SEEK_END);
3960  file_size = ftell(p_file);
3961  }
3962 
3963  if (copy_params.threads == 0) {
3964  max_threads = static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF));
3965  } else {
3966  max_threads = static_cast<size_t>(copy_params.threads);
3967  }
3968 
3969  // deal with small files
3970  size_t alloc_size = copy_params.buffer_size;
3971  if (!decompressed && file_size < alloc_size) {
3972  alloc_size = file_size;
3973  }
3974 
3975  for (size_t i = 0; i < max_threads; i++) {
3976  import_buffers_vec.emplace_back();
3977  for (const auto cd : loader->get_column_descs()) {
3978  import_buffers_vec[i].emplace_back(
3979  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
3980  }
3981  }
3982 
3983  auto scratch_buffer = std::make_unique<char[]>(alloc_size);
3984  size_t current_pos = 0;
3985  size_t end_pos;
3986  size_t begin_pos = 0;
3987 
3988  (void)fseek(p_file, current_pos, SEEK_SET);
3989  size_t size =
3990  fread(reinterpret_cast<void*>(scratch_buffer.get()), 1, alloc_size, p_file);
3991 
3992  // make render group analyzers for each poly column
3993  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
3995  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
3996  loader->getTableDesc()->tableId, false, false, false);
3997  for (auto cd : columnDescriptors) {
3998  SQLTypes ct = cd->columnType.get_type();
3999  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4000  auto rga = std::make_shared<RenderGroupAnalyzer>();
4001  rga->seedFromExistingTableContents(loader, cd->columnName);
4002  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4003  }
4004  }
4005  }
4006 
4007  ChunkKey chunkKey = {loader->getCatalog().getCurrentDB().dbId,
4008  loader->getTableDesc()->tableId};
4009  auto start_epoch = loader->getTableEpoch();
4010  {
4011  std::list<std::future<ImportStatus>> threads;
4012 
4013  // use a stack to track thread_ids which must not overlap among threads
4014  // because thread_id is used to index import_buffers_vec[]
4015  std::stack<size_t> stack_thread_ids;
4016  for (size_t i = 0; i < max_threads; i++) {
4017  stack_thread_ids.push(i);
4018  }
4019  // added for true row index on error
4020  size_t first_row_index_this_buffer = 0;
4021 
4022  while (size > 0) {
4023  unsigned int num_rows_this_buffer = 0;
4024  CHECK(scratch_buffer);
4025  end_pos = delimited_parser::find_end(
4026  scratch_buffer.get(), size, copy_params, num_rows_this_buffer);
4027 
4028  // unput residual
4029  int nresidual = size - end_pos;
4030  std::unique_ptr<char[]> unbuf;
4031  if (nresidual > 0) {
4032  unbuf = std::make_unique<char[]>(nresidual);
4033  memcpy(unbuf.get(), scratch_buffer.get() + end_pos, nresidual);
4034  }
4035 
4036  // get a thread_id not in use
4037  auto thread_id = stack_thread_ids.top();
4038  stack_thread_ids.pop();
4039  // LOG(INFO) << " stack_thread_ids.pop " << thread_id << std::endl;
4040 
4041  threads.push_back(std::async(std::launch::async,
4043  thread_id,
4044  this,
4045  std::move(scratch_buffer),
4046  begin_pos,
4047  end_pos,
4048  end_pos,
4049  columnIdToRenderGroupAnalyzerMap,
4050  first_row_index_this_buffer));
4051 
4052  first_row_index_this_buffer += num_rows_this_buffer;
4053 
4054  current_pos += end_pos;
4055  scratch_buffer = std::make_unique<char[]>(alloc_size);
4056  CHECK(scratch_buffer);
4057  memcpy(scratch_buffer.get(), unbuf.get(), nresidual);
4058  size = nresidual + fread(scratch_buffer.get() + nresidual,
4059  1,
4060  copy_params.buffer_size - nresidual,
4061  p_file);
4062 
4063  begin_pos = 0;
4064 
4065  while (threads.size() > 0) {
4066  int nready = 0;
4067  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4068  it != threads.end();) {
4069  auto& p = *it;
4070  std::chrono::milliseconds span(
4071  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4072  if (p.wait_for(span) == std::future_status::ready) {
4073  auto ret_import_status = p.get();
4074  import_status += ret_import_status;
4075  // sum up current total file offsets
4076  size_t total_file_offset{0};
4077  if (decompressed) {
4078  std::unique_lock<std::mutex> lock(file_offsets_mutex);
4079  for (const auto file_offset : file_offsets) {
4080  total_file_offset += file_offset;
4081  }
4082  }
4083  // estimate number of rows per current total file offset
4084  if (decompressed ? total_file_offset : current_pos) {
4086  (decompressed ? (float)total_file_size / total_file_offset
4087  : (float)file_size / current_pos) *
4088  import_status.rows_completed;
4089  }
4090  VLOG(3) << "rows_completed " << import_status.rows_completed
4091  << ", rows_estimated " << import_status.rows_estimated
4092  << ", total_file_size " << total_file_size << ", total_file_offset "
4093  << total_file_offset;
4095  // recall thread_id for reuse
4096  stack_thread_ids.push(ret_import_status.thread_id);
4097  threads.erase(it++);
4098  ++nready;
4099  } else {
4100  ++it;
4101  }
4102  }
4103 
4104  if (nready == 0) {
4105  std::this_thread::yield();
4106  }
4107 
4108  // on eof, wait all threads to finish
4109  if (0 == size) {
4110  continue;
4111  }
4112 
4113  // keep reading if any free thread slot
4114  // this is one of the major difference from old threading model !!
4115  if (threads.size() < max_threads) {
4116  break;
4117  }
4118  }
4119 
4120  if (import_status.rows_rejected > copy_params.max_reject) {
4121  load_truncated = true;
4122  load_failed = true;
4123  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4124  break;
4125  }
4126  if (load_failed) {
4127  load_truncated = true;
4128  LOG(ERROR) << "A call to the Loader::load failed, Please review the logs for "
4129  "more details";
4130  break;
4131  }
4132  }
4133 
4134  // join dangling threads in case of LOG(ERROR) above
4135  for (auto& p : threads) {
4136  p.wait();
4137  }
4138  }
4139 
4140  checkpoint(start_epoch);
4141 
4142  // must set import_status.load_truncated before closing this end of pipe
4143  // otherwise, the thread on the other end would throw an unwanted 'write()'
4144  // exception
4145  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
4146  import_status.load_truncated = load_truncated;
4147 
4148  fclose(p_file);
4149  p_file = nullptr;
4150  return import_status;
4151 }
std::vector< int > ChunkKey
Definition: types.h:35
SQLTypes
Definition: sqltypes.h:39
#define LOG(tag)
Definition: Logger.h:188
void checkpoint(const int32_t start_epoch)
Definition: Importer.cpp:3266
CHECK(cgen_state)
size_t find_end(const char *buffer, size_t size, const import_export::CopyParams &copy_params, unsigned int &num_rows_this_buffer)
Finds the closest possible row ending to the end of the given buffer.
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:822
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:214
static ImportStatus import_thread_delimited(int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
Definition: Importer.cpp:1804
std::string import_id
Definition: Importer.h:818
ThreadId thread_id()
Definition: Logger.cpp:715
std::map< int, std::shared_ptr< RenderGroupAnalyzer >> ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:136
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:147
std::vector< size_t > file_offsets
Definition: Importer.h:652
#define VLOG(n)
Definition: Logger.h:291
std::unique_ptr< Loader > loader
Definition: Importer.h:823
const std::string file_path
Definition: Importer.h:647

+ Here is the call graph for this function:

ImportStatus import_export::Importer::importGDAL ( std::map< std::string, std::string >  colname_to_src)

Definition at line 4723 of file Importer.cpp.

References CHECK(), CHECK_EQ, checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, import_export::CopyParams::geo_assign_render_groups, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_layer_name, SQLTypeInfo::get_type(), import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_shapefile(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, openGDALDataset(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, set_import_status(), logger::thread_id(), and import_export::CopyParams::threads.

Referenced by QueryRunner::ImportDriver::importGeoTable().

// Body of Importer::importGDAL.
// Reads the features of one OGR layer in chunks of MAX_FEATURES_PER_CHUNK and
// hands each chunk to import_thread_shapefile(), either directly (when
// DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT is set) or via std::async. Each
// worker's ImportStatus is accumulated into import_status and published with
// set_import_status(). The loop stops early when rows_rejected exceeds
// copy_params.max_reject or when load_failed is set. Finally the table is
// checkpointed against start_epoch and import_status is returned.
// Throws std::runtime_error when the dataset could not be opened.
// NOTE: the embedded source numbering skips a few lines (e.g. 4727, 4729,
// 4736, 4788, 4863); those statements, including the declaration of poDS, are
// not part of this listing.
4724  {
4725  // initial status
4726  bool load_truncated = false;
4728 
4730  if (poDS == nullptr) {
4731  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4732  file_path);
4733  }
4734 
4735  OGRLayer& layer =
4737 
4738  // get the number of features in this layer
// NOTE(review): GetFeatureCount() returns a signed integer; a negative result
// would wrap to a very large size_t here - confirm the driver cannot report one.
4739  size_t numFeatures = layer.GetFeatureCount();
4740 
4741  // build map of metadata field (additional columns) name to index
4742  // use shared_ptr since we need to pass it to the worker
4743  FieldNameToIndexMapType fieldNameToIndexMap;
4744  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4745  CHECK(poFDefn);
4746  size_t numFields = poFDefn->GetFieldCount();
4747  for (size_t iField = 0; iField < numFields; iField++) {
4748  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4749  fieldNameToIndexMap.emplace(std::make_pair(poFieldDefn->GetNameRef(), iField));
4750  }
4751 
4752  // the geographic spatial reference we want to put everything in
4753  OGRSpatialReferenceUqPtr poGeographicSR(new OGRSpatialReference());
4754  poGeographicSR->importFromEPSG(copy_params.geo_coords_srid);
4755 
4756 #if GDAL_VERSION_MAJOR >= 3
4757  // GDAL 3.x (really Proj.4 6.x) now enforces lat, lon order
4758  // this results in X and Y being transposed for angle-based
4759  // coordinate systems. This restores the previous behavior.
4760  poGeographicSR->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
4761 #endif
4762 
4763 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4764  // just one "thread"
4765  size_t max_threads = 1;
4766 #else
4767  // how many threads to use
// copy_params.threads == 0 means "use the configured processor count"
4768  size_t max_threads = 0;
4769  if (copy_params.threads == 0) {
4770  max_threads = sysconf(_SC_NPROCESSORS_CONF);
4771  } else {
4772  max_threads = copy_params.threads;
4773  }
4774 #endif
4775 
4776  // make an import buffer for each thread
// one TypedImportBuffer per column descriptor, per thread slot
4777  CHECK_EQ(import_buffers_vec.size(), 0u);
4778  import_buffers_vec.resize(max_threads);
4779  for (size_t i = 0; i < max_threads; i++) {
4780  for (const auto cd : loader->get_column_descs()) {
4781  import_buffers_vec[i].emplace_back(
4782  new TypedImportBuffer(cd, loader->getStringDict(cd)));
4783  }
4784  }
4785 
4786  // make render group analyzers for each poly column
// (the line opening the enclosing scope, source line 4788, is not shown in this listing)
4787  ColumnIdToRenderGroupAnalyzerMapType columnIdToRenderGroupAnalyzerMap;
4789  auto columnDescriptors = loader->getCatalog().getAllColumnMetadataForTable(
4790  loader->getTableDesc()->tableId, false, false, false);
4791  for (auto cd : columnDescriptors) {
4792  SQLTypes ct = cd->columnType.get_type();
4793  if (ct == kPOLYGON || ct == kMULTIPOLYGON) {
4794  auto rga = std::make_shared<RenderGroupAnalyzer>();
4795  rga->seedFromExistingTableContents(loader, cd->columnName);
4796  columnIdToRenderGroupAnalyzerMap[cd->columnId] = rga;
4797  }
4798  }
4799  }
4800 
4801 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4802  // threads
4803  std::list<std::future<ImportStatus>> threads;
4804 
4805  // use a stack to track thread_ids which must not overlap among threads
4806  // because thread_id is used to index import_buffers_vec[]
4807  std::stack<size_t> stack_thread_ids;
4808  for (size_t i = 0; i < max_threads; i++) {
4809  stack_thread_ids.push(i);
4810  }
4811 #endif
4812 
4813  // checkpoint the table
// the epoch read here is what checkpoint() receives at the end of the import
4814  auto start_epoch = loader->getTableEpoch();
4815 
4816  // reset the layer
4817  layer.ResetReading();
4818 
4819  static const size_t MAX_FEATURES_PER_CHUNK = 1000;
4820 
4821  // make a features buffer for each thread
4822  std::vector<FeaturePtrVector> features(max_threads);
4823 
4824  // for each feature...
4825  size_t firstFeatureThisChunk = 0;
4826  while (firstFeatureThisChunk < numFeatures) {
4827  // how many features this chunk
4828  size_t numFeaturesThisChunk =
4829  std::min(MAX_FEATURES_PER_CHUNK, numFeatures - firstFeatureThisChunk);
4830 
4831 // get a thread_id not in use
4832 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4833  size_t thread_id = 0;
4834 #else
4835  auto thread_id = stack_thread_ids.top();
4836  stack_thread_ids.pop();
4837  CHECK(thread_id < max_threads);
4838 #endif
4839 
4840  // fill features buffer for new thread
// NOTE(review): the result of GetNextFeature() is stored without a null check;
// presumably the worker tolerates null entries - confirm.
4841  for (size_t i = 0; i < numFeaturesThisChunk; i++) {
4842  features[thread_id].emplace_back(layer.GetNextFeature());
4843  }
4844 
4845 #if DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4846  // call worker function directly
// NOTE(review): if the worker takes the feature vector by const reference,
// std::move() below moves nothing and features[0] keeps growing across
// chunks - confirm against import_thread_shapefile's signature.
4847  auto ret_import_status = import_thread_shapefile(0,
4848  this,
4849  poGeographicSR.get(),
4850  std::move(features[thread_id]),
4851  firstFeatureThisChunk,
4852  numFeaturesThisChunk,
4853  fieldNameToIndexMap,
4854  columnNameToSourceNameMap,
4855  columnIdToRenderGroupAnalyzerMap);
4856  import_status += ret_import_status;
// NOTE(review): this scales rows_completed by the fraction of features already
// dispatched, so the estimate can never exceed rows_completed; presumably the
// inverse ratio (or numFeatures itself) was intended - confirm.
4857  import_status.rows_estimated = ((float)firstFeatureThisChunk / (float)numFeatures) *
4858  import_status.rows_completed;
4859  set_import_status(import_id, import_status);
4860 #else
4861  // fire up that thread to import this geometry
// (the callable argument, source line 4863, is not shown in this listing)
4862  threads.push_back(std::async(std::launch::async,
4864  thread_id,
4865  this,
4866  poGeographicSR.get(),
4867  std::move(features[thread_id]),
4868  firstFeatureThisChunk,
4869  numFeaturesThisChunk,
4870  fieldNameToIndexMap,
4871  columnNameToSourceNameMap,
4872  columnIdToRenderGroupAnalyzerMap));
4873 
4874  // let the threads run
// poll every outstanding future with a zero timeout; harvest the ones that are
// ready and leave the loop as soon as at least one thread slot is free
4875  while (threads.size() > 0) {
4876  int nready = 0;
4877  for (std::list<std::future<ImportStatus>>::iterator it = threads.begin();
4878  it != threads.end();) {
4879  auto& p = *it;
4880  std::chrono::milliseconds span(
4881  0); //(std::distance(it, threads.end()) == 1? 1: 0);
4882  if (p.wait_for(span) == std::future_status::ready) {
4883  auto ret_import_status = p.get();
4884  import_status += ret_import_status;
// NOTE(review): same ratio as in the non-threaded branch above - confirm.
4885  import_status.rows_estimated =
4886  ((float)firstFeatureThisChunk / (float)numFeatures) *
4887  import_status.rows_completed;
4888  set_import_status(import_id, import_status);
4889 
4890  // recall thread_id for reuse
4891  stack_thread_ids.push(ret_import_status.thread_id);
4892 
4893  threads.erase(it++);
4894  ++nready;
4895  } else {
4896  ++it;
4897  }
4898  }
4899 
4900  if (nready == 0) {
4901  std::this_thread::yield();
4902  }
4903 
4904  // keep reading if any free thread slot
4905  // this is one of the major difference from old threading model !!
4906  if (threads.size() < max_threads) {
4907  break;
4908  }
4909  }
4910 #endif
4911 
4912  // out of rows?
4913  if (import_status.rows_rejected > copy_params.max_reject) {
4914  load_truncated = true;
4915  load_failed = true;
4916  LOG(ERROR) << "Maximum rows rejected exceeded. Halting load";
4917  break;
4918  }
4919 
4920  // failed?
4921  if (load_failed) {
4922  load_truncated = true;
4923  LOG(ERROR)
4924  << "A call to the Loader::load failed, Please review the logs for more details";
4925  break;
4926  }
4927 
4928  firstFeatureThisChunk += numFeaturesThisChunk;
4929  }
4930 
4931 #if !DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT
4932  // wait for any remaining threads
// also reached after an early break above, so still-running workers are
// drained and their results folded into import_status
4933  if (threads.size()) {
4934  for (auto& p : threads) {
4935  // wait for the thread
4936  p.wait();
4937  // get the result and update the final import status
4938  auto ret_import_status = p.get();
4939  import_status += ret_import_status;
4940  import_status.rows_estimated = import_status.rows_completed;
4941  set_import_status(import_id, import_status);
4942  }
4943  }
4944 #endif
4945 
4946  checkpoint(start_epoch);
4947 
4948  // must set import_status.load_truncated before closing this end of pipe
4949  // otherwise, the thread on the other end would throw an unwanted 'write()'
4950  // exception
4951  import_status.load_truncated = load_truncated;
4952  return import_status;
4953 }
std::map< std::string, size_t > FieldNameToIndexMapType
Definition: Importer.cpp:133
#define CHECK_EQ(x, y)
Definition: Logger.h:205
SQLTypes
Definition: sqltypes.h:39
std::unique_ptr< OGRSpatialReference, OGRSpatialReferenceDeleter > OGRSpatialReferenceUqPtr
Definition: Importer.cpp:113
#define LOG(tag)
Definition: Logger.h:188
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4274
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:94
void checkpoint(const int32_t start_epoch)
Definition: Importer.cpp:3266
CHECK(cgen_state)
std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_buffers_vec
Definition: Importer.h:822
std::string geo_layer_name
Definition: CopyParams.h:78
static void set_import_status(const std::string &id, const ImportStatus is)
Definition: Importer.cpp:214
std::string import_id
Definition: Importer.h:818
ThreadId thread_id()
Definition: Logger.cpp:715
std::map< int, std::shared_ptr< RenderGroupAnalyzer >> ColumnIdToRenderGroupAnalyzerMapType
Definition: Importer.cpp:136
import_export::TypedImportBuffer TypedImportBuffer
static ImportStatus import_thread_shapefile(int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
Definition: Importer.cpp:2132
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4241
std::unique_ptr< Loader > loader
Definition: Importer.h:823
const std::string file_path
Definition: Importer.h:647

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::load ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t  row_count 
)

Definition at line 3259 of file Importer.cpp.

References import_export::DataStreamSink::load_failed, and loader.

Referenced by foreign_storage::anonymous_namespace{LazyParquetImporter.cpp}::import_row_group(), import_export::import_thread_delimited(), and import_export::import_thread_shapefile().

// Body of Importer::load.
// Forwards the filled import buffers and their row count to the loader
// without checkpointing. A false return from loadNoCheckpoint() is not
// propagated to the caller; it only sets load_failed.
3260  {
3261  if (!loader->loadNoCheckpoint(import_buffers, row_count)) {
3262  load_failed = true;
3263  }
3264 }
std::unique_ptr< Loader > loader
Definition: Importer.h:823

+ Here is the caller graph for this function:

OGRDataSource * import_export::Importer::openGDALDataset ( const std::string &  fileName,
const CopyParams &  copy_params 
)
static private

Definition at line 4241 of file Importer.cpp.

References logger::ERROR, logger::INFO, import_export::GDAL::init(), LOG, and setGDALAuthorizationTokens().

Referenced by gdalGetLayersInGeoFile(), gdalToColumnDescriptors(), importGDAL(), and readMetadataSampleGDAL().

// Body of Importer::openGDALDataset.
// Initializes GDAL, then opens file_name as a vector dataset and returns the
// raw OGRDataSource pointer. With GDAL 1.x the file is opened through
// OGRSFDriverRegistrar::Open; otherwise GDALOpenEx is tried, and retried once
// if the first attempt yields nullptr. On failure the last GDAL error message
// is logged and nullptr is returned, so callers must check the result.
// NOTE: source line 4247 (following the "set authorization tokens" comment)
// is not part of this listing.
4242  {
4243  // lazy init GDAL
4244  GDAL::init();
4245 
4246  // set authorization tokens
4248 
4249  // open the file
4250  OGRDataSource* poDS;
4251 #if GDAL_VERSION_MAJOR == 1
4252  poDS = (OGRDataSource*)OGRSFDriverRegistrar::Open(file_name.c_str(), false);
4253 #else
4254  poDS = (OGRDataSource*)GDALOpenEx(
4255  file_name.c_str(), GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
// NOTE(review): GDAL_OF_READONLY is defined as 0 in gdal.h, so the retry below
// appears to pass the same flags as the first attempt - confirm whether the
// first call was meant to request GDAL_OF_UPDATE.
4256  if (poDS == nullptr) {
4257  poDS = (OGRDataSource*)GDALOpenEx(
4258  file_name.c_str(), GDAL_OF_READONLY | GDAL_OF_VECTOR, nullptr, nullptr, nullptr);
4259  if (poDS) {
4260  LOG(INFO) << "openGDALDataset had to open as read-only";
4261  }
4262  }
4263 #endif
4264  if (poDS == nullptr) {
4265  LOG(ERROR) << "openGDALDataset Error: " << CPLGetLastErrorMsg();
4266  }
4267  // NOTE(adb): If extending this function, refactor to ensure any errors will not result
4268  // in a memory leak if GDAL successfully opened the input dataset.
4269  return poDS;
4270 }
#define LOG(tag)
Definition: Logger.h:188
static void init()
Definition: GDAL.cpp:56
static void setGDALAuthorizationTokens(const CopyParams &copy_params)
Definition: Importer.cpp:4171

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::readMetadataSampleGDAL ( const std::string &  fileName,
const std::string &  geoColumnName,
std::map< std::string, std::vector< std::string >> &  metadata,
int  rowLimit,
const CopyParams copy_params 
)
static

Definition at line 4297 of file Importer.cpp.

References CHECK(), import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), and openGDALDataset().

Referenced by DBHandler::detect_column_types().

4302  {
4304  if (poDS == nullptr) {
4305  throw std::runtime_error("openGDALDataset Error: Unable to open geo file " +
4306  file_name);
4307  }
4308 
4309  OGRLayer& layer =
4311 
4312  OGRFeatureDefn* poFDefn = layer.GetLayerDefn();
4313  CHECK(poFDefn);
4314 
4315  // typeof GetFeatureCount() is different between GDAL 1.x (int32_t) and 2.x (int64_t)
4316  auto nFeats = layer.GetFeatureCount();
4317  size_t numFeatures =
4318  std::max(static_cast<decltype(nFeats)>(0),
4319  std::min(static_cast<decltype(nFeats)>(rowLimit), nFeats));
4320  for (auto iField = 0; iField < poFDefn->GetFieldCount(); iField++) {
4321  OGRFieldDefn* poFieldDefn = poFDefn->GetFieldDefn(iField);
4322  // FIXME(andrewseidl): change this to the faster one used by readVerticesFromGDAL
4323  metadata.emplace(poFieldDefn->GetNameRef(), std::vector<std::string>(numFeatures));
4324  }
4325  metadata.emplace(geo_column_name, std::vector<std::string>(numFeatures));
4326  layer.ResetReading();
4327  size_t iFeature = 0;
4328  while (iFeature < numFeatures) {
4329  OGRFeatureUqPtr poFeature(layer.GetNextFeature());
4330  if (!poFeature) {
4331  break;
4332  }
4333 
4334  OGRGeometry* poGeometry = poFeature->GetGeometryRef();
4335  if (poGeometry != nullptr) {
4336  // validate geom type (again?)
4337  switch (wkbFlatten(poGeometry->getGeometryType())) {
4338  case wkbPoint:
4339  case wkbLineString:
4340  case wkbPolygon:
4341  case wkbMultiPolygon:
4342  break;
4343  case wkbMultiPoint:
4344  case wkbMultiLineString:
4345  // supported if geo_explode_collections is specified
4347  throw std::runtime_error("Unsupported geometry type: " +
4348  std::string(poGeometry->getGeometryName()));
4349  }
4350  break;
4351  default:
4352  throw std::runtime_error("Unsupported geometry type: " +
4353  std::string(poGeometry->getGeometryName()));
4354  }
4355 
4356  // populate metadata for regular fields
4357  for (auto i : metadata) {
4358  auto iField = poFeature->GetFieldIndex(i.first.c_str());
4359  if (iField >= 0) { // geom is -1
4360  metadata[i.first].at(iFeature) =
4361  std::string(poFeature->GetFieldAsString(iField));
4362  }
4363  }
4364 
4365  // populate metadata for geo column with WKT string
4366  char* wkts = nullptr;
4367  poGeometry->exportToWkt(&wkts);
4368  CHECK(wkts);
4369  metadata[geo_column_name].at(iFeature) = wkts;
4370  CPLFree(wkts);
4371  }
4372  iFeature++;
4373  }
4374 }
OGRLayer & getLayerWithSpecifiedName(const std::string &geo_layer_name, const OGRDataSourceUqPtr &poDS, const std::string &file_name)
Definition: Importer.cpp:4274
std::unique_ptr< OGRDataSource, OGRDataSourceDeleter > OGRDataSourceUqPtr
Definition: Importer.cpp:94
std::unique_ptr< OGRFeature, OGRFeatureDeleter > OGRFeatureUqPtr
Definition: Importer.cpp:103
CHECK(cgen_state)
std::string geo_layer_name
Definition: CopyParams.h:78
static OGRDataSource * openGDALDataset(const std::string &fileName, const CopyParams &copy_params)
Definition: Importer.cpp:4241

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_geo_physical_import_buffer ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< double > &  coords,
std::vector< double > &  bounds,
std::vector< int > &  ring_sizes,
std::vector< int > &  poly_rings,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1421 of file Importer.cpp.

References ColumnDescriptor::columnId, ColumnDescriptor::columnType, geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), Parser::AddColumnStmt::execute(), import_export::import_thread_delimited(), DBHandler::load_table(), and foreign_storage::csv_file_buffer_parser::process_geo_column().

1431  {
1432  const auto col_ti = cd->columnType;
1433  const auto col_type = col_ti.get_type();
1434  auto columnId = cd->columnId;
1435  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1436  bool is_null_geo = false;
1437  bool is_null_point = false;
1438  if (!col_ti.get_notnull()) {
1439  // Check for NULL geo
1440  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1441  is_null_point = true;
1442  coords.clear();
1443  }
1444  is_null_geo = coords.empty();
1445  if (is_null_point) {
1446  coords.push_back(NULL_ARRAY_DOUBLE);
1447  coords.push_back(NULL_DOUBLE);
1448  // Treating POINT coords as notnull, need to store actual encoding
1449  // [un]compressed+[not]null
1450  is_null_geo = false;
1451  }
1452  }
1453  std::vector<TDatum> td_coords_data;
1454  // Get the raw data representing [optionally compressed] non-NULL geo's coords.
1455  // One exception - NULL POINT geo: coords need to be processed to encode nullness
1456  // in a fixlen array, compressed and uncompressed.
1457  if (!is_null_geo) {
1458  std::vector<uint8_t> compressed_coords = geospatial::compress_coords(coords, col_ti);
1459  for (auto cc : compressed_coords) {
1460  TDatum td_byte;
1461  td_byte.val.int_val = cc;
1462  td_coords_data.push_back(td_byte);
1463  }
1464  }
1465  TDatum tdd_coords;
1466  tdd_coords.val.arr_val = td_coords_data;
1467  tdd_coords.is_null = is_null_geo;
1468  import_buffers[col_idx++]->add_value(cd_coords, tdd_coords, false, replicate_count);
1469 
1470  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1471  // Create ring_sizes array value and add it to the physical column
1472  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1473  std::vector<TDatum> td_ring_sizes;
1474  if (!is_null_geo) {
1475  for (auto ring_size : ring_sizes) {
1476  TDatum td_ring_size;
1477  td_ring_size.val.int_val = ring_size;
1478  td_ring_sizes.push_back(td_ring_size);
1479  }
1480  }
1481  TDatum tdd_ring_sizes;
1482  tdd_ring_sizes.val.arr_val = td_ring_sizes;
1483  tdd_ring_sizes.is_null = is_null_geo;
1484  import_buffers[col_idx++]->add_value(
1485  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1486  }
1487 
1488  if (col_type == kMULTIPOLYGON) {
1489  // Create poly_rings array value and add it to the physical column
1490  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1491  std::vector<TDatum> td_poly_rings;
1492  if (!is_null_geo) {
1493  for (auto num_rings : poly_rings) {
1494  TDatum td_num_rings;
1495  td_num_rings.val.int_val = num_rings;
1496  td_poly_rings.push_back(td_num_rings);
1497  }
1498  }
1499  TDatum tdd_poly_rings;
1500  tdd_poly_rings.val.arr_val = td_poly_rings;
1501  tdd_poly_rings.is_null = is_null_geo;
1502  import_buffers[col_idx++]->add_value(
1503  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1504  }
1505 
1506  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1507  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1508  std::vector<TDatum> td_bounds_data;
1509  if (!is_null_geo) {
1510  for (auto b : bounds) {
1511  TDatum td_double;
1512  td_double.val.real_val = b;
1513  td_bounds_data.push_back(td_double);
1514  }
1515  }
1516  TDatum tdd_bounds;
1517  tdd_bounds.val.arr_val = td_bounds_data;
1518  tdd_bounds.is_null = is_null_geo;
1519  import_buffers[col_idx++]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1520  }
1521 
1522  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1523  // Create render_group value and add it to the physical column
1524  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1525  TDatum td_render_group;
1526  td_render_group.val.int_val = render_group;
1527  td_render_group.is_null = is_null_geo;
1528  import_buffers[col_idx++]->add_value(
1529  cd_render_group, td_render_group, false, replicate_count);
1530  }
1531 }
#define NULL_DOUBLE
Definition: sqltypes.h:185
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_geo_physical_import_buffer_columnar ( const Catalog_Namespace::Catalog catalog,
const ColumnDescriptor cd,
std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers,
size_t &  col_idx,
std::vector< std::vector< double >> &  coords_column,
std::vector< std::vector< double >> &  bounds_column,
std::vector< std::vector< int >> &  ring_sizes_column,
std::vector< std::vector< int >> &  poly_rings_column,
int  render_group,
const int64_t  replicate_count = 0 
)
static

Definition at line 1533 of file Importer.cpp.

References CHECK(), ColumnDescriptor::columnId, ColumnDescriptor::columnType, geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.

Referenced by DBHandler::load_table_binary_columnar().

1543  {
1544  const auto col_ti = cd->columnType;
1545  const auto col_type = col_ti.get_type();
1546  auto columnId = cd->columnId;
1547 
1548  auto coords_row_count = coords_column.size();
1549  auto cd_coords = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1550  for (auto coords : coords_column) {
1551  bool is_null_geo = false;
1552  bool is_null_point = false;
1553  if (!col_ti.get_notnull()) {
1554  // Check for NULL geo
1555  if (col_type == kPOINT && (coords.empty() || coords[0] == NULL_ARRAY_DOUBLE)) {
1556  is_null_point = true;
1557  coords.clear();
1558  }
1559  is_null_geo = coords.empty();
1560  if (is_null_point) {
1561  coords.push_back(NULL_ARRAY_DOUBLE);
1562  coords.push_back(NULL_DOUBLE);
1563  // Treating POINT coords as notnull, need to store actual encoding
1564  // [un]compressed+[not]null
1565  is_null_geo = false;
1566  }
1567  }
1568  std::vector<TDatum> td_coords_data;
1569  if (!is_null_geo) {
1570  std::vector<uint8_t> compressed_coords =
1571  geospatial::compress_coords(coords, col_ti);
1572  for (auto cc : compressed_coords) {
1573  TDatum td_byte;
1574  td_byte.val.int_val = cc;
1575  td_coords_data.push_back(td_byte);
1576  }
1577  }
1578  TDatum tdd_coords;
1579  tdd_coords.val.arr_val = td_coords_data;
1580  tdd_coords.is_null = is_null_geo;
1581  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false, replicate_count);
1582  }
1583  col_idx++;
1584 
1585  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1586  if (ring_sizes_column.size() != coords_row_count) {
1587  CHECK(false) << "Geometry import columnar: ring sizes column size mismatch";
1588  }
1589  // Create ring_sizes array value and add it to the physical column
1590  auto cd_ring_sizes = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1591  for (auto ring_sizes : ring_sizes_column) {
1592  bool is_null_geo = false;
1593  if (!col_ti.get_notnull()) {
1594  // Check for NULL geo
1595  is_null_geo = ring_sizes.empty();
1596  }
1597  std::vector<TDatum> td_ring_sizes;
1598  for (auto ring_size : ring_sizes) {
1599  TDatum td_ring_size;
1600  td_ring_size.val.int_val = ring_size;
1601  td_ring_sizes.push_back(td_ring_size);
1602  }
1603  TDatum tdd_ring_sizes;
1604  tdd_ring_sizes.val.arr_val = td_ring_sizes;
1605  tdd_ring_sizes.is_null = is_null_geo;
1606  import_buffers[col_idx]->add_value(
1607  cd_ring_sizes, tdd_ring_sizes, false, replicate_count);
1608  }
1609  col_idx++;
1610  }
1611 
1612  if (col_type == kMULTIPOLYGON) {
1613  if (poly_rings_column.size() != coords_row_count) {
1614  CHECK(false) << "Geometry import columnar: poly rings column size mismatch";
1615  }
1616  // Create poly_rings array value and add it to the physical column
1617  auto cd_poly_rings = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1618  for (auto poly_rings : poly_rings_column) {
1619  bool is_null_geo = false;
1620  if (!col_ti.get_notnull()) {
1621  // Check for NULL geo
1622  is_null_geo = poly_rings.empty();
1623  }
1624  std::vector<TDatum> td_poly_rings;
1625  for (auto num_rings : poly_rings) {
1626  TDatum td_num_rings;
1627  td_num_rings.val.int_val = num_rings;
1628  td_poly_rings.push_back(td_num_rings);
1629  }
1630  TDatum tdd_poly_rings;
1631  tdd_poly_rings.val.arr_val = td_poly_rings;
1632  tdd_poly_rings.is_null = is_null_geo;
1633  import_buffers[col_idx]->add_value(
1634  cd_poly_rings, tdd_poly_rings, false, replicate_count);
1635  }
1636  col_idx++;
1637  }
1638 
1639  if (col_type == kLINESTRING || col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1640  if (bounds_column.size() != coords_row_count) {
1641  CHECK(false) << "Geometry import columnar: bounds column size mismatch";
1642  }
1643  auto cd_bounds = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1644  for (auto bounds : bounds_column) {
1645  bool is_null_geo = false;
1646  if (!col_ti.get_notnull()) {
1647  // Check for NULL geo
1648  is_null_geo = (bounds.empty() || bounds[0] == NULL_ARRAY_DOUBLE);
1649  }
1650  std::vector<TDatum> td_bounds_data;
1651  for (auto b : bounds) {
1652  TDatum td_double;
1653  td_double.val.real_val = b;
1654  td_bounds_data.push_back(td_double);
1655  }
1656  TDatum tdd_bounds;
1657  tdd_bounds.val.arr_val = td_bounds_data;
1658  tdd_bounds.is_null = is_null_geo;
1659  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false, replicate_count);
1660  }
1661  col_idx++;
1662  }
1663 
1664  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1665  // Create render_group value and add it to the physical column
1666  auto cd_render_group = catalog.getMetadataForColumn(cd->tableId, ++columnId);
1667  TDatum td_render_group;
1668  td_render_group.val.int_val = render_group;
1669  td_render_group.is_null = false;
1670  for (decltype(coords_row_count) i = 0; i < coords_row_count; i++) {
1671  import_buffers[col_idx]->add_value(
1672  cd_render_group, td_render_group, false, replicate_count);
1673  }
1674  col_idx++;
1675  }
1676 }
#define NULL_DOUBLE
Definition: sqltypes.h:185
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
CHECK(cgen_state)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Importer::set_import_status ( const std::string &  id,
const ImportStatus  is 
)
static

Definition at line 214 of file Importer.cpp.

References import_export::ImportStatus::elapsed, import_export::ImportStatus::end, import_id, import_export::import_status_map, import_export::ImportStatus::start, and import_export::status_mutex.

Referenced by importDelimited(), and importGDAL().

// Body of Importer::set_import_status.
// Takes the write lock on status_mutex, then stamps `is` with the current
// steady-clock time as its end time and with the elapsed milliseconds since
// is.start.
// NOTE: source line 218 is not part of this listing; presumably it stores
// `is` under `id` in import_status_map - confirm against Importer.cpp.
214  {
215  mapd_lock_guard<mapd_shared_mutex> write_lock(status_mutex);
216  is.end = std::chrono::steady_clock::now();
217  is.elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(is.end - is.start);
219 }
static std::map< std::string, ImportStatus > import_status_map
Definition: Importer.cpp:148
std::string import_id
Definition: Importer.h:818
mapd_unique_lock< mapd_shared_mutex > write_lock
static mapd_shared_mutex status_mutex
Definition: Importer.cpp:147

+ Here is the caller graph for this function:

void import_export::Importer::setGDALAuthorizationTokens ( const CopyParams copy_params)
static private

Definition at line 4171 of file Importer.cpp.

References logger::INFO, LOG, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, and import_export::CopyParams::s3_secret_key.

Referenced by gdalGetAllFilesInArchive(), gdalGetLayersInGeoFile(), gdalStatInternal(), and openGDALDataset().

// Pushes the S3 settings held in copy_params into GDAL's configuration through
// CPLSetConfigOption. Each of AWS_REGION, AWS_S3_ENDPOINT, AWS_ACCESS_KEY_ID and
// AWS_SECRET_ACCESS_KEY is set from the matching CopyParams string when that
// string is non-empty, and set to nullptr otherwise. The LOG(INFO) lines are
// compiled in only when DEBUG_AWS_AUTHENTICATION is true.
4171  {
4172  // Only S3 is supported for now.
4173  // @TODO generalize CopyParams to have a dictionary of GDAL tokens
4174  // Each option is set only when its CopyParams field is non-empty; when the field
4175  // is empty the option is explicitly cleared (nullptr) so GDAL reverts to its own
4176  // default instead of reusing a previous session's keys.
4177  if (copy_params.s3_region.size()) {
4178 #if DEBUG_AWS_AUTHENTICATION
4179  LOG(INFO) << "GDAL: Setting AWS_REGION to '" << copy_params.s3_region << "'";
4180 #endif
4181  CPLSetConfigOption("AWS_REGION", copy_params.s3_region.c_str());
4182  } else {
4183 #if DEBUG_AWS_AUTHENTICATION
4184  LOG(INFO) << "GDAL: Clearing AWS_REGION";
4185 #endif
4186  CPLSetConfigOption("AWS_REGION", nullptr);
4187  }
4188  if (copy_params.s3_endpoint.size()) {
4189 #if DEBUG_AWS_AUTHENTICATION
4190  LOG(INFO) << "GDAL: Setting AWS_S3_ENDPOINT to '" << copy_params.s3_endpoint << "'";
4191 #endif
4192  CPLSetConfigOption("AWS_S3_ENDPOINT", copy_params.s3_endpoint.c_str());
4193  } else {
4194 #if DEBUG_AWS_AUTHENTICATION
4195  LOG(INFO) << "GDAL: Clearing AWS_S3_ENDPOINT";
4196 #endif
4197  CPLSetConfigOption("AWS_S3_ENDPOINT", nullptr);
4198  }
4199  if (copy_params.s3_access_key.size()) {
4200 #if DEBUG_AWS_AUTHENTICATION
4201  LOG(INFO) << "GDAL: Setting AWS_ACCESS_KEY_ID to '" << copy_params.s3_access_key
4202  << "'";
4203 #endif
4204  CPLSetConfigOption("AWS_ACCESS_KEY_ID", copy_params.s3_access_key.c_str());
4205  } else {
4206 #if DEBUG_AWS_AUTHENTICATION
4207  LOG(INFO) << "GDAL: Clearing AWS_ACCESS_KEY_ID";
4208 #endif
4209  CPLSetConfigOption("AWS_ACCESS_KEY_ID", nullptr);
4210  }
4211  if (copy_params.s3_secret_key.size()) {
4212 #if DEBUG_AWS_AUTHENTICATION
// NOTE(review): when DEBUG_AWS_AUTHENTICATION is enabled, the statement below
// writes the S3 secret key to the INFO log in plain text.
4213  LOG(INFO) << "GDAL: Setting AWS_SECRET_ACCESS_KEY to '" << copy_params.s3_secret_key
4214  << "'";
4215 #endif
4216  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", copy_params.s3_secret_key.c_str());
4217  } else {
4218 #if DEBUG_AWS_AUTHENTICATION
4219  LOG(INFO) << "GDAL: Clearing AWS_SECRET_ACCESS_KEY";
4220 #endif
4221  CPLSetConfigOption("AWS_SECRET_ACCESS_KEY", nullptr);
4222  }
4223 
// The AWS_NO_SIGN_REQUEST handling below is compiled only for GDAL 2.3 or newer.
4224 #if (GDAL_VERSION_MAJOR > 2) || (GDAL_VERSION_MAJOR == 2 && GDAL_VERSION_MINOR >= 3)
4225  // If neither an access key nor a secret key was supplied, disable signed access;
// if either one is present, clear the option instead.
4226  if (copy_params.s3_access_key.size() || copy_params.s3_secret_key.size()) {
4227 #if DEBUG_AWS_AUTHENTICATION
4228  LOG(INFO) << "GDAL: Clearing AWS_NO_SIGN_REQUEST";
4229 #endif
4230  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", nullptr);
4231  } else {
4232 #if DEBUG_AWS_AUTHENTICATION
4233  LOG(INFO) << "GDAL: Setting AWS_NO_SIGN_REQUEST to 'YES'";
4234 #endif
4235  CPLSetConfigOption("AWS_NO_SIGN_REQUEST", "YES");
4236  }
4237 #endif
4238 }
std::string s3_secret_key
Definition: CopyParams.h:63
#define LOG(tag)
Definition: Logger.h:188
std::string s3_access_key
Definition: CopyParams.h:62

+ Here is the caller graph for this function:

Member Data Documentation

char* import_export::Importer::buffer[2]
private

Definition at line 821 of file Importer.h.

Referenced by Importer(), and ~Importer().

size_t import_export::Importer::file_size
private

Definition at line 819 of file Importer.h.

Referenced by importDelimited(), and Importer().

std::vector<std::vector<std::unique_ptr<TypedImportBuffer> > > import_export::Importer::import_buffers_vec
private
std::string import_export::Importer::import_id
private

Definition at line 818 of file Importer.h.

Referenced by importDelimited(), Importer(), importGDAL(), and set_import_status().

std::unique_ptr<bool[]> import_export::Importer::is_array_a
private

Definition at line 824 of file Importer.h.

Referenced by get_is_array(), and Importer().

std::unique_ptr<Loader> import_export::Importer::loader
private
size_t import_export::Importer::max_threads
private

Definition at line 820 of file Importer.h.

Referenced by importDelimited(), Importer(), and importGDAL().


The documentation for this class was generated from the following files: