OmniSciDB  2e3a973ef4
import_export Namespace Reference

Namespaces

 anonymous_namespace{GDAL.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 delimited_parser
 

Classes

struct  BadRowsTracker
 
struct  CopyParams
 
class  DataStreamSink
 
class  Detector
 
class  GDAL
 
struct  GeoImportException
 
class  Importer
 
class  ImporterUtils
 
struct  ImportStatus
 
class  Loader
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  RenderGroupAnalyzer
 
class  TypedImportBuffer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer > >
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 

Typedef Documentation

◆ ArraySliceRange

using import_export::ArraySliceRange = std::pair<size_t, size_t>

Definition at line 72 of file Importer.h.

◆ ColumnIdToRenderGroupAnalyzerMapType

using import_export::ColumnIdToRenderGroupAnalyzerMapType = std::map<int, std::shared_ptr<RenderGroupAnalyzer> >

Definition at line 135 of file Importer.cpp.

◆ ColumnNameToSourceNameMapType

using import_export::ColumnNameToSourceNameMapType = std::map<std::string, std::string>

Definition at line 133 of file Importer.cpp.

◆ FeaturePtrVector

using import_export::FeaturePtrVector = std::vector<OGRFeatureUqPtr>

Definition at line 136 of file Importer.cpp.

◆ FieldNameToIndexMapType

using import_export::FieldNameToIndexMapType = std::map<std::string, size_t>

Definition at line 132 of file Importer.cpp.

Enumeration Type Documentation

◆ FileType

Enumerator
DELIMITED 
POLYGON 

Definition at line 34 of file CopyParams.h.

34  {
35  DELIMITED,
36  POLYGON
37 #ifdef ENABLE_IMPORT_PARQUET
38  ,
39  PARQUET
40 #endif
41 };

◆ ImportHeaderRow

Enumerator
AUTODETECT 
NO_HEADER 
HAS_HEADER 

Function Documentation

◆ addBinaryStringArray()

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 399 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

399  {
400  const auto& arr = datum.val.arr_val;
401  for (const auto& elem_datum : arr) {
402  string_vec.push_back(elem_datum.val.str_val);
403  }
404 }
+ Here is the caller graph for this function:

◆ gdalGatherFilesInArchiveRecursive()

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4571 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

4572  {
4573  // prepare to gather subdirectories
4574  std::vector<std::string> subdirectories;
4575 
4576  // get entries
4577  char** entries = VSIReadDir(archive_path.c_str());
4578  if (!entries) {
4579  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4580  return;
4581  }
4582 
4583  // force scope
4584  {
4585  // request clean-up
4586  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4587 
4588  // check all the entries
4589  int index = 0;
4590  while (true) {
4591  // get next entry, or drop out if there isn't one
4592  char* entry_c = entries[index++];
4593  if (!entry_c) {
4594  break;
4595  }
4596  std::string entry(entry_c);
4597 
4598  // ignore '.' and '..'
4599  if (entry == "." || entry == "..") {
4600  continue;
4601  }
4602 
4603  // build the full path
4604  std::string entry_path = archive_path + std::string("/") + entry;
4605 
4606  // is it a file or a sub-folder
4607  VSIStatBufL sb;
4608  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4609  if (result < 0) {
4610  break;
4611  }
4612 
4613  if (VSI_ISDIR(sb.st_mode)) {
4614  // a directory that ends with .gdb could be a Geodatabase bundle
4615  // arguably dangerous to decide this purely by name, but any further
4616  // validation would be very complex especially at this scope
4617  if (boost::iends_with(entry_path, ".gdb")) {
4618  // add the directory as if it was a file and don't recurse into it
4619  files.push_back(entry_path);
4620  } else {
4621  // add subdirectory to be recursed into
4622  subdirectories.push_back(entry_path);
4623  }
4624  } else {
4625  // add this file
4626  files.push_back(entry_path);
4627  }
4628  }
4629  }
4630 
4631  // recurse into each subdirectories we found
4632  for (const auto& subdirectory : subdirectories) {
4633  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4634  }
4635 }
#define LOG(tag)
Definition: Logger.h:188
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4571
+ Here is the caller graph for this function:

◆ import_thread_delimited()

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1796 of file Importer.cpp.

References import_export::Importer::buffer, CHECK, CHECK_LT, import_export::DataStreamSink::copy_params, Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_export::DataStreamSink::import_status, importGeoFromLonLat(), logger::INFO, IS_GEO, anonymous_namespace{TypedDataAccessors.h}::is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::ImportStatus::thread_id, logger::thread_id(), and to_string().

Referenced by import_export::Importer::importDelimited().

1804  {
1805  ImportStatus import_status;
1806  int64_t total_get_row_time_us = 0;
1807  int64_t total_str_to_val_time_us = 0;
1808  CHECK(scratch_buffer);
1809  auto buffer = scratch_buffer.get();
1810  auto load_ms = measure<>::execution([]() {});
1811  auto ms = measure<>::execution([&]() {
1812  const CopyParams& copy_params = importer->get_copy_params();
1813  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1814  size_t begin =
1815  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
1816  const char* thread_buf = buffer + begin_pos + begin;
1817  const char* thread_buf_end = buffer + end_pos;
1818  const char* buf_end = buffer + total_size;
1819  bool try_single_thread = false;
1820  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1821  importer->get_import_buffers(thread_id);
1823  int phys_cols = 0;
1824  int point_cols = 0;
1825  for (const auto cd : col_descs) {
1826  const auto& col_ti = cd->columnType;
1827  phys_cols += col_ti.get_physical_cols();
1828  if (cd->columnType.get_type() == kPOINT) {
1829  point_cols++;
1830  }
1831  }
1832  auto num_cols = col_descs.size() - phys_cols;
1833  for (const auto& p : import_buffers) {
1834  p->clear();
1835  }
1836  std::vector<std::string_view> row;
1837  size_t row_index_plus_one = 0;
1838  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1839  row.clear();
1840  std::vector<std::unique_ptr<char[]>>
1841  tmp_buffers; // holds string w/ removed escape chars, etc
1842  if (DEBUG_TIMING) {
1845  thread_buf_end,
1846  buf_end,
1847  copy_params,
1848  importer->get_is_array(),
1849  row,
1850  tmp_buffers,
1851  try_single_thread);
1852  });
1853  total_get_row_time_us += us;
1854  } else {
1856  thread_buf_end,
1857  buf_end,
1858  copy_params,
1859  importer->get_is_array(),
1860  row,
1861  tmp_buffers,
1862  try_single_thread);
1863  }
1864  row_index_plus_one++;
1865  // Each POINT could consume two separate coords instead of a single WKT
1866  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1867  import_status.rows_rejected++;
1868  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1869  << row.size() << "): " << shared::printContainer(row);
1870  if (import_status.rows_rejected > copy_params.max_reject) {
1871  break;
1872  }
1873  continue;
1874  }
1875 
1876  //
1877  // lambda for importing a row (perhaps multiple times if exploding a collection)
1878  //
1879 
1880  auto execute_import_row = [&](OGRGeometry* import_geometry) {
1881  size_t import_idx = 0;
1882  size_t col_idx = 0;
1883  try {
1884  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1885  auto cd = *cd_it;
1886  const auto& col_ti = cd->columnType;
1887 
1888  bool is_null =
1889  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1890  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1891  // So initially nullness may be missed and not passed to add_value,
1892  // which then might also check and still decide it's actually a NULL, e.g.
1893  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
1894  // So "NULL" is not recognized as NULL but then it's not recognized as
1895  // a valid kINT, so it's a NULL after all.
1896  // Checking for "NULL" here too, as a widely accepted notation for NULL.
1897 
1898  // Treating empty as NULL
1899  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1900  is_null = true;
1901  }
1902 
1903  if (col_ti.get_physical_cols() == 0) {
1904  // not geo
1905 
1906  import_buffers[col_idx]->add_value(
1907  cd, row[import_idx], is_null, copy_params);
1908 
1909  // next
1910  ++import_idx;
1911  ++col_idx;
1912  } else {
1913  // geo
1914 
1915  // store null string in the base column
1916  import_buffers[col_idx]->add_value(
1917  cd, copy_params.null_str, true, copy_params);
1918 
1919  // WKT from string we're not storing
1920  auto const& geo_string = row[import_idx];
1921 
1922  // next
1923  ++import_idx;
1924  ++col_idx;
1925 
1926  SQLTypes col_type = col_ti.get_type();
1927  CHECK(IS_GEO(col_type));
1928 
1929  std::vector<double> coords;
1930  std::vector<double> bounds;
1931  std::vector<int> ring_sizes;
1932  std::vector<int> poly_rings;
1933  int render_group = 0;
1934 
1935  // if this is a POINT column, and the field is not null, and
1936  // looks like a scalar numeric value (and not a hex blob)
1937  // attempt to import two columns as lon/lat (or lat/lon)
1938  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
1939  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
1940  geo_string[0] == '-') &&
1941  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
1942  double lon = std::atof(std::string(geo_string).c_str());
1943  double lat = NAN;
1944  auto lat_str = row[import_idx];
1945  ++import_idx;
1946  if (lat_str.size() > 0 &&
1947  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1948  lat = std::atof(std::string(lat_str).c_str());
1949  }
1950  // Swap coordinates if this table uses a reverse order: lat/lon
1951  if (!copy_params.lonlat) {
1952  std::swap(lat, lon);
1953  }
1954  // TODO: should check if POINT column should have been declared with
1955  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1956  // throw std::runtime_error("POINT column " + cd->columnName + " is
1957  // not WGS84, cannot insert lon/lat");
1958  // }
1959  if (!importGeoFromLonLat(lon, lat, coords)) {
1960  throw std::runtime_error(
1961  "Cannot read lon/lat to insert into POINT column " +
1962  cd->columnName);
1963  }
1964  } else {
1965  // import it
1966  SQLTypeInfo import_ti{col_ti};
1967  if (is_null) {
1968  if (col_ti.get_notnull()) {
1969  throw std::runtime_error("NULL geo for column " + cd->columnName);
1970  }
1972  import_ti,
1973  coords,
1974  bounds,
1975  ring_sizes,
1976  poly_rings,
1978  } else {
1979  if (import_geometry) {
1980  // geometry already exploded
1982  import_geometry,
1983  import_ti,
1984  coords,
1985  bounds,
1986  ring_sizes,
1987  poly_rings,
1989  std::string msg =
1990  "Failed to extract valid geometry from exploded row " +
1991  std::to_string(first_row_index_this_buffer +
1992  row_index_plus_one) +
1993  " for column " + cd->columnName;
1994  throw std::runtime_error(msg);
1995  }
1996  } else {
1997  // extract geometry directly from WKT
1999  std::string(geo_string),
2000  import_ti,
2001  coords,
2002  bounds,
2003  ring_sizes,
2004  poly_rings,
2006  std::string msg = "Failed to extract valid geometry from row " +
2007  std::to_string(first_row_index_this_buffer +
2008  row_index_plus_one) +
2009  " for column " + cd->columnName;
2010  throw std::runtime_error(msg);
2011  }
2012  }
2013 
2014  // validate types
2015  if (col_type != import_ti.get_type()) {
2017  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2018  col_type == SQLTypes::kMULTIPOLYGON)) {
2019  throw std::runtime_error(
2020  "Imported geometry doesn't match the type of column " +
2021  cd->columnName);
2022  }
2023  }
2024  }
2025 
2026  // assign render group?
2027  if (columnIdToRenderGroupAnalyzerMap.size()) {
2028  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2029  if (ring_sizes.size()) {
2030  // get a suitable render group for these poly coords
2031  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2032  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2033  render_group =
2034  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2035  } else {
2036  // empty poly
2037  render_group = -1;
2038  }
2039  }
2040  }
2041  }
2042 
2043  // import extracted geo
2044  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2045  cd,
2046  import_buffers,
2047  col_idx,
2048  coords,
2049  bounds,
2050  ring_sizes,
2051  poly_rings,
2052  render_group);
2053 
2054  // skip remaining physical columns
2055  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2056  ++cd_it;
2057  }
2058  }
2059  }
2060  import_status.rows_completed++;
2061  } catch (const std::exception& e) {
2062  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2063  import_buffers[col_idx_to_pop]->pop_value();
2064  }
2065  import_status.rows_rejected++;
2066  LOG(ERROR) << "Input exception thrown: " << e.what()
2067  << ". Row discarded. Data: " << shared::printContainer(row);
2068  }
2069  };
2070 
2071  if (copy_params.geo_explode_collections) {
2072  // explode and import
2073  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2074  explode_collections_step1(col_descs);
2075  // pull out the collection WKT or WKB hex
2076  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2077  auto const& collection_geo_string = row[collection_col_idx];
2078  // convert to OGR
2079  OGRGeometry* ogr_geometry = nullptr;
2080  ScopeGuard destroy_ogr_geometry = [&] {
2081  if (ogr_geometry) {
2082  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2083  }
2084  };
2086  std::string(collection_geo_string));
2087  // do the explode and import
2088  us = explode_collections_step2(ogr_geometry,
2089  collection_child_type,
2090  collection_col_name,
2091  first_row_index_this_buffer + row_index_plus_one,
2092  execute_import_row);
2093  } else {
2094  // import non-collection row just once
2096  [&] { execute_import_row(nullptr); });
2097  }
2098  total_str_to_val_time_us += us;
2099  } // end thread
2100  if (import_status.rows_completed > 0) {
2101  load_ms = measure<>::execution(
2102  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2103  }
2104  });
2105  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2106  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2107  << import_status.rows_completed << " rows inserted in "
2108  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2109  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2110  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2111  << "sec" << std::endl;
2112  }
2113 
2114  import_status.thread_id = thread_id;
2115  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2116 
2117  return import_status;
2118 }
SQLTypes
Definition: sqltypes.h:40
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor *> &col_descs)
Definition: Importer.cpp:1672
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1706
#define LOG(tag)
Definition: Logger.h:188
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:908
std::string to_string(char const *&&v)
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords)
Definition: Importer.cpp:1409
#define DEBUG_TIMING
Definition: Importer.cpp:138
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:701
#define CHECK_LT(x, y)
Definition: Logger.h:207
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex)
Definition: Types.cpp:667
bool is_null(const T &v, const SQLTypeInfo &t)
ThreadId thread_id()
Definition: Logger.cpp:731
#define CHECK(condition)
Definition: Logger.h:197
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:64
#define IS_GEO(T)
Definition: sqltypes.h:174
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ import_thread_shapefile()

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2120 of file Importer.cpp.

References CHECK, Geospatial::compress_coords(), import_export::DataStreamSink::copy_params, DEBUG_TIMING, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_export::DataStreamSink::import_status, logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::null_str, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), timer_stop(), and to_string().

Referenced by import_export::Importer::importGDAL().

2129  {
2130  ImportStatus import_status;
2131  const CopyParams& copy_params = importer->get_copy_params();
2132  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2133  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2134  importer->get_import_buffers(thread_id);
2135 
2136  for (const auto& p : import_buffers) {
2137  p->clear();
2138  }
2139 
2140  auto convert_timer = timer_start();
2141 
2142  // we create this on the fly based on the first feature's SR
2143  std::unique_ptr<OGRCoordinateTransformation> coordinate_transformation;
2144 
2145  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2146  if (!features[iFeature]) {
2147  continue;
2148  }
2149 
2150  // get this feature's geometry
2151  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2152  if (pGeometry) {
2153  // for geodatabase, we need to consider features with no geometry
2154  // as we still want to create a table, even if it has no geo column
2155 
2156  // transform it
2157  // avoid GDAL error if not transformable
2158  auto geometry_sr = pGeometry->getSpatialReference();
2159  if (geometry_sr) {
2160  // create an OGRCoordinateTransformation (CT) on the fly
2161  // we must assume that all geo in this file will have
2162  // the same source SR, so the CT will be valid for all
2163  // transforming to a reusable CT is faster than to an SR
2164  if (coordinate_transformation == nullptr) {
2165  coordinate_transformation.reset(
2166  OGRCreateCoordinateTransformation(geometry_sr, poGeographicSR));
2167  if (coordinate_transformation == nullptr) {
2168  throw std::runtime_error(
2169  "Failed to create a GDAL CoordinateTransformation for incoming geo");
2170  }
2171  }
2172  pGeometry->transform(coordinate_transformation.get());
2173  }
2174  }
2175 
2176  //
2177  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2178  //
2179 
2180  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2181  size_t col_idx = 0;
2182  try {
2183  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2184  auto cd = *cd_it;
2185 
2186  // is this a geo column?
2187  const auto& col_ti = cd->columnType;
2188  if (col_ti.is_geometry()) {
2189  // Note that this assumes there is one and only one geo column in the table.
2190  // Currently, the importer only supports reading a single geospatial feature
2191  // from an input shapefile / geojson file, but this code will need to be
2192  // modified if that changes
2193  SQLTypes col_type = col_ti.get_type();
2194 
2195  // store null string in the base column
2196  import_buffers[col_idx]->add_value(
2197  cd, copy_params.null_str, true, copy_params);
2198  ++col_idx;
2199 
2200  // the data we now need to extract for the other columns
2201  std::vector<double> coords;
2202  std::vector<double> bounds;
2203  std::vector<int> ring_sizes;
2204  std::vector<int> poly_rings;
2205  int render_group = 0;
2206 
2207  // extract it
2208  SQLTypeInfo import_ti{col_ti};
2209  bool is_null_geo = !import_geometry;
2210  if (is_null_geo) {
2211  if (col_ti.get_notnull()) {
2212  throw std::runtime_error("NULL geo for column " + cd->columnName);
2213  }
2215  import_ti,
2216  coords,
2217  bounds,
2218  ring_sizes,
2219  poly_rings,
2221  } else {
2223  import_geometry,
2224  import_ti,
2225  coords,
2226  bounds,
2227  ring_sizes,
2228  poly_rings,
2230  std::string msg = "Failed to extract valid geometry from feature " +
2231  std::to_string(firstFeature + iFeature + 1) +
2232  " for column " + cd->columnName;
2233  throw std::runtime_error(msg);
2234  }
2235 
2236  // validate types
2237  if (col_type != import_ti.get_type()) {
2239  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2240  col_type == SQLTypes::kMULTIPOLYGON)) {
2241  throw std::runtime_error(
2242  "Imported geometry doesn't match the type of column " +
2243  cd->columnName);
2244  }
2245  }
2246  }
2247 
2248  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2249  if (ring_sizes.size()) {
2250  // get a suitable render group for these poly coords
2251  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2252  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2253  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2254  } else {
2255  // empty poly
2256  render_group = -1;
2257  }
2258  }
2259 
2260  // create coords array value and add it to the physical column
2261  ++cd_it;
2262  auto cd_coords = *cd_it;
2263  std::vector<TDatum> td_coord_data;
2264  if (!is_null_geo) {
2265  std::vector<uint8_t> compressed_coords =
2266  Geospatial::compress_coords(coords, col_ti);
2267  for (auto cc : compressed_coords) {
2268  TDatum td_byte;
2269  td_byte.val.int_val = cc;
2270  td_coord_data.push_back(td_byte);
2271  }
2272  }
2273  TDatum tdd_coords;
2274  tdd_coords.val.arr_val = td_coord_data;
2275  tdd_coords.is_null = is_null_geo;
2276  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2277  ++col_idx;
2278 
2279  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2280  // Create ring_sizes array value and add it to the physical column
2281  ++cd_it;
2282  auto cd_ring_sizes = *cd_it;
2283  std::vector<TDatum> td_ring_sizes;
2284  if (!is_null_geo) {
2285  for (auto ring_size : ring_sizes) {
2286  TDatum td_ring_size;
2287  td_ring_size.val.int_val = ring_size;
2288  td_ring_sizes.push_back(td_ring_size);
2289  }
2290  }
2291  TDatum tdd_ring_sizes;
2292  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2293  tdd_ring_sizes.is_null = is_null_geo;
2294  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2295  ++col_idx;
2296  }
2297 
2298  if (col_type == kMULTIPOLYGON) {
2299  // Create poly_rings array value and add it to the physical column
2300  ++cd_it;
2301  auto cd_poly_rings = *cd_it;
2302  std::vector<TDatum> td_poly_rings;
2303  if (!is_null_geo) {
2304  for (auto num_rings : poly_rings) {
2305  TDatum td_num_rings;
2306  td_num_rings.val.int_val = num_rings;
2307  td_poly_rings.push_back(td_num_rings);
2308  }
2309  }
2310  TDatum tdd_poly_rings;
2311  tdd_poly_rings.val.arr_val = td_poly_rings;
2312  tdd_poly_rings.is_null = is_null_geo;
2313  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2314  ++col_idx;
2315  }
2316 
2317  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2318  col_type == kMULTIPOLYGON) {
2319  // Create bounds array value and add it to the physical column
2320  ++cd_it;
2321  auto cd_bounds = *cd_it;
2322  std::vector<TDatum> td_bounds_data;
2323  if (!is_null_geo) {
2324  for (auto b : bounds) {
2325  TDatum td_double;
2326  td_double.val.real_val = b;
2327  td_bounds_data.push_back(td_double);
2328  }
2329  }
2330  TDatum tdd_bounds;
2331  tdd_bounds.val.arr_val = td_bounds_data;
2332  tdd_bounds.is_null = is_null_geo;
2333  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2334  ++col_idx;
2335  }
2336 
2337  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2338  // Create render_group value and add it to the physical column
2339  ++cd_it;
2340  auto cd_render_group = *cd_it;
2341  TDatum td_render_group;
2342  td_render_group.val.int_val = render_group;
2343  td_render_group.is_null = is_null_geo;
2344  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2345  ++col_idx;
2346  }
2347  } else {
2348  // regular column
2349  // pull from GDAL metadata
2350  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2351  CHECK(cit != columnNameToSourceNameMap.end());
2352  auto const& field_name = cit->second;
2353 
2354  auto const fit = fieldNameToIndexMap.find(field_name);
2355  CHECK(fit != fieldNameToIndexMap.end());
2356  auto const& field_index = fit->second;
2357  CHECK(field_index < fieldNameToIndexMap.size());
2358 
2359  auto const& feature = features[iFeature];
2360 
2361  auto field_defn = feature->GetFieldDefnRef(field_index);
2362  CHECK(field_defn);
2363 
2364  // OGRFeature::GetFieldAsString() can only return 80 characters
2365  // so for array columns, we are obliged to fetch the actual values
2366  // and construct the concatenated string ourselves
2367 
2368  std::string value_string;
2369  int array_index = 0, array_size = 0;
2370 
2371  auto stringify_numeric_list = [&](auto* values) {
2372  value_string = "{";
2373  while (array_index < array_size) {
2374  auto separator = (array_index > 0) ? "," : "";
2375  value_string += separator + std::to_string(values[array_index]);
2376  array_index++;
2377  }
2378  value_string += "}";
2379  };
2380 
2381  auto field_type = field_defn->GetType();
2382  switch (field_type) {
2383  case OFTInteger:
2384  case OFTInteger64:
2385  case OFTReal:
2386  case OFTString:
2387  case OFTBinary:
2388  case OFTDate:
2389  case OFTTime:
2390  case OFTDateTime: {
2391  value_string = feature->GetFieldAsString(field_index);
2392  } break;
2393  case OFTIntegerList: {
2394  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2395  stringify_numeric_list(values);
2396  } break;
2397  case OFTInteger64List: {
2398  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2399  stringify_numeric_list(values);
2400  } break;
2401  case OFTRealList: {
2402  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2403  stringify_numeric_list(values);
2404  } break;
2405  case OFTStringList: {
2406  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2407  value_string = "{";
2408  if (array_of_strings) {
2409  while (auto* this_string = array_of_strings[array_index]) {
2410  auto separator = (array_index > 0) ? "," : "";
2411  value_string += separator + std::string(this_string);
2412  array_index++;
2413  }
2414  }
2415  value_string += "}";
2416  } break;
2417  default:
2418  throw std::runtime_error("Unsupported geo file field type (" +
2419  std::to_string(static_cast<int>(field_type)) +
2420  ")");
2421  }
2422 
2423  static CopyParams default_copy_params;
2424  import_buffers[col_idx]->add_value(
2425  cd, value_string, false, default_copy_params);
2426  ++col_idx;
2427  }
2428  }
2429  import_status.rows_completed++;
2430  } catch (const std::exception& e) {
2431  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2432  import_buffers[col_idx_to_pop]->pop_value();
2433  }
2434  import_status.rows_rejected++;
2435  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2436  }
2437  };
2438 
2439  if (pGeometry && copy_params.geo_explode_collections) {
2440  // explode and import
2441  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2442  explode_collections_step1(col_descs);
2443  explode_collections_step2(pGeometry,
2444  collection_child_type,
2445  collection_col_name,
2446  firstFeature + iFeature + 1,
2447  execute_import_feature);
2448  } else {
2449  // import non-collection or null feature just once
2450  execute_import_feature(pGeometry);
2451  }
2452  } // end features
2453 
2454  float convert_ms =
2455  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2456  convert_timer)) /
2457  1000.0f;
2458 
2459  float load_ms = 0.0f;
2460  if (import_status.rows_completed > 0) {
2461  auto load_timer = timer_start();
2462  importer->load(import_buffers, import_status.rows_completed);
2463  load_ms =
2464  float(
2465  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2466  load_timer)) /
2467  1000.0f;
2468  }
2469 
2470  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2471  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2472  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2473  }
2474 
2475  import_status.thread_id = thread_id;
2476 
2477  if (DEBUG_TIMING) {
2478  LOG(INFO) << "DEBUG: Total "
2479  << float(timer_stop<std::chrono::steady_clock::time_point,
2480  std::chrono::microseconds>(convert_timer)) /
2481  1000.0f
2482  << "ms";
2483  }
2484 
2485  return import_status;
2486 }
SQLTypes
Definition: sqltypes.h:40
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor *> &col_descs)
Definition: Importer.cpp:1672
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1706
#define LOG(tag)
Definition: Logger.h:188
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:908
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:138
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:701
ThreadId thread_id()
Definition: Logger.cpp:731
#define CHECK(condition)
Definition: Logger.h:197
Type timer_start()
Definition: measure.h:42
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ importGeoFromLonLat()

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords 
)

Definition at line 1409 of file Importer.cpp.

Referenced by import_thread_delimited().

1409  {
1410  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1411  return false;
1412  }
1413  // we don't need to do any coordinate-system transformation
1414  // here (yet) so we don't need to use any OGR API or types
1415  // just use the values directly (assumed to be in 4326)
1416  coords.push_back(lon);
1417  coords.push_back(lat);
1418  return true;
1419 }
+ Here is the caller graph for this function:

◆ NullArray()

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti)

Definition at line 367 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

367  {
368  SQLTypeInfo elem_ti = ti.get_elem_type();
369  auto len = ti.get_size();
370 
371  if (elem_ti.is_string()) {
372  // must not be called for array of strings
373  CHECK(false);
374  return ArrayDatum(0, NULL, true);
375  }
376 
377  if (len > 0) {
378  // Compose a NULL fixlen array
379  int8_t* buf = (int8_t*)checked_malloc(len);
380  // First scalar is a NULL_ARRAY sentinel
381  Datum d = NullArrayDatum(elem_ti);
382  int8_t* p = appendDatum(buf, d, elem_ti);
383  // Rest is filled with normal NULL sentinels
384  Datum d0 = NullDatum(elem_ti);
385  while ((p - buf) < len) {
386  p = appendDatum(p, d0, elem_ti);
387  }
388  CHECK((p - buf) == len);
389  return ArrayDatum(len, buf, true);
390  }
391  // NULL varlen array
392  return ArrayDatum(0, NULL, true);
393 }
bool is_string() const
Definition: sqltypes.h:417
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:232
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:871
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
#define CHECK(condition)
Definition: Logger.h:197
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:273
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullArrayDatum()

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti)

Definition at line 273 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

273  {
274  Datum d;
275  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
276  switch (type) {
277  case kBOOLEAN:
279  break;
280  case kBIGINT:
282  break;
283  case kINT:
285  break;
286  case kSMALLINT:
288  break;
289  case kTINYINT:
291  break;
292  case kFLOAT:
294  break;
295  case kDOUBLE:
297  break;
298  case kTIME:
299  case kTIMESTAMP:
300  case kDATE:
302  break;
303  case kPOINT:
304  case kLINESTRING:
305  case kPOLYGON:
306  case kMULTIPOLYGON:
307  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
308  default:
309  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
310  }
311  return d;
312 }
int8_t tinyintval
Definition: sqltypes.h:135
Definition: sqltypes.h:51
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
bool boolval
Definition: sqltypes.h:134
bool is_decimal() const
Definition: sqltypes.h:420
int32_t intval
Definition: sqltypes.h:137
float floatval
Definition: sqltypes.h:139
int64_t bigintval
Definition: sqltypes.h:138
int16_t smallintval
Definition: sqltypes.h:136
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:140
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:193
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullDatum()

Datum import_export::NullDatum ( SQLTypeInfo &  ti)

Definition at line 232 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

232  {
233  Datum d;
234  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
235  switch (type) {
236  case kBOOLEAN:
238  break;
239  case kBIGINT:
241  break;
242  case kINT:
244  break;
245  case kSMALLINT:
247  break;
248  case kTINYINT:
250  break;
251  case kFLOAT:
252  d.floatval = NULL_FLOAT;
253  break;
254  case kDOUBLE:
256  break;
257  case kTIME:
258  case kTIMESTAMP:
259  case kDATE:
261  break;
262  case kPOINT:
263  case kLINESTRING:
264  case kPOLYGON:
265  case kMULTIPOLYGON:
266  throw std::runtime_error("Internal error: geometry type in NullDatum.");
267  default:
268  throw std::runtime_error("Internal error: invalid type in NullDatum.");
269  }
270  return d;
271 }
int8_t tinyintval
Definition: sqltypes.h:135
#define NULL_DOUBLE
Definition: sqltypes.h:186
Definition: sqltypes.h:51
bool boolval
Definition: sqltypes.h:134
bool is_decimal() const
Definition: sqltypes.h:420
int32_t intval
Definition: sqltypes.h:137
float floatval
Definition: sqltypes.h:139
int64_t bigintval
Definition: sqltypes.h:138
#define NULL_FLOAT
Definition: sqltypes.h:185
int16_t smallintval
Definition: sqltypes.h:136
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:140
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [1/2]

std::pair<SQLTypes, bool> import_export::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4383 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptors().

4383  {
4384  switch (ogr_type) {
4385  case OFTInteger:
4386  return std::make_pair(kINT, false);
4387  case OFTIntegerList:
4388  return std::make_pair(kINT, true);
4389 #if GDAL_VERSION_MAJOR > 1
4390  case OFTInteger64:
4391  return std::make_pair(kBIGINT, false);
4392  case OFTInteger64List:
4393  return std::make_pair(kBIGINT, true);
4394 #endif
4395  case OFTReal:
4396  return std::make_pair(kDOUBLE, false);
4397  case OFTRealList:
4398  return std::make_pair(kDOUBLE, true);
4399  case OFTString:
4400  return std::make_pair(kTEXT, false);
4401  case OFTStringList:
4402  return std::make_pair(kTEXT, true);
4403  case OFTDate:
4404  return std::make_pair(kDATE, false);
4405  case OFTTime:
4406  return std::make_pair(kTIME, false);
4407  case OFTDateTime:
4408  return std::make_pair(kTIMESTAMP, false);
4409  case OFTBinary:
4410  // Interpret binary blobs as byte arrays here
4411  // but actual import will store NULL as GDAL will not
4412  // extract the blob (OGRFeature::GetFieldAsString will
4413  // result in the import buffers having an empty string)
4414  return std::make_pair(kTINYINT, true);
4415  default:
4416  break;
4417  }
4418  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4419 }
Definition: sqltypes.h:51
std::string to_string(char const *&&v)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [2/2]

SQLTypes import_export::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4421 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

4421  {
4422  switch (ogr_type) {
4423  case wkbPoint:
4424  return kPOINT;
4425  case wkbLineString:
4426  return kLINESTRING;
4427  case wkbPolygon:
4428  return kPOLYGON;
4429  case wkbMultiPolygon:
4430  return kMULTIPOLYGON;
4431  default:
4432  break;
4433  }
4434  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4435 }
std::string to_string(char const *&&v)
+ Here is the call graph for this function:

◆ setup_column_loaders()

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor *  td,
Loader *  loader 
)

Definition at line 5149 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by Parser::AddColumnStmt::execute(), and DBHandler::prepare_columnar_loader().

5151  {
5152  CHECK(td);
5153  auto col_descs = loader->get_column_descs();
5154 
5155  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
5156  for (auto cd : col_descs) {
5157  import_buffers.emplace_back(
5158  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
5159  }
5160 
5161  return import_buffers;
5162 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ StringToArray()

ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 314 of file Importer.cpp.

References appendDatum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value().

316  {
317  SQLTypeInfo elem_ti = ti.get_elem_type();
318  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
319  return ArrayDatum(0, NULL, true);
320  }
321  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
322  LOG(WARNING) << "Malformed array: " << s;
323  return ArrayDatum(0, NULL, true);
324  }
325  std::vector<std::string> elem_strs;
326  size_t last = 1;
327  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
328  i = s.find(copy_params.array_delim, last)) {
329  elem_strs.push_back(s.substr(last, i - last));
330  last = i + 1;
331  }
332  if (last + 1 <= s.size()) {
333  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
334  }
335  if (elem_strs.size() == 1) {
336  auto str = elem_strs.front();
337  auto str_trimmed = trim_space(str.c_str(), str.length());
338  if (str_trimmed == "") {
339  elem_strs.clear(); // Empty array
340  }
341  }
342  if (!elem_ti.is_string()) {
343  size_t len = elem_strs.size() * elem_ti.get_size();
344  int8_t* buf = (int8_t*)checked_malloc(len);
345  int8_t* p = buf;
346  for (auto& es : elem_strs) {
347  auto e = trim_space(es.c_str(), es.length());
348  bool is_null = (e == copy_params.null_str) || e == "NULL";
349  if (!elem_ti.is_string() && e == "") {
350  is_null = true;
351  }
352  if (elem_ti.is_number() || elem_ti.is_time()) {
353  if (!isdigit(e[0]) && e[0] != '-') {
354  is_null = true;
355  }
356  }
357  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
358  p = appendDatum(p, d, elem_ti);
359  }
360  return ArrayDatum(len, buf, false);
361  }
362  // must not be called for array of strings
363  CHECK(false);
364  return ArrayDatum(0, NULL, true);
365 }
bool is_time() const
Definition: sqltypes.h:423
bool is_string() const
Definition: sqltypes.h:417
#define LOG(tag)
Definition: Logger.h:188
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:232
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:871
bool is_null(const T &v, const SQLTypeInfo &t)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
#define CHECK(condition)
Definition: Logger.h:197
static const std::string trim_space(const char *field, const size_t len)
Definition: Importer.cpp:220
bool is_number() const
Definition: sqltypes.h:422
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToArrayDatum()

ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti 
)

Definition at line 451 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

451  {
452  SQLTypeInfo elem_ti = ti.get_elem_type();
453 
454  CHECK(!elem_ti.is_string());
455 
456  if (datum.is_null) {
457  return NullArray(ti);
458  }
459 
460  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
461  int8_t* buf = (int8_t*)checked_malloc(len);
462  int8_t* p = buf;
463  for (auto& e : datum.val.arr_val) {
464  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
465  }
466 
467  return ArrayDatum(len, buf, false);
468 }
bool is_string() const
Definition: sqltypes.h:417
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:367
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:406
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:871
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToDatum()

Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti 
)

Definition at line 406 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

406  {
407  Datum d;
408  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
409  switch (type) {
410  case kBOOLEAN:
411  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
412  break;
413  case kBIGINT:
414  d.bigintval =
415  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
416  break;
417  case kINT:
418  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
419  break;
420  case kSMALLINT:
421  d.smallintval =
422  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
423  break;
424  case kTINYINT:
425  d.tinyintval =
426  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
427  break;
428  case kFLOAT:
429  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
430  break;
431  case kDOUBLE:
432  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
433  break;
434  case kTIME:
435  case kTIMESTAMP:
436  case kDATE:
437  d.bigintval =
438  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
439  break;
440  case kPOINT:
441  case kLINESTRING:
442  case kPOLYGON:
443  case kMULTIPOLYGON:
444  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
445  default:
446  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
447  }
448  return d;
449 }
int8_t tinyintval
Definition: sqltypes.h:135
#define NULL_DOUBLE
Definition: sqltypes.h:186
Definition: sqltypes.h:51
bool boolval
Definition: sqltypes.h:134
bool is_decimal() const
Definition: sqltypes.h:420
int32_t intval
Definition: sqltypes.h:137
float floatval
Definition: sqltypes.h:139
int64_t bigintval
Definition: sqltypes.h:138
#define NULL_FLOAT
Definition: sqltypes.h:185
int16_t smallintval
Definition: sqltypes.h:136
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:140
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ trim_space()

static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 220 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

220  {
221  size_t i = 0;
222  size_t j = len;
223  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
224  i++;
225  }
226  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
227  j--;
228  }
229  return std::string(field + i, j - i);
230 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
+ Here is the caller graph for this function:

◆ try_cast()

template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 2969 of file Importer.cpp.

2969  {
2970  try {
2971  boost::lexical_cast<T>(str);
2972  } catch (const boost::bad_lexical_cast& e) {
2973  return false;
2974  }
2975  return true;
2976 }

◆ try_strptimes()

char* import_export::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 2978 of file Importer.cpp.

Referenced by import_export::Detector::detect_sqltype().

2978  {
2979  std::tm tm_struct;
2980  char* buf;
2981  for (auto format : formats) {
2982  buf = strptime(str, format.c_str(), &tm_struct);
2983  if (buf) {
2984  return buf;
2985  }
2986  }
2987  return nullptr;
2988 }
+ Here is the caller graph for this function:

Variable Documentation

◆ import_status_map

std::map<std::string, ImportStatus> import_export::import_status_map
static

Definition at line 147 of file Importer.cpp.

◆ kImportFileBufferSize

constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 32 of file CopyParams.h.

◆ PROMOTE_POLYGON_TO_MULTIPOLYGON

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static

Definition at line 144 of file Importer.cpp.

◆ status_mutex

mapd_shared_mutex import_export::status_mutex
static

Definition at line 146 of file Importer.cpp.