OmniSciDB  ab4938a6a3
Importer_NS Namespace Reference

Namespaces

 anonymous_namespace{Importer.cpp}
 
 delimited_parser
 

Classes

struct  BadRowsTracker
 
struct  CopyParams
 
class  DataStreamSink
 
class  Detector
 
struct  GeoImportException
 
class  Importer
 
class  ImporterUtils
 
struct  ImportStatus
 
class  Loader
 
class  RenderGroupAnalyzer
 
class  TypedImportBuffer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer > >
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
void GDALErrorHandler (CPLErr eErrClass, int err_no, const char *msg)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 

Typedef Documentation

◆ ArraySliceRange

using Importer_NS::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 72 of file Importer.h.

◆ ColumnIdToRenderGroupAnalyzerMapType

using Importer_NS::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer> >

Definition at line 135 of file Importer.cpp.

◆ ColumnNameToSourceNameMapType

using Importer_NS::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 133 of file Importer.cpp.

◆ FeaturePtrVector

using Importer_NS::FeaturePtrVector = typedef std::vector<OGRFeatureUqPtr>

Definition at line 136 of file Importer.cpp.

◆ FieldNameToIndexMapType

using Importer_NS::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 132 of file Importer.cpp.

Enumeration Type Documentation

◆ FileType

enum Importer_NS::FileType
strong
Enumerator
DELIMITED 
POLYGON 

Definition at line 34 of file CopyParams.h.

34  {
35  DELIMITED,
36  POLYGON
37 #ifdef ENABLE_IMPORT_PARQUET
38  ,
39  PARQUET
40 #endif
41 };

◆ ImportHeaderRow

enum Importer_NS::ImportHeaderRow
strong
Enumerator
AUTODETECT 
NO_HEADER 
HAS_HEADER 

Function Documentation

◆ addBinaryStringArray()

void Importer_NS::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 399 of file Importer.cpp.

Referenced by Importer_NS::TypedImportBuffer::add_value().

399  {
400  const auto& arr = datum.val.arr_val;
401  for (const auto& elem_datum : arr) {
402  string_vec.push_back(elem_datum.val.str_val);
403  }
404 }
+ Here is the caller graph for this function:

◆ GDALErrorHandler()

void Importer_NS::GDALErrorHandler ( CPLErr  eErrClass,
int  err_no,
const char *  msg 
)

Definition at line 4097 of file Importer.cpp.

References CHECK, logger::INFO, Importer_NS::Importer::init_gdal_mutex, LOG, and to_string().

Referenced by Importer_NS::Importer::initGDAL().

4097  {
4098  CHECK(eErrClass >= CE_None && eErrClass <= CE_Fatal);
4099  static const char* errClassStrings[5] = {
4100  "Info",
4101  "Debug",
4102  "Warning",
4103  "Failure",
4104  "Fatal",
4105  };
4106  std::string log_msg = std::string("GDAL ") + errClassStrings[eErrClass] +
4107  std::string(": ") + msg + std::string(" (") +
4108  std::to_string(err_no) + std::string(")");
4109  if (eErrClass >= CE_Failure) {
4110  throw std::runtime_error(log_msg);
4111  } else {
4112  LOG(INFO) << log_msg;
4113  }
4114 }
#define LOG(tag)
Definition: Logger.h:188
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalGatherFilesInArchiveRecursive()

void Importer_NS::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4552 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by Importer_NS::Importer::gdalGetAllFilesInArchive().

4553  {
4554  // prepare to gather subdirectories
4555  std::vector<std::string> subdirectories;
4556 
4557  // get entries
4558  char** entries = VSIReadDir(archive_path.c_str());
4559  if (!entries) {
4560  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4561  return;
4562  }
4563 
4564  // force scope
4565  {
4566  // request clean-up
4567  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4568 
4569  // check all the entries
4570  int index = 0;
4571  while (true) {
4572  // get next entry, or drop out if there isn't one
4573  char* entry_c = entries[index++];
4574  if (!entry_c) {
4575  break;
4576  }
4577  std::string entry(entry_c);
4578 
4579  // ignore '.' and '..'
4580  if (entry == "." || entry == "..") {
4581  continue;
4582  }
4583 
4584  // build the full path
4585  std::string entry_path = archive_path + std::string("/") + entry;
4586 
4587  // is it a file or a sub-folder
4588  VSIStatBufL sb;
4589  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4590  if (result < 0) {
4591  break;
4592  }
4593 
4594  if (VSI_ISDIR(sb.st_mode)) {
4595  // a directory that ends with .gdb could be a Geodatabase bundle
4596  // arguably dangerous to decide this purely by name, but any further
4597  // validation would be very complex especially at this scope
4598  if (boost::iends_with(entry_path, ".gdb")) {
4599  // add the directory as if it was a file and don't recurse into it
4600  files.push_back(entry_path);
4601  } else {
4602  // add subdirectory to be recursed into
4603  subdirectories.push_back(entry_path);
4604  }
4605  } else {
4606  // add this file
4607  files.push_back(entry_path);
4608  }
4609  }
4610  }
4611 
4612  // recurse into each subdirectories we found
4613  for (const auto& subdirectory : subdirectories) {
4614  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4615  }
4616 }
#define LOG(tag)
Definition: Logger.h:188
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4552
+ Here is the caller graph for this function:

◆ import_thread_delimited()

static ImportStatus Importer_NS::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1803 of file Importer.cpp.

References Importer_NS::Importer::buffer, CHECK, CHECK_LT, Importer_NS::DataStreamSink::copy_params, DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step1(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step2(), Importer_NS::delimited_parser::find_beginning(), Importer_NS::CopyParams::geo_explode_collections, Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), Importer_NS::Importer::get_is_array(), Importer_NS::delimited_parser::get_row(), Importer_NS::Importer::getCatalog(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Geo_namespace::GeoTypesFactory::getNullGeoColumns(), Importer_NS::DataStreamSink::import_status, importGeoFromLonLat(), logger::INFO, IS_GEO, anonymous_namespace{TypedDataAccessors.h}::is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::lonlat, Importer_NS::CopyParams::max_reject, Importer_NS::CopyParams::null_str, shared::printContainer(), Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::Importer::set_geo_physical_import_buffer(), Importer_NS::ImportStatus::thread_id, logger::thread_id(), and to_string().

Referenced by Importer_NS::Importer::importDelimited().

1811  {
1812  ImportStatus import_status;
1813  int64_t total_get_row_time_us = 0;
1814  int64_t total_str_to_val_time_us = 0;
1815  CHECK(scratch_buffer);
1816  auto buffer = scratch_buffer.get();
1817  auto load_ms = measure<>::execution([]() {});
1818  auto ms = measure<>::execution([&]() {
1819  const CopyParams& copy_params = importer->get_copy_params();
1820  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1821  size_t begin =
1822  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
1823  const char* thread_buf = buffer + begin_pos + begin;
1824  const char* thread_buf_end = buffer + end_pos;
1825  const char* buf_end = buffer + total_size;
1826  bool try_single_thread = false;
1827  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1828  importer->get_import_buffers(thread_id);
1829  auto us = measure<std::chrono::microseconds>::execution([&]() {});
1830  int phys_cols = 0;
1831  int point_cols = 0;
1832  for (const auto cd : col_descs) {
1833  const auto& col_ti = cd->columnType;
1834  phys_cols += col_ti.get_physical_cols();
1835  if (cd->columnType.get_type() == kPOINT) {
1836  point_cols++;
1837  }
1838  }
1839  auto num_cols = col_descs.size() - phys_cols;
1840  for (const auto& p : import_buffers) {
1841  p->clear();
1842  }
1843  std::vector<std::string_view> row;
1844  size_t row_index_plus_one = 0;
1845  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1846  row.clear();
1847  std::vector<std::unique_ptr<char[]>>
1848  tmp_buffers; // holds string w/ removed escape chars, etc
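1849  if (DEBUG_TIMING) {
1850  us = measure<std::chrono::microseconds>::execution([&]() {
1851  p = delimited_parser::get_row(p,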
1852  thread_buf_end,
1853  buf_end,
1854  copy_params,
1855  importer->get_is_array(),
1856  row,
1857  tmp_buffers,
1858  try_single_thread);
1859  });
1860  total_get_row_time_us += us;
1861  } else {
1862  p = delimited_parser::get_row(p,
1863  thread_buf_end,
1864  buf_end,
1865  copy_params,
1866  importer->get_is_array(),
1867  row,
1868  tmp_buffers,
1869  try_single_thread);
1870  }
1871  row_index_plus_one++;
1872  // Each POINT could consume two separate coords instead of a single WKT
1873  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1874  import_status.rows_rejected++;
1875  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1876  << row.size() << "): " << shared::printContainer(row);
1877  if (import_status.rows_rejected > copy_params.max_reject) {
1878  break;
1879  }
1880  continue;
1881  }
1882 
1883  //
1884  // lambda for importing a row (perhaps multiple times if exploding a collection)
1885  //
1886 
1887  auto execute_import_row = [&](OGRGeometry* import_geometry) {
1888  size_t import_idx = 0;
1889  size_t col_idx = 0;
1890  try {
1891  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1892  auto cd = *cd_it;
1893  const auto& col_ti = cd->columnType;
1894 
1895  bool is_null =
1896  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1897  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1898  // So initially nullness may be missed and not passed to add_value,
1899  // which then might also check and still decide it's actually a NULL, e.g.
1900  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
1901  // So "NULL" is not recognized as NULL but then it's not recognized as
1902  // a valid kINT, so it's a NULL after all.
1903  // Checking for "NULL" here too, as a widely accepted notation for NULL.
1904 
1905  // Treating empty as NULL
1906  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1907  is_null = true;
1908  }
1909 
1910  if (col_ti.get_physical_cols() == 0) {
1911  // not geo
1912 
1913  import_buffers[col_idx]->add_value(
1914  cd, row[import_idx], is_null, copy_params);
1915 
1916  // next
1917  ++import_idx;
1918  ++col_idx;
1919  } else {
1920  // geo
1921 
1922  // store null string in the base column
1923  import_buffers[col_idx]->add_value(
1924  cd, copy_params.null_str, true, copy_params);
1925 
1926  // WKT from string we're not storing
1927  auto const& wkt = row[import_idx];
1928 
1929  // next
1930  ++import_idx;
1931  ++col_idx;
1932 
1933  SQLTypes col_type = col_ti.get_type();
1934  CHECK(IS_GEO(col_type));
1935 
1936  std::vector<double> coords;
1937  std::vector<double> bounds;
1938  std::vector<int> ring_sizes;
1939  std::vector<int> poly_rings;
1940  int render_group = 0;
1941 
1942  if (!is_null && col_type == kPOINT && wkt.size() > 0 &&
1943  (wkt[0] == '.' || isdigit(wkt[0]) || wkt[0] == '-')) {
1944  // Invalid WKT, looks more like a scalar.
1945  // Try custom POINT import: from two separate scalars rather than WKT
1946  // string
1947  double lon = std::atof(std::string(wkt).c_str());
1948  double lat = NAN;
1949  auto lat_str = row[import_idx];
1950  ++import_idx;
1951  if (lat_str.size() > 0 &&
1952  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1953  lat = std::atof(std::string(lat_str).c_str());
1954  }
1955  // Swap coordinates if this table uses a reverse order: lat/lon
1956  if (!copy_params.lonlat) {
1957  std::swap(lat, lon);
1958  }
1959  // TODO: should check if POINT column should have been declared with
1960  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1961  // throw std::runtime_error("POINT column " + cd->columnName + " is
1962  // not WGS84, cannot insert lon/lat");
1963  // }
1964  if (!importGeoFromLonLat(lon, lat, coords)) {
1965  throw std::runtime_error(
1966  "Cannot read lon/lat to insert into POINT column " +
1967  cd->columnName);
1968  }
1969  } else {
1970  // import it
1971  SQLTypeInfo import_ti{col_ti};
1972  if (is_null) {
1973  if (col_ti.get_notnull()) {
1974  throw std::runtime_error("NULL geo for column " + cd->columnName);
1975  }
1976  Geo_namespace::GeoTypesFactory::getNullGeoColumns(
1977  import_ti,
1978  coords,
1979  bounds,
1980  ring_sizes,
1981  poly_rings,
1982  PROMOTE_POLYGON_TO_MULTIPOLYGON);
1983  } else {
1984  if (import_geometry) {
1985  // geometry already exploded
1986  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
1987  import_geometry,
1988  import_ti,
1989  coords,
1990  bounds,
1991  ring_sizes,
1992  poly_rings,
1993  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
1994  std::string msg =
1995  "Failed to extract valid geometry from exploded row " +
1996  std::to_string(first_row_index_this_buffer +
1997  row_index_plus_one) +
1998  " for column " + cd->columnName;
1999  throw std::runtime_error(msg);
2000  }
2001  } else {
2002  // extract geometry directly from WKT
2003  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
2004  std::string(wkt),
2005  import_ti,
2006  coords,
2007  bounds,
2008  ring_sizes,
2009  poly_rings,
2010  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2011  std::string msg = "Failed to extract valid geometry from row " +
2012  std::to_string(first_row_index_this_buffer +
2013  row_index_plus_one) +
2014  " for column " + cd->columnName;
2015  throw std::runtime_error(msg);
2016  }
2017  }
2018 
2019  // validate types
2020  if (col_type != import_ti.get_type()) {
2021  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2022  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2023  col_type == SQLTypes::kMULTIPOLYGON)) {
2024  throw std::runtime_error(
2025  "Imported geometry doesn't match the type of column " +
2026  cd->columnName);
2027  }
2028  }
2029  }
2030 
2031  // assign render group?
2032  if (columnIdToRenderGroupAnalyzerMap.size()) {
2033  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2034  if (ring_sizes.size()) {
2035  // get a suitable render group for these poly coords
2036  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2037  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2038  render_group =
2039  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2040  } else {
2041  // empty poly
2042  render_group = -1;
2043  }
2044  }
2045  }
2046  }
2047 
2048  // import extracted geo
2049  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2050  cd,
2051  import_buffers,
2052  col_idx,
2053  coords,
2054  bounds,
2055  ring_sizes,
2056  poly_rings,
2057  render_group);
2058 
2059  // skip remaining physical columns
2060  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2061  ++cd_it;
2062  }
2063  }
2064  }
2065  import_status.rows_completed++;
2066  } catch (const std::exception& e) {
2067  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2068  import_buffers[col_idx_to_pop]->pop_value();
2069  }
2070  import_status.rows_rejected++;
2071  LOG(ERROR) << "Input exception thrown: " << e.what()
2072  << ". Row discarded. Data: " << shared::printContainer(row);
2073  }
2074  };
2075 
2076  if (copy_params.geo_explode_collections) {
2077  // explode and import
2078  // @TODO(se) convert to structure-bindings when we can use C++17 here
2079  auto collection_idx_type_name = explode_collections_step1(col_descs);
2080  int collection_col_idx = std::get<0>(collection_idx_type_name);
2081  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2082  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2083  // pull out the collection WKT
2084  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2085  auto const& collection_wkt = row[collection_col_idx];
2086  // convert to OGR
2087  OGRGeometry* ogr_geometry = nullptr;
2088  ScopeGuard destroy_ogr_geometry = [&] {
2089  if (ogr_geometry) {
2090  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2091  }
2092  };
2093  OGRErr ogr_status = OGRGeometryFactory::createFromWkt(
2094  collection_wkt.data(), nullptr, &ogr_geometry);
2095  if (ogr_status != OGRERR_NONE) {
2096  throw std::runtime_error("Failed to convert WKT to geometry");
2097  }
2098  // do the explode and import
2099  us = explode_collections_step2(ogr_geometry,
2100  collection_child_type,
2101  collection_col_name,
2102  first_row_index_this_buffer + row_index_plus_one,
2103  execute_import_row);
2104  } else {
2105  // import non-collection row just once
2106  us = measure<std::chrono::microseconds>::execution(
2107  [&] { execute_import_row(nullptr); });
2108  }
2109  total_str_to_val_time_us += us;
2110  } // end thread
2111  if (import_status.rows_completed > 0) {
2112  load_ms = measure<>::execution(
2113  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2114  }
2115  });
2116  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2117  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2118  << import_status.rows_completed << " rows inserted in "
2119  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2120  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2121  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2122  << "sec" << std::endl;
2123  }
2124 
2125  import_status.thread_id = thread_id;
2126  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2127 
2128  return import_status;
2129 }
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const Importer_NS::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor *> &col_descs)
Definition: Importer.cpp:1679
SQLTypes
Definition: sqltypes.h:39
#define LOG(tag)
Definition: Logger.h:188
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:138
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1713
size_t find_beginning(const char *buffer, size_t begin, size_t end, const Importer_NS::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords)
Definition: Importer.cpp:1408
#define CHECK_LT(x, y)
Definition: Logger.h:207
bool is_null(const T &v, const SQLTypeInfo &t)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:626
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
ThreadId thread_id()
Definition: Logger.cpp:715
#define CHECK(condition)
Definition: Logger.h:197
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:60
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
#define IS_GEO(T)
Definition: sqltypes.h:173
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ import_thread_shapefile()

static ImportStatus Importer_NS::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2131 of file Importer.cpp.

References CHECK, geospatial::compress_coords(), Importer_NS::DataStreamSink::copy_params, DEBUG_TIMING, logger::ERROR, Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step1(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step2(), Importer_NS::CopyParams::geo_explode_collections, Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Geo_namespace::GeoTypesFactory::getNullGeoColumns(), Importer_NS::DataStreamSink::import_status, logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::null_str, Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::ImportStatus::thread_id, logger::thread_id(), timer_start(), timer_stop(), and to_string().

Referenced by Importer_NS::Importer::importGDAL().

2140  {
2141  ImportStatus import_status;
2142  const CopyParams& copy_params = importer->get_copy_params();
2143  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2144  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2145  importer->get_import_buffers(thread_id);
2146 
2147  for (const auto& p : import_buffers) {
2148  p->clear();
2149  }
2150 
2151  auto convert_timer = timer_start();
2152 
2153  // we create this on the fly based on the first feature's SR
2154  std::unique_ptr<OGRCoordinateTransformation> coordinate_transformation;
2155 
2156  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2157  if (!features[iFeature]) {
2158  continue;
2159  }
2160 
2161  // get this feature's geometry
2162  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2163  if (pGeometry) {
2164  // for geodatabase, we need to consider features with no geometry
2165  // as we still want to create a table, even if it has no geo column
2166 
2167  // transform it
2168  // avoid GDAL error if not transformable
2169  auto geometry_sr = pGeometry->getSpatialReference();
2170  if (geometry_sr) {
2171  // create an OGRCoordinateTransformation (CT) on the fly
2172  // we must assume that all geo in this file will have
2173  // the same source SR, so the CT will be valid for all
2174  // transforming to a reusable CT is faster than to an SR
2175  if (coordinate_transformation == nullptr) {
2176  coordinate_transformation.reset(
2177  OGRCreateCoordinateTransformation(geometry_sr, poGeographicSR));
2178  if (coordinate_transformation == nullptr) {
2179  throw std::runtime_error(
2180  "Failed to create a GDAL CoordinateTransformation for incoming geo");
2181  }
2182  }
2183  pGeometry->transform(coordinate_transformation.get());
2184  }
2185  }
2186 
2187  //
2188  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2189  //
2190 
2191  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2192  size_t col_idx = 0;
2193  try {
2194  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2195  auto cd = *cd_it;
2196 
2197  // is this a geo column?
2198  const auto& col_ti = cd->columnType;
2199  if (col_ti.is_geometry()) {
2200  // Note that this assumes there is one and only one geo column in the table.
2201  // Currently, the importer only supports reading a single geospatial feature
2202  // from an input shapefile / geojson file, but this code will need to be
2203  // modified if that changes
2204  SQLTypes col_type = col_ti.get_type();
2205 
2206  // store null string in the base column
2207  import_buffers[col_idx]->add_value(
2208  cd, copy_params.null_str, true, copy_params);
2209  ++col_idx;
2210 
2211  // the data we now need to extract for the other columns
2212  std::vector<double> coords;
2213  std::vector<double> bounds;
2214  std::vector<int> ring_sizes;
2215  std::vector<int> poly_rings;
2216  int render_group = 0;
2217 
2218  // extract it
2219  SQLTypeInfo import_ti{col_ti};
2220  bool is_null_geo = !import_geometry;
2221  if (is_null_geo) {
2222  if (col_ti.get_notnull()) {
2223  throw std::runtime_error("NULL geo for column " + cd->columnName);
2224  }
2225  Geo_namespace::GeoTypesFactory::getNullGeoColumns(
2226  import_ti,
2227  coords,
2228  bounds,
2229  ring_sizes,
2230  poly_rings,
2231  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2232  } else {
2233  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
2234  import_geometry,
2235  import_ti,
2236  coords,
2237  bounds,
2238  ring_sizes,
2239  poly_rings,
2240  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2241  std::string msg = "Failed to extract valid geometry from feature " +
2242  std::to_string(firstFeature + iFeature + 1) +
2243  " for column " + cd->columnName;
2244  throw std::runtime_error(msg);
2245  }
2246 
2247  // validate types
2248  if (col_type != import_ti.get_type()) {
2249  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2250  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2251  col_type == SQLTypes::kMULTIPOLYGON)) {
2252  throw std::runtime_error(
2253  "Imported geometry doesn't match the type of column " +
2254  cd->columnName);
2255  }
2256  }
2257  }
2258 
2259  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2260  if (ring_sizes.size()) {
2261  // get a suitable render group for these poly coords
2262  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2263  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2264  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2265  } else {
2266  // empty poly
2267  render_group = -1;
2268  }
2269  }
2270 
2271  // create coords array value and add it to the physical column
2272  ++cd_it;
2273  auto cd_coords = *cd_it;
2274  std::vector<TDatum> td_coord_data;
2275  if (!is_null_geo) {
2276  std::vector<uint8_t> compressed_coords =
2277  geospatial::compress_coords(coords, col_ti);
2278  for (auto cc : compressed_coords) {
2279  TDatum td_byte;
2280  td_byte.val.int_val = cc;
2281  td_coord_data.push_back(td_byte);
2282  }
2283  }
2284  TDatum tdd_coords;
2285  tdd_coords.val.arr_val = td_coord_data;
2286  tdd_coords.is_null = is_null_geo;
2287  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2288  ++col_idx;
2289 
2290  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2291  // Create ring_sizes array value and add it to the physical column
2292  ++cd_it;
2293  auto cd_ring_sizes = *cd_it;
2294  std::vector<TDatum> td_ring_sizes;
2295  if (!is_null_geo) {
2296  for (auto ring_size : ring_sizes) {
2297  TDatum td_ring_size;
2298  td_ring_size.val.int_val = ring_size;
2299  td_ring_sizes.push_back(td_ring_size);
2300  }
2301  }
2302  TDatum tdd_ring_sizes;
2303  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2304  tdd_ring_sizes.is_null = is_null_geo;
2305  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2306  ++col_idx;
2307  }
2308 
2309  if (col_type == kMULTIPOLYGON) {
2310  // Create poly_rings array value and add it to the physical column
2311  ++cd_it;
2312  auto cd_poly_rings = *cd_it;
2313  std::vector<TDatum> td_poly_rings;
2314  if (!is_null_geo) {
2315  for (auto num_rings : poly_rings) {
2316  TDatum td_num_rings;
2317  td_num_rings.val.int_val = num_rings;
2318  td_poly_rings.push_back(td_num_rings);
2319  }
2320  }
2321  TDatum tdd_poly_rings;
2322  tdd_poly_rings.val.arr_val = td_poly_rings;
2323  tdd_poly_rings.is_null = is_null_geo;
2324  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2325  ++col_idx;
2326  }
2327 
2328  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2329  col_type == kMULTIPOLYGON) {
2330  // Create bounds array value and add it to the physical column
2331  ++cd_it;
2332  auto cd_bounds = *cd_it;
2333  std::vector<TDatum> td_bounds_data;
2334  if (!is_null_geo) {
2335  for (auto b : bounds) {
2336  TDatum td_double;
2337  td_double.val.real_val = b;
2338  td_bounds_data.push_back(td_double);
2339  }
2340  }
2341  TDatum tdd_bounds;
2342  tdd_bounds.val.arr_val = td_bounds_data;
2343  tdd_bounds.is_null = is_null_geo;
2344  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2345  ++col_idx;
2346  }
2347 
2348  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2349  // Create render_group value and add it to the physical column
2350  ++cd_it;
2351  auto cd_render_group = *cd_it;
2352  TDatum td_render_group;
2353  td_render_group.val.int_val = render_group;
2354  td_render_group.is_null = is_null_geo;
2355  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2356  ++col_idx;
2357  }
2358  } else {
2359  // regular column
2360  // pull from GDAL metadata
2361  const auto cit = columnNameToSourceNameMap.find(cd->columnName);
2362  CHECK(cit != columnNameToSourceNameMap.end());
2363  const std::string& fieldName = cit->second;
2364  const auto fit = fieldNameToIndexMap.find(fieldName);
2365  CHECK(fit != fieldNameToIndexMap.end());
2366  size_t iField = fit->second;
2367  CHECK(iField < fieldNameToIndexMap.size());
2368  std::string fieldContents = features[iFeature]->GetFieldAsString(iField);
2369  import_buffers[col_idx]->add_value(cd, fieldContents, false, copy_params);
2370  ++col_idx;
2371  }
2372  }
2373  import_status.rows_completed++;
2374  } catch (const std::exception& e) {
2375  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2376  import_buffers[col_idx_to_pop]->pop_value();
2377  }
2378  import_status.rows_rejected++;
2379  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2380  }
2381  };
2382 
2383  if (pGeometry && copy_params.geo_explode_collections) {
2384  // explode and import
2385  // @TODO(se) convert to structure-bindings when we can use C++17 here
2386  auto collection_idx_type_name = explode_collections_step1(col_descs);
2387  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2388  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2389  explode_collections_step2(pGeometry,
2390  collection_child_type,
2391  collection_col_name,
2392  firstFeature + iFeature + 1,
2393  execute_import_feature);
2394  } else {
2395  // import non-collection or null feature just once
2396  execute_import_feature(pGeometry);
2397  }
2398  } // end features
2399 
2400  float convert_ms =
2401  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2402  convert_timer)) /
2403  1000.0f;
2404 
2405  float load_ms = 0.0f;
2406  if (import_status.rows_completed > 0) {
2407  auto load_timer = timer_start();
2408  importer->load(import_buffers, import_status.rows_completed);
2409  load_ms =
2410  float(
2411  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2412  load_timer)) /
2413  1000.0f;
2414  }
2415 
2416  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2417  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2418  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2419  }
2420 
2421  import_status.thread_id = thread_id;
2422 
2423  if (DEBUG_TIMING) {
2424  LOG(INFO) << "DEBUG: Total "
2425  << float(timer_stop<std::chrono::steady_clock::time_point,
2426  std::chrono::microseconds>(convert_timer)) /
2427  1000.0f
2428  << "ms";
2429  }
2430 
2431  return import_status;
2432 }
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor *> &col_descs)
Definition: Importer.cpp:1679
SQLTypes
Definition: sqltypes.h:39
#define LOG(tag)
Definition: Logger.h:188
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:138
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1713
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:626
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
ThreadId thread_id()
Definition: Logger.cpp:715
#define CHECK(condition)
Definition: Logger.h:197
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
Type timer_start()
Definition: measure.h:40
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ importGeoFromLonLat()

bool Importer_NS::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords 
)

Definition at line 1408 of file Importer.cpp.

Referenced by import_thread_delimited().

/// Appends a longitude/latitude pair to `coords` as the two ordinates of a point.
///
/// @param lon     longitude value
/// @param lat     latitude value
/// @param coords  output vector; on success `lon` and then `lat` are appended
/// @return false (leaving `coords` untouched) if either value is NaN or
///         infinite, true otherwise
bool importGeoFromLonLat(double lon, double lat, std::vector<double>& coords) {
  if (!std::isfinite(lat) || !std::isfinite(lon)) {
    return false;
  }
  // we don't need to do any coordinate-system transformation
  // here (yet) so we don't need to use any OGR API or types
  // just use the values directly (assumed to be in 4326)
  coords.push_back(lon);
  coords.push_back(lat);
  return true;
}
+ Here is the caller graph for this function:

◆ NullArray()

ArrayDatum Importer_NS::NullArray ( const SQLTypeInfo &  ti)

Definition at line 367 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArrayDatum(), and NullDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value(), Importer_NS::TypedImportBuffer::add_values(), Importer_NS::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

367  {
368  SQLTypeInfo elem_ti = ti.get_elem_type();
369  auto len = ti.get_size();
370 
371  if (elem_ti.is_string()) {
372  // must not be called for array of strings
373  CHECK(false);
374  return ArrayDatum(0, NULL, true);
375  }
376 
377  if (len > 0) {
378  // Compose a NULL fixlen array
379  int8_t* buf = (int8_t*)checked_malloc(len);
380  // First scalar is a NULL_ARRAY sentinel
381  Datum d = NullArrayDatum(elem_ti);
382  int8_t* p = appendDatum(buf, d, elem_ti);
383  // Rest is filled with normal NULL sentinels
384  Datum d0 = NullDatum(elem_ti);
385  while ((p - buf) < len) {
386  p = appendDatum(p, d0, elem_ti);
387  }
388  CHECK((p - buf) == len);
389  return ArrayDatum(len, buf, true);
390  }
391  // NULL varlen array
392  return ArrayDatum(0, NULL, true);
393 }
bool is_string() const
Definition: sqltypes.h:409
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:129
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:232
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:859
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:273
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:617
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullArrayDatum()

Datum Importer_NS::NullArrayDatum ( SQLTypeInfo &  ti)

Definition at line 273 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

273  {
274  Datum d;
275  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
276  switch (type) {
277  case kBOOLEAN:
279  break;
280  case kBIGINT:
282  break;
283  case kINT:
285  break;
286  case kSMALLINT:
288  break;
289  case kTINYINT:
291  break;
292  case kFLOAT:
294  break;
295  case kDOUBLE:
297  break;
298  case kTIME:
299  case kTIMESTAMP:
300  case kDATE:
302  break;
303  case kPOINT:
304  case kLINESTRING:
305  case kPOLYGON:
306  case kMULTIPOLYGON:
307  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
308  default:
309  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
310  }
311  return d;
312 }
int8_t tinyintval
Definition: sqltypes.h:133
Definition: sqltypes.h:50
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
bool boolval
Definition: sqltypes.h:132
bool is_decimal() const
Definition: sqltypes.h:412
int32_t intval
Definition: sqltypes.h:135
float floatval
Definition: sqltypes.h:137
int64_t bigintval
Definition: sqltypes.h:136
int16_t smallintval
Definition: sqltypes.h:134
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
double doubleval
Definition: sqltypes.h:138
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:192
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullDatum()

Datum Importer_NS::NullDatum ( SQLTypeInfo &  ti)

Definition at line 232 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

232  {
233  Datum d;
234  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
235  switch (type) {
236  case kBOOLEAN:
238  break;
239  case kBIGINT:
241  break;
242  case kINT:
244  break;
245  case kSMALLINT:
247  break;
248  case kTINYINT:
250  break;
251  case kFLOAT:
252  d.floatval = NULL_FLOAT;
253  break;
254  case kDOUBLE:
256  break;
257  case kTIME:
258  case kTIMESTAMP:
259  case kDATE:
261  break;
262  case kPOINT:
263  case kLINESTRING:
264  case kPOLYGON:
265  case kMULTIPOLYGON:
266  throw std::runtime_error("Internal error: geometry type in NullDatum.");
267  default:
268  throw std::runtime_error("Internal error: invalid type in NullDatum.");
269  }
270  return d;
271 }
int8_t tinyintval
Definition: sqltypes.h:133
#define NULL_DOUBLE
Definition: sqltypes.h:185
Definition: sqltypes.h:50
bool boolval
Definition: sqltypes.h:132
bool is_decimal() const
Definition: sqltypes.h:412
int32_t intval
Definition: sqltypes.h:135
float floatval
Definition: sqltypes.h:137
int64_t bigintval
Definition: sqltypes.h:136
#define NULL_FLOAT
Definition: sqltypes.h:184
int16_t smallintval
Definition: sqltypes.h:134
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
double doubleval
Definition: sqltypes.h:138
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [1/2]

std::pair<SQLTypes, bool> Importer_NS::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4364 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and to_string().

Referenced by Importer_NS::Importer::gdalToColumnDescriptors().

4364  {
4365  switch (ogr_type) {
4366  case OFTInteger:
4367  return std::make_pair(kINT, false);
4368  case OFTIntegerList:
4369  return std::make_pair(kINT, true);
4370 #if GDAL_VERSION_MAJOR > 1
4371  case OFTInteger64:
4372  return std::make_pair(kBIGINT, false);
4373  case OFTInteger64List:
4374  return std::make_pair(kBIGINT, true);
4375 #endif
4376  case OFTReal:
4377  return std::make_pair(kDOUBLE, false);
4378  case OFTRealList:
4379  return std::make_pair(kDOUBLE, true);
4380  case OFTString:
4381  return std::make_pair(kTEXT, false);
4382  case OFTStringList:
4383  return std::make_pair(kTEXT, true);
4384  case OFTDate:
4385  return std::make_pair(kDATE, false);
4386  case OFTTime:
4387  return std::make_pair(kTIME, false);
4388  case OFTDateTime:
4389  return std::make_pair(kTIMESTAMP, false);
4390  case OFTBinary:
4391  // Interpret binary blobs as byte arrays here
4392  // but actual import will store NULL as GDAL will not
4393  // extract the blob (OGRFeature::GetFieldAsString will
4394  // result in the import buffers having an empty string)
4395  return std::make_pair(kTINYINT, true);
4396  default:
4397  break;
4398  }
4399  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4400 }
Definition: sqltypes.h:50
std::string to_string(char const *&&v)
Definition: sqltypes.h:53
Definition: sqltypes.h:54
Definition: sqltypes.h:46
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [2/2]

SQLTypes Importer_NS::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4402 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

4402  {
4403  switch (ogr_type) {
4404  case wkbPoint:
4405  return kPOINT;
4406  case wkbLineString:
4407  return kLINESTRING;
4408  case wkbPolygon:
4409  return kPOLYGON;
4410  case wkbMultiPolygon:
4411  return kMULTIPOLYGON;
4412  default:
4413  break;
4414  }
4415  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4416 }
std::string to_string(char const *&&v)
+ Here is the call graph for this function:

◆ setup_column_loaders()

std::vector< std::unique_ptr< TypedImportBuffer > > Importer_NS::setup_column_loaders ( const TableDescriptor *  td,
Loader *  loader 
)

Definition at line 5128 of file Importer.cpp.

References CHECK, Importer_NS::Loader::get_column_descs(), and Importer_NS::Loader::getStringDict().

Referenced by Parser::AddColumnStmt::execute(), and DBHandler::prepare_columnar_loader().

5130  {
5131  CHECK(td);
5132  auto col_descs = loader->get_column_descs();
5133 
5134  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
5135  for (auto cd : col_descs) {
5136  import_buffers.emplace_back(
5137  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
5138  }
5139 
5140  return import_buffers;
5141 }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ StringToArray()

ArrayDatum Importer_NS::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 314 of file Importer.cpp.

References appendDatum(), Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_delim, Importer_NS::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, Importer_NS::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by Importer_NS::TypedImportBuffer::add_value().

316  {
317  SQLTypeInfo elem_ti = ti.get_elem_type();
318  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
319  return ArrayDatum(0, NULL, true);
320  }
321  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
322  LOG(WARNING) << "Malformed array: " << s;
323  return ArrayDatum(0, NULL, true);
324  }
325  std::vector<std::string> elem_strs;
326  size_t last = 1;
327  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
328  i = s.find(copy_params.array_delim, last)) {
329  elem_strs.push_back(s.substr(last, i - last));
330  last = i + 1;
331  }
332  if (last + 1 <= s.size()) {
333  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
334  }
335  if (elem_strs.size() == 1) {
336  auto str = elem_strs.front();
337  auto str_trimmed = trim_space(str.c_str(), str.length());
338  if (str_trimmed == "") {
339  elem_strs.clear(); // Empty array
340  }
341  }
342  if (!elem_ti.is_string()) {
343  size_t len = elem_strs.size() * elem_ti.get_size();
344  int8_t* buf = (int8_t*)checked_malloc(len);
345  int8_t* p = buf;
346  for (auto& es : elem_strs) {
347  auto e = trim_space(es.c_str(), es.length());
348  bool is_null = (e == copy_params.null_str) || e == "NULL";
349  if (!elem_ti.is_string() && e == "") {
350  is_null = true;
351  }
352  if (elem_ti.is_number() || elem_ti.is_time()) {
353  if (!isdigit(e[0]) && e[0] != '-') {
354  is_null = true;
355  }
356  }
357  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
358  p = appendDatum(p, d, elem_ti);
359  }
360  return ArrayDatum(len, buf, false);
361  }
362  // must not be called for array of strings
363  CHECK(false);
364  return ArrayDatum(0, NULL, true);
365 }
bool is_time() const
Definition: sqltypes.h:415
bool is_string() const
Definition: sqltypes.h:409
#define LOG(tag)
Definition: Logger.h:188
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
static const std::string trim_space(const char *field, const size_t len)
Definition: Importer.cpp:220
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:129
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:232
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:122
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:859
bool is_null(const T &v, const SQLTypeInfo &t)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:617
#define CHECK(condition)
Definition: Logger.h:197
bool is_number() const
Definition: sqltypes.h:414
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToArrayDatum()

ArrayDatum Importer_NS::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti 
)

Definition at line 451 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value().

451  {
452  SQLTypeInfo elem_ti = ti.get_elem_type();
453 
454  CHECK(!elem_ti.is_string());
455 
456  if (datum.is_null) {
457  return NullArray(ti);
458  }
459 
460  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
461  int8_t* buf = (int8_t*)checked_malloc(len);
462  int8_t* p = buf;
463  for (auto& e : datum.val.arr_val) {
464  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
465  }
466 
467  return ArrayDatum(len, buf, false);
468 }
bool is_string() const
Definition: sqltypes.h:409
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:129
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:859
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:367
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:617
#define CHECK(condition)
Definition: Logger.h:197
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:406
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToDatum()

Datum Importer_NS::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti 
)

Definition at line 406 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

406  {
407  Datum d;
408  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
409  switch (type) {
410  case kBOOLEAN:
411  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
412  break;
413  case kBIGINT:
414  d.bigintval =
415  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
416  break;
417  case kINT:
418  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
419  break;
420  case kSMALLINT:
421  d.smallintval =
422  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
423  break;
424  case kTINYINT:
425  d.tinyintval =
426  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
427  break;
428  case kFLOAT:
429  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
430  break;
431  case kDOUBLE:
432  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
433  break;
434  case kTIME:
435  case kTIMESTAMP:
436  case kDATE:
437  d.bigintval =
438  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
439  break;
440  case kPOINT:
441  case kLINESTRING:
442  case kPOLYGON:
443  case kMULTIPOLYGON:
444  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
445  default:
446  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
447  }
448  return d;
449 }
int8_t tinyintval
Definition: sqltypes.h:133
#define NULL_DOUBLE
Definition: sqltypes.h:185
Definition: sqltypes.h:50
bool boolval
Definition: sqltypes.h:132
bool is_decimal() const
Definition: sqltypes.h:412
int32_t intval
Definition: sqltypes.h:135
float floatval
Definition: sqltypes.h:137
int64_t bigintval
Definition: sqltypes.h:136
#define NULL_FLOAT
Definition: sqltypes.h:184
int16_t smallintval
Definition: sqltypes.h:134
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
double doubleval
Definition: sqltypes.h:138
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ trim_space()

static const std::string Importer_NS::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 220 of file Importer.cpp.

Referenced by Importer_NS::delimited_parser::get_row(), and StringToArray().

/// Returns a copy of the first `len` characters of `field` with leading and
/// trailing spaces (' ') and carriage returns ('\r') removed.
///
/// Only those two characters are stripped; tabs, newlines and other
/// whitespace are kept.
///
/// @param field  pointer to the characters to trim
/// @param len    number of characters of `field` to consider
/// @return the trimmed text; empty if the range is empty or consists only of
///         spaces and carriage returns
static const std::string trim_space(const char* field, const size_t len) {
  const auto is_pad = [](const char c) { return c == ' ' || c == '\r'; };

  const char* first = field;
  const char* last = field + len;  // one past the last character considered
  while (first < last && is_pad(*first)) {
    ++first;
  }
  while (first < last && is_pad(*(last - 1))) {
    --last;
  }
  return std::string(first, last);
}
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
+ Here is the caller graph for this function:

◆ try_cast()

template<class T >
bool Importer_NS::try_cast ( const std::string &  str)

Definition at line 2912 of file Importer.cpp.

2912  {
2913  try {
2914  boost::lexical_cast<T>(str);
2915  } catch (const boost::bad_lexical_cast& e) {
2916  return false;
2917  }
2918  return true;
2919 }

◆ try_strptimes()

char* Importer_NS::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 2921 of file Importer.cpp.

Referenced by Importer_NS::Detector::detect_sqltype().

/// Tries each strptime() format in order and returns the result of the first
/// one that parses `str`.
///
/// @param str      NUL-terminated text to parse
/// @param formats  strptime() format strings, tried in the given order
/// @return the pointer returned by the first successful strptime() call (the
///         position in `str` just past the parsed text), or nullptr if no
///         format matches or `formats` is empty
inline char* try_strptimes(const char* str, const std::vector<std::string>& formats) {
  // The broken-down time is only scratch space; zero it so that no
  // indeterminate fields are ever handed to strptime().
  std::tm tm_struct{};
  // Iterate by reference: the formats are only read, never copied.
  for (const auto& format : formats) {
    if (char* rest = strptime(str, format.c_str(), &tm_struct)) {
      return rest;
    }
  }
  return nullptr;
}
+ Here is the caller graph for this function:

Variable Documentation

◆ import_status_map

std::map<std::string, ImportStatus> Importer_NS::import_status_map
static

Definition at line 147 of file Importer.cpp.

◆ kImportFileBufferSize

constexpr size_t Importer_NS::kImportFileBufferSize = (1 << 23)
static

Definition at line 32 of file CopyParams.h.

◆ PROMOTE_POLYGON_TO_MULTIPOLYGON

constexpr bool Importer_NS::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static

Definition at line 144 of file Importer.cpp.

◆ status_mutex

mapd_shared_mutex Importer_NS::status_mutex
static

Definition at line 146 of file Importer.cpp.