OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export Namespace Reference

Namespaces

 anonymous_namespace{GDAL.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 delimited_parser
 

Classes

struct  CopyParams
 
class  GDAL
 
struct  GeoImportException
 
struct  BadRowsTracker
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  ImporterUtils
 
class  RenderGroupAnalyzer
 
class  Importer
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr
< TypedImportBuffer > > 
setup_column_loaders (const TableDescriptor *td, Loader *loader)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string,
ImportStatus > 
import_status_map
 

Typedef Documentation

using import_export::ArraySliceRange = std::pair<size_t, size_t>

Definition at line 72 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 136 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = std::map<std::string, std::string>

Definition at line 134 of file Importer.cpp.

using import_export::FeaturePtrVector = std::vector<OGRFeatureUqPtr>

Definition at line 137 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = std::map<std::string, size_t>

Definition at line 133 of file Importer.cpp.

Enumeration Type Documentation

enum import_export::FileType

Enumerator
DELIMITED 
POLYGON 

Definition at line 34 of file CopyParams.h.

34  {
35  DELIMITED,
36  POLYGON
37 #ifdef ENABLE_IMPORT_PARQUET
38  ,
39  PARQUET
40 #endif
41 };

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 400 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

400  {
401  const auto& arr = datum.val.arr_val;
402  for (const auto& elem_datum : arr) {
403  string_vec.push_back(elem_datum.val.str_val);
404  }
405 }

+ Here is the caller graph for this function:

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4564 of file Importer.cpp.

References LOG, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

4565  {
4566  // prepare to gather subdirectories
4567  std::vector<std::string> subdirectories;
4568 
4569  // get entries
4570  char** entries = VSIReadDir(archive_path.c_str());
4571  if (!entries) {
4572  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4573  return;
4574  }
4575 
4576  // force scope
4577  {
4578  // request clean-up
4579  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4580 
4581  // check all the entries
4582  int index = 0;
4583  while (true) {
4584  // get next entry, or drop out if there isn't one
4585  char* entry_c = entries[index++];
4586  if (!entry_c) {
4587  break;
4588  }
4589  std::string entry(entry_c);
4590 
4591  // ignore '.' and '..'
4592  if (entry == "." || entry == "..") {
4593  continue;
4594  }
4595 
4596  // build the full path
4597  std::string entry_path = archive_path + std::string("/") + entry;
4598 
4599  // is it a file or a sub-folder
4600  VSIStatBufL sb;
4601  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4602  if (result < 0) {
4603  break;
4604  }
4605 
4606  if (VSI_ISDIR(sb.st_mode)) {
4607  // a directory that ends with .gdb could be a Geodatabase bundle
4608  // arguably dangerous to decide this purely by name, but any further
4609  // validation would be very complex especially at this scope
4610  if (boost::iends_with(entry_path, ".gdb")) {
4611  // add the directory as if it was a file and don't recurse into it
4612  files.push_back(entry_path);
4613  } else {
4614  // add subdirectory to be recursed into
4615  subdirectories.push_back(entry_path);
4616  }
4617  } else {
4618  // add this file
4619  files.push_back(entry_path);
4620  }
4621  }
4622  }
4623 
4624  // recurse into each subdirectories we found
4625  for (const auto& subdirectory : subdirectories) {
4626  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4627  }
4628 }
#define LOG(tag)
Definition: Logger.h:188
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4564

+ Here is the caller graph for this function:

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1804 of file Importer.cpp.

References CHECK(), CHECK_LT, DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), import_export::Importer::getCatalog(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Geo_namespace::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, anonymous_namespace{TypedDataAccessors.h}::is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::ImportStatus::thread_id, logger::thread_id(), and to_string().

Referenced by import_export::Importer::importDelimited().

1812  {
1813  ImportStatus import_status;
1814  int64_t total_get_row_time_us = 0;
1815  int64_t total_str_to_val_time_us = 0;
1816  CHECK(scratch_buffer);
1817  auto buffer = scratch_buffer.get();
1818  auto load_ms = measure<>::execution([]() {});
1819  auto ms = measure<>::execution([&]() {
1820  const CopyParams& copy_params = importer->get_copy_params();
1821  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1822  size_t begin =
1823  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
1824  const char* thread_buf = buffer + begin_pos + begin;
1825  const char* thread_buf_end = buffer + end_pos;
1826  const char* buf_end = buffer + total_size;
1827  bool try_single_thread = false;
1828  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1829  importer->get_import_buffers(thread_id);
1831  int phys_cols = 0;
1832  int point_cols = 0;
1833  for (const auto cd : col_descs) {
1834  const auto& col_ti = cd->columnType;
1835  phys_cols += col_ti.get_physical_cols();
1836  if (cd->columnType.get_type() == kPOINT) {
1837  point_cols++;
1838  }
1839  }
1840  auto num_cols = col_descs.size() - phys_cols;
1841  for (const auto& p : import_buffers) {
1842  p->clear();
1843  }
1844  std::vector<std::string_view> row;
1845  size_t row_index_plus_one = 0;
1846  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1847  row.clear();
1848  std::vector<std::unique_ptr<char[]>>
1849  tmp_buffers; // holds string w/ removed escape chars, etc
1850  if (DEBUG_TIMING) {
1853  thread_buf_end,
1854  buf_end,
1855  copy_params,
1856  importer->get_is_array(),
1857  row,
1858  tmp_buffers,
1859  try_single_thread);
1860  });
1861  total_get_row_time_us += us;
1862  } else {
1864  thread_buf_end,
1865  buf_end,
1866  copy_params,
1867  importer->get_is_array(),
1868  row,
1869  tmp_buffers,
1870  try_single_thread);
1871  }
1872  row_index_plus_one++;
1873  // Each POINT could consume two separate coords instead of a single WKT
1874  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1875  import_status.rows_rejected++;
1876  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1877  << row.size() << "): " << shared::printContainer(row);
1878  if (import_status.rows_rejected > copy_params.max_reject) {
1879  break;
1880  }
1881  continue;
1882  }
1883 
1884  //
1885  // lambda for importing a row (perhaps multiple times if exploding a collection)
1886  //
1887 
1888  auto execute_import_row = [&](OGRGeometry* import_geometry) {
1889  size_t import_idx = 0;
1890  size_t col_idx = 0;
1891  try {
1892  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1893  auto cd = *cd_it;
1894  const auto& col_ti = cd->columnType;
1895 
1896  bool is_null =
1897  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1898  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1899  // So initially nullness may be missed and not passed to add_value,
1900  // which then might also check and still decide it's actually a NULL, e.g.
1901  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
1902  // So "NULL" is not recognized as NULL but then it's not recognized as
1903  // a valid kINT, so it's a NULL after all.
1904  // Checking for "NULL" here too, as a widely accepted notation for NULL.
1905 
1906  // Treating empty as NULL
1907  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1908  is_null = true;
1909  }
1910 
1911  if (col_ti.get_physical_cols() == 0) {
1912  // not geo
1913 
1914  import_buffers[col_idx]->add_value(
1915  cd, row[import_idx], is_null, copy_params);
1916 
1917  // next
1918  ++import_idx;
1919  ++col_idx;
1920  } else {
1921  // geo
1922 
1923  // store null string in the base column
1924  import_buffers[col_idx]->add_value(
1925  cd, copy_params.null_str, true, copy_params);
1926 
1927  // WKT from string we're not storing
1928  auto const& wkt = row[import_idx];
1929 
1930  // next
1931  ++import_idx;
1932  ++col_idx;
1933 
1934  SQLTypes col_type = col_ti.get_type();
1935  CHECK(IS_GEO(col_type));
1936 
1937  std::vector<double> coords;
1938  std::vector<double> bounds;
1939  std::vector<int> ring_sizes;
1940  std::vector<int> poly_rings;
1941  int render_group = 0;
1942 
1943  if (!is_null && col_type == kPOINT && wkt.size() > 0 &&
1944  (wkt[0] == '.' || isdigit(wkt[0]) || wkt[0] == '-')) {
1945  // Invalid WKT, looks more like a scalar.
1946  // Try custom POINT import: from two separate scalars rather than WKT
1947  // string
1948  double lon = std::atof(std::string(wkt).c_str());
1949  double lat = NAN;
1950  auto lat_str = row[import_idx];
1951  ++import_idx;
1952  if (lat_str.size() > 0 &&
1953  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1954  lat = std::atof(std::string(lat_str).c_str());
1955  }
1956  // Swap coordinates if this table uses a reverse order: lat/lon
1957  if (!copy_params.lonlat) {
1958  std::swap(lat, lon);
1959  }
1960  // TODO: should check if POINT column should have been declared with
1961  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1962  // throw std::runtime_error("POINT column " + cd->columnName + " is
1963  // not WGS84, cannot insert lon/lat");
1964  // }
1965  if (!importGeoFromLonLat(lon, lat, coords)) {
1966  throw std::runtime_error(
1967  "Cannot read lon/lat to insert into POINT column " +
1968  cd->columnName);
1969  }
1970  } else {
1971  // import it
1972  SQLTypeInfo import_ti{col_ti};
1973  if (is_null) {
1974  if (col_ti.get_notnull()) {
1975  throw std::runtime_error("NULL geo for column " + cd->columnName);
1976  }
1978  import_ti,
1979  coords,
1980  bounds,
1981  ring_sizes,
1982  poly_rings,
1984  } else {
1985  if (import_geometry) {
1986  // geometry already exploded
1988  import_geometry,
1989  import_ti,
1990  coords,
1991  bounds,
1992  ring_sizes,
1993  poly_rings,
1995  std::string msg =
1996  "Failed to extract valid geometry from exploded row " +
1997  std::to_string(first_row_index_this_buffer +
1998  row_index_plus_one) +
1999  " for column " + cd->columnName;
2000  throw std::runtime_error(msg);
2001  }
2002  } else {
2003  // extract geometry directly from WKT
2005  std::string(wkt),
2006  import_ti,
2007  coords,
2008  bounds,
2009  ring_sizes,
2010  poly_rings,
2012  std::string msg = "Failed to extract valid geometry from row " +
2013  std::to_string(first_row_index_this_buffer +
2014  row_index_plus_one) +
2015  " for column " + cd->columnName;
2016  throw std::runtime_error(msg);
2017  }
2018  }
2019 
2020  // validate types
2021  if (col_type != import_ti.get_type()) {
2023  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2024  col_type == SQLTypes::kMULTIPOLYGON)) {
2025  throw std::runtime_error(
2026  "Imported geometry doesn't match the type of column " +
2027  cd->columnName);
2028  }
2029  }
2030  }
2031 
2032  // assign render group?
2033  if (columnIdToRenderGroupAnalyzerMap.size()) {
2034  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2035  if (ring_sizes.size()) {
2036  // get a suitable render group for these poly coords
2037  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2038  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2039  render_group =
2040  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2041  } else {
2042  // empty poly
2043  render_group = -1;
2044  }
2045  }
2046  }
2047  }
2048 
2049  // import extracted geo
2050  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2051  cd,
2052  import_buffers,
2053  col_idx,
2054  coords,
2055  bounds,
2056  ring_sizes,
2057  poly_rings,
2058  render_group);
2059 
2060  // skip remaining physical columns
2061  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2062  ++cd_it;
2063  }
2064  }
2065  }
2066  import_status.rows_completed++;
2067  } catch (const std::exception& e) {
2068  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2069  import_buffers[col_idx_to_pop]->pop_value();
2070  }
2071  import_status.rows_rejected++;
2072  LOG(ERROR) << "Input exception thrown: " << e.what()
2073  << ". Row discarded. Data: " << shared::printContainer(row);
2074  }
2075  };
2076 
2077  if (copy_params.geo_explode_collections) {
2078  // explode and import
2079  // @TODO(se) convert to structure-bindings when we can use C++17 here
2080  auto collection_idx_type_name = explode_collections_step1(col_descs);
2081  int collection_col_idx = std::get<0>(collection_idx_type_name);
2082  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2083  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2084  // pull out the collection WKT
2085  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2086  auto const& collection_wkt = row[collection_col_idx];
2087  // convert to OGR
2088  OGRGeometry* ogr_geometry = nullptr;
2089  ScopeGuard destroy_ogr_geometry = [&] {
2090  if (ogr_geometry) {
2091  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2092  }
2093  };
2094  OGRErr ogr_status = OGRGeometryFactory::createFromWkt(
2095  collection_wkt.data(), nullptr, &ogr_geometry);
2096  if (ogr_status != OGRERR_NONE) {
2097  throw std::runtime_error("Failed to convert WKT to geometry");
2098  }
2099  // do the explode and import
2100  us = explode_collections_step2(ogr_geometry,
2101  collection_child_type,
2102  collection_col_name,
2103  first_row_index_this_buffer + row_index_plus_one,
2104  execute_import_row);
2105  } else {
2106  // import non-collection row just once
2108  [&] { execute_import_row(nullptr); });
2109  }
2110  total_str_to_val_time_us += us;
2111  } // end thread
2112  if (import_status.rows_completed > 0) {
2113  load_ms = measure<>::execution(
2114  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2115  }
2116  });
2117  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2118  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2119  << import_status.rows_completed << " rows inserted in "
2120  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2121  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2122  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2123  << "sec" << std::endl;
2124  }
2125 
2126  import_status.thread_id = thread_id;
2127  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2128 
2129  return import_status;
2130 }
SQLTypes
Definition: sqltypes.h:39
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1714
#define LOG(tag)
Definition: Logger.h:188
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
std::string to_string(char const *&&v)
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords)
Definition: Importer.cpp:1409
#define DEBUG_TIMING
Definition: Importer.cpp:139
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:207
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
bool is_null(const T &v, const SQLTypeInfo &t)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:839
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:637
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1680
ThreadId thread_id()
Definition: Logger.cpp:715
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:63
#define IS_GEO(T)
Definition: sqltypes.h:173
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2132 of file Importer.cpp.

References CHECK(), geospatial::compress_coords(), DEBUG_TIMING, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Geo_namespace::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), timer_stop(), and to_string().

Referenced by import_export::Importer::importGDAL().

2141  {
2142  ImportStatus import_status;
2143  const CopyParams& copy_params = importer->get_copy_params();
2144  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2145  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2146  importer->get_import_buffers(thread_id);
2147 
2148  for (const auto& p : import_buffers) {
2149  p->clear();
2150  }
2151 
2152  auto convert_timer = timer_start();
2153 
2154  // we create this on the fly based on the first feature's SR
2155  std::unique_ptr<OGRCoordinateTransformation> coordinate_transformation;
2156 
2157  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2158  if (!features[iFeature]) {
2159  continue;
2160  }
2161 
2162  // get this feature's geometry
2163  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2164  if (pGeometry) {
2165  // for geodatabase, we need to consider features with no geometry
2166  // as we still want to create a table, even if it has no geo column
2167 
2168  // transform it
2169  // avoid GDAL error if not transformable
2170  auto geometry_sr = pGeometry->getSpatialReference();
2171  if (geometry_sr) {
2172  // create an OGRCoordinateTransformation (CT) on the fly
2173  // we must assume that all geo in this file will have
2174  // the same source SR, so the CT will be valid for all
2175  // transforming to a reusable CT is faster than to an SR
2176  if (coordinate_transformation == nullptr) {
2177  coordinate_transformation.reset(
2178  OGRCreateCoordinateTransformation(geometry_sr, poGeographicSR));
2179  if (coordinate_transformation == nullptr) {
2180  throw std::runtime_error(
2181  "Failed to create a GDAL CoordinateTransformation for incoming geo");
2182  }
2183  }
2184  pGeometry->transform(coordinate_transformation.get());
2185  }
2186  }
2187 
2188  //
2189  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2190  //
2191 
2192  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2193  size_t col_idx = 0;
2194  try {
2195  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2196  auto cd = *cd_it;
2197 
2198  // is this a geo column?
2199  const auto& col_ti = cd->columnType;
2200  if (col_ti.is_geometry()) {
2201  // Note that this assumes there is one and only one geo column in the table.
2202  // Currently, the importer only supports reading a single geospatial feature
2203  // from an input shapefile / geojson file, but this code will need to be
2204  // modified if that changes
2205  SQLTypes col_type = col_ti.get_type();
2206 
2207  // store null string in the base column
2208  import_buffers[col_idx]->add_value(
2209  cd, copy_params.null_str, true, copy_params);
2210  ++col_idx;
2211 
2212  // the data we now need to extract for the other columns
2213  std::vector<double> coords;
2214  std::vector<double> bounds;
2215  std::vector<int> ring_sizes;
2216  std::vector<int> poly_rings;
2217  int render_group = 0;
2218 
2219  // extract it
2220  SQLTypeInfo import_ti{col_ti};
2221  bool is_null_geo = !import_geometry;
2222  if (is_null_geo) {
2223  if (col_ti.get_notnull()) {
2224  throw std::runtime_error("NULL geo for column " + cd->columnName);
2225  }
2227  import_ti,
2228  coords,
2229  bounds,
2230  ring_sizes,
2231  poly_rings,
2233  } else {
2235  import_geometry,
2236  import_ti,
2237  coords,
2238  bounds,
2239  ring_sizes,
2240  poly_rings,
2242  std::string msg = "Failed to extract valid geometry from feature " +
2243  std::to_string(firstFeature + iFeature + 1) +
2244  " for column " + cd->columnName;
2245  throw std::runtime_error(msg);
2246  }
2247 
2248  // validate types
2249  if (col_type != import_ti.get_type()) {
2251  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2252  col_type == SQLTypes::kMULTIPOLYGON)) {
2253  throw std::runtime_error(
2254  "Imported geometry doesn't match the type of column " +
2255  cd->columnName);
2256  }
2257  }
2258  }
2259 
2260  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2261  if (ring_sizes.size()) {
2262  // get a suitable render group for these poly coords
2263  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2264  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2265  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2266  } else {
2267  // empty poly
2268  render_group = -1;
2269  }
2270  }
2271 
2272  // create coords array value and add it to the physical column
2273  ++cd_it;
2274  auto cd_coords = *cd_it;
2275  std::vector<TDatum> td_coord_data;
2276  if (!is_null_geo) {
2277  std::vector<uint8_t> compressed_coords =
2278  geospatial::compress_coords(coords, col_ti);
2279  for (auto cc : compressed_coords) {
2280  TDatum td_byte;
2281  td_byte.val.int_val = cc;
2282  td_coord_data.push_back(td_byte);
2283  }
2284  }
2285  TDatum tdd_coords;
2286  tdd_coords.val.arr_val = td_coord_data;
2287  tdd_coords.is_null = is_null_geo;
2288  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2289  ++col_idx;
2290 
2291  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2292  // Create ring_sizes array value and add it to the physical column
2293  ++cd_it;
2294  auto cd_ring_sizes = *cd_it;
2295  std::vector<TDatum> td_ring_sizes;
2296  if (!is_null_geo) {
2297  for (auto ring_size : ring_sizes) {
2298  TDatum td_ring_size;
2299  td_ring_size.val.int_val = ring_size;
2300  td_ring_sizes.push_back(td_ring_size);
2301  }
2302  }
2303  TDatum tdd_ring_sizes;
2304  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2305  tdd_ring_sizes.is_null = is_null_geo;
2306  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2307  ++col_idx;
2308  }
2309 
2310  if (col_type == kMULTIPOLYGON) {
2311  // Create poly_rings array value and add it to the physical column
2312  ++cd_it;
2313  auto cd_poly_rings = *cd_it;
2314  std::vector<TDatum> td_poly_rings;
2315  if (!is_null_geo) {
2316  for (auto num_rings : poly_rings) {
2317  TDatum td_num_rings;
2318  td_num_rings.val.int_val = num_rings;
2319  td_poly_rings.push_back(td_num_rings);
2320  }
2321  }
2322  TDatum tdd_poly_rings;
2323  tdd_poly_rings.val.arr_val = td_poly_rings;
2324  tdd_poly_rings.is_null = is_null_geo;
2325  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2326  ++col_idx;
2327  }
2328 
2329  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2330  col_type == kMULTIPOLYGON) {
2331  // Create bounds array value and add it to the physical column
2332  ++cd_it;
2333  auto cd_bounds = *cd_it;
2334  std::vector<TDatum> td_bounds_data;
2335  if (!is_null_geo) {
2336  for (auto b : bounds) {
2337  TDatum td_double;
2338  td_double.val.real_val = b;
2339  td_bounds_data.push_back(td_double);
2340  }
2341  }
2342  TDatum tdd_bounds;
2343  tdd_bounds.val.arr_val = td_bounds_data;
2344  tdd_bounds.is_null = is_null_geo;
2345  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2346  ++col_idx;
2347  }
2348 
2349  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2350  // Create render_group value and add it to the physical column
2351  ++cd_it;
2352  auto cd_render_group = *cd_it;
2353  TDatum td_render_group;
2354  td_render_group.val.int_val = render_group;
2355  td_render_group.is_null = is_null_geo;
2356  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2357  ++col_idx;
2358  }
2359  } else {
2360  // regular column
2361  // pull from GDAL metadata
2362  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2363  CHECK(cit != columnNameToSourceNameMap.end());
2364  auto const& field_name = cit->second;
2365 
2366  auto const fit = fieldNameToIndexMap.find(field_name);
2367  CHECK(fit != fieldNameToIndexMap.end());
2368  auto const& field_index = fit->second;
2369  CHECK(field_index < fieldNameToIndexMap.size());
2370 
2371  auto const& feature = features[iFeature];
2372 
2373  auto field_defn = feature->GetFieldDefnRef(field_index);
2374  CHECK(field_defn);
2375 
2376  // OGRFeature::GetFieldAsString() can only return 80 characters
2377  // so for array columns, we are obliged to fetch the actual values
2378  // and construct the concatenated string ourselves
2379 
2380  std::string value_string;
2381  int array_index = 0, array_size = 0;
2382 
2383  auto stringify_numeric_list = [&](auto* values) {
2384  value_string = "{";
2385  while (array_index < array_size) {
2386  auto separator = (array_index > 0) ? "," : "";
2387  value_string += separator + std::to_string(values[array_index]);
2388  array_index++;
2389  }
2390  value_string += "}";
2391  };
2392 
2393  auto field_type = field_defn->GetType();
2394  switch (field_type) {
2395  case OFTInteger:
2396  case OFTInteger64:
2397  case OFTReal:
2398  case OFTString:
2399  case OFTBinary:
2400  case OFTDate:
2401  case OFTTime:
2402  case OFTDateTime: {
2403  value_string = feature->GetFieldAsString(field_index);
2404  } break;
2405  case OFTIntegerList: {
2406  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2407  stringify_numeric_list(values);
2408  } break;
2409  case OFTInteger64List: {
2410  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2411  stringify_numeric_list(values);
2412  } break;
2413  case OFTRealList: {
2414  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2415  stringify_numeric_list(values);
2416  } break;
2417  case OFTStringList: {
2418  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2419  value_string = "{";
2420  if (array_of_strings) {
2421  while (auto* this_string = array_of_strings[array_index]) {
2422  auto separator = (array_index > 0) ? "," : "";
2423  value_string += separator + std::string(this_string);
2424  array_index++;
2425  }
2426  }
2427  value_string += "}";
2428  } break;
2429  default:
2430  throw std::runtime_error("Unsupported geo file field type (" +
2431  std::to_string(static_cast<int>(field_type)) +
2432  ")");
2433  }
2434 
2435  static CopyParams default_copy_params;
2436  import_buffers[col_idx]->add_value(
2437  cd, value_string, false, default_copy_params);
2438  ++col_idx;
2439  }
2440  }
2441  import_status.rows_completed++;
2442  } catch (const std::exception& e) {
2443  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2444  import_buffers[col_idx_to_pop]->pop_value();
2445  }
2446  import_status.rows_rejected++;
2447  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2448  }
2449  };
2450 
2451  if (pGeometry && copy_params.geo_explode_collections) {
2452  // explode and import
2453  // @TODO(se) convert to structure-bindings when we can use C++17 here
2454  auto collection_idx_type_name = explode_collections_step1(col_descs);
2455  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2456  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2457  explode_collections_step2(pGeometry,
2458  collection_child_type,
2459  collection_col_name,
2460  firstFeature + iFeature + 1,
2461  execute_import_feature);
2462  } else {
2463  // import non-collection or null feature just once
2464  execute_import_feature(pGeometry);
2465  }
2466  } // end features
2467 
2468  float convert_ms =
2469  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2470  convert_timer)) /
2471  1000.0f;
2472 
2473  float load_ms = 0.0f;
2474  if (import_status.rows_completed > 0) {
2475  auto load_timer = timer_start();
2476  importer->load(import_buffers, import_status.rows_completed);
2477  load_ms =
2478  float(
2479  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2480  load_timer)) /
2481  1000.0f;
2482  }
2483 
2484  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2485  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2486  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2487  }
2488 
2489  import_status.thread_id = thread_id;
2490 
2491  if (DEBUG_TIMING) {
2492  LOG(INFO) << "DEBUG: Total "
2493  << float(timer_stop<std::chrono::steady_clock::time_point,
2494  std::chrono::microseconds>(convert_timer)) /
2495  1000.0f
2496  << "ms";
2497  }
2498 
2499  return import_status;
2500 }
SQLTypes
Definition: sqltypes.h:39
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1714
#define LOG(tag)
Definition: Logger.h:188
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:139
CHECK(cgen_state)
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:839
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:637
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1680
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
ThreadId thread_id()
Definition: Logger.cpp:715
Type timer_start()
Definition: measure.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords 
)

Definition at line 1409 of file Importer.cpp.

Referenced by import_thread_delimited().

// Appends one lon/lat pair to `coords`.
// Returns false and leaves `coords` untouched when either input is infinite
// or NaN; otherwise pushes `lon` first, then `lat`, and returns true.
1409  {
1410  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1411  return false;
1412  }
1413  // we don't need to do any coordinate-system transformation
1414  // here (yet) so we don't need to use any OGR API or types
1415  // just use the values directly (assumed to be in 4326)
1416  coords.push_back(lon);
1417  coords.push_back(lat);
1418  return true;
1419 }

+ Here is the caller graph for this function:

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti)

Definition at line 368 of file Importer.cpp.

References appendDatum(), CHECK(), checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

// Builds the ArrayDatum that stands for a NULL array of array type `ti`.
//  - String element type: not supported here; CHECK(false) fires.
//  - ti.get_size() > 0: allocates `len` bytes, writes one NullArrayDatum()
//    value first and then NullDatum() values until the buffer is full.
//  - Otherwise: returns ArrayDatum(0, NULL, true).
// Every return passes `true` as the third ArrayDatum argument — presumably the
// is-null flag; confirm against sqltypes.h.
368  {
369  SQLTypeInfo elem_ti = ti.get_elem_type();
370  auto len = ti.get_size();
371 
372  if (elem_ti.is_string()) {
373  // must not be called for array of strings
374  CHECK(false);
375  return ArrayDatum(0, NULL, true);
376  }
377 
378  if (len > 0) {
379  // Compose a NULL fixlen array
380  int8_t* buf = (int8_t*)checked_malloc(len);
381  // First scalar is a NULL_ARRAY sentinel
382  Datum d = NullArrayDatum(elem_ti);
383  int8_t* p = appendDatum(buf, d, elem_ti);
384  // Rest is filled with normal NULL sentinels
385  Datum d0 = NullDatum(elem_ti);
      // NOTE(review): the loop only stops once (p - buf) >= len, so it relies on
      // `len` being an exact multiple of whatever appendDatum advances by
      // (presumably the element width); otherwise the last append would run
      // past `buf` before the CHECK below can fire — confirm.
386  while ((p - buf) < len) {
387  p = appendDatum(p, d0, elem_ti);
388  }
389  CHECK((p - buf) == len);
390  return ArrayDatum(len, buf, true);
391  }
392  // NULL varlen array
393  return ArrayDatum(0, NULL, true);
394 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:130
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:233
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:869
bool is_string() const
Definition: sqltypes.h:415
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:274
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:622

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti)

Definition at line 274 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

// Returns a Datum for the scalar type described by `ti`; decimals are first
// mapped through decimal_to_int_type(ti).
// Throws std::runtime_error for geometry types (kPOINT, kLINESTRING, kPOLYGON,
// kMULTIPOLYGON) and for any type not listed in the switch.
// NOTE(review): this listing is missing source lines 279, 282, 285, 288, 291,
// 294, 297 and 302 — one per `case`, immediately before each `break`. As shown,
// no case assigns to `d`. Judging by the "References" list for this function,
// the missing lines presumably store inline_fixed_encoding_null_array_val(ti),
// NULL_ARRAY_FLOAT or NULL_ARRAY_DOUBLE into the matching Datum member —
// confirm against Importer.cpp.
274  {
275  Datum d;
276  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
277  switch (type) {
278  case kBOOLEAN:
280  break;
281  case kBIGINT:
283  break;
284  case kINT:
286  break;
287  case kSMALLINT:
289  break;
290  case kTINYINT:
292  break;
293  case kFLOAT:
295  break;
296  case kDOUBLE:
298  break;
299  case kTIME:
300  case kTIMESTAMP:
301  case kDATE:
303  break;
304  case kPOINT:
305  case kLINESTRING:
306  case kPOLYGON:
307  case kMULTIPOLYGON:
308  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
309  default:
310  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
311  }
312  return d;
313 }
int8_t tinyintval
Definition: sqltypes.h:134
Definition: sqltypes.h:50
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
bool boolval
Definition: sqltypes.h:133
int32_t intval
Definition: sqltypes.h:136
float floatval
Definition: sqltypes.h:138
int64_t bigintval
Definition: sqltypes.h:137
int16_t smallintval
Definition: sqltypes.h:135
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:311
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
bool is_decimal() const
Definition: sqltypes.h:418
double doubleval
Definition: sqltypes.h:139
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:192

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::NullDatum ( SQLTypeInfo &  ti)

Definition at line 233 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

// Returns a Datum for the scalar type described by `ti`; decimals are first
// mapped through decimal_to_int_type(ti). For kFLOAT the value is NULL_FLOAT.
// Throws std::runtime_error for geometry types (kPOINT, kLINESTRING, kPOLYGON,
// kMULTIPOLYGON) and for any type not listed in the switch.
// NOTE(review): this listing is missing source lines 238, 241, 244, 247, 250,
// 256 and 261 — the statement before each `break` except the kFLOAT one. Judging
// by the "References" list for this function, they presumably store
// inline_fixed_encoding_null_val(ti) or NULL_DOUBLE into the matching Datum
// member — confirm against Importer.cpp.
233  {
234  Datum d;
235  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
236  switch (type) {
237  case kBOOLEAN:
239  break;
240  case kBIGINT:
242  break;
243  case kINT:
245  break;
246  case kSMALLINT:
248  break;
249  case kTINYINT:
251  break;
252  case kFLOAT:
253  d.floatval = NULL_FLOAT;
254  break;
255  case kDOUBLE:
257  break;
258  case kTIME:
259  case kTIMESTAMP:
260  case kDATE:
262  break;
263  case kPOINT:
264  case kLINESTRING:
265  case kPOLYGON:
266  case kMULTIPOLYGON:
267  throw std::runtime_error("Internal error: geometry type in NullDatum.");
268  default:
269  throw std::runtime_error("Internal error: invalid type in NullDatum.");
270  }
271  return d;
272 }
int8_t tinyintval
Definition: sqltypes.h:134
#define NULL_DOUBLE
Definition: sqltypes.h:185
Definition: sqltypes.h:50
bool boolval
Definition: sqltypes.h:133
int32_t intval
Definition: sqltypes.h:136
float floatval
Definition: sqltypes.h:138
int64_t bigintval
Definition: sqltypes.h:137
#define NULL_FLOAT
Definition: sqltypes.h:184
int16_t smallintval
Definition: sqltypes.h:135
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:311
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
bool is_decimal() const
Definition: sqltypes.h:418
double doubleval
Definition: sqltypes.h:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<SQLTypes, bool> import_export::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4376 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptors().

// Maps an OGR field type to a (SQLTypes, bool) pair.
// The bool is true for the *List field types and for OFTBinary, false for the
// scalar types — presumably an "is array" flag; confirm at the caller.
// Throws std::runtime_error("Unknown OGR field type: ...") for any field type
// not handled by the switch.
4376  {
4377  switch (ogr_type) {
4378  case OFTInteger:
4379  return std::make_pair(kINT, false);
4380  case OFTIntegerList:
4381  return std::make_pair(kINT, true);
      // The 64-bit integer cases are compiled only when GDAL_VERSION_MAJOR > 1.
4382 #if GDAL_VERSION_MAJOR > 1
4383  case OFTInteger64:
4384  return std::make_pair(kBIGINT, false);
4385  case OFTInteger64List:
4386  return std::make_pair(kBIGINT, true);
4387 #endif
4388  case OFTReal:
4389  return std::make_pair(kDOUBLE, false);
4390  case OFTRealList:
4391  return std::make_pair(kDOUBLE, true);
4392  case OFTString:
4393  return std::make_pair(kTEXT, false);
4394  case OFTStringList:
4395  return std::make_pair(kTEXT, true);
4396  case OFTDate:
4397  return std::make_pair(kDATE, false);
4398  case OFTTime:
4399  return std::make_pair(kTIME, false);
4400  case OFTDateTime:
4401  return std::make_pair(kTIMESTAMP, false);
4402  case OFTBinary:
4403  // Interpret binary blobs as byte arrays here
4404  // but actual import will store NULL as GDAL will not
4405  // extract the blob (OGRFeature::GetFieldAsString will
4406  // result in the import buffers having an empty string)
4407  return std::make_pair(kTINYINT, true);
4408  default:
4409  break;
4410  }
4411  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4412 }
Definition: sqltypes.h:50
std::string to_string(char const *&&v)
Definition: sqltypes.h:53
Definition: sqltypes.h:54
Definition: sqltypes.h:46

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes import_export::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4414 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

// Maps an OGR geometry type to the corresponding SQLTypes value.
// Only wkbPoint, wkbLineString, wkbPolygon and wkbMultiPolygon are handled;
// every other geometry type throws
// std::runtime_error("Unknown OGR geom type: ...").
4414  {
4415  switch (ogr_type) {
4416  case wkbPoint:
4417  return kPOINT;
4418  case wkbLineString:
4419  return kLINESTRING;
4420  case wkbPolygon:
4421  return kPOLYGON;
4422  case wkbMultiPolygon:
4423  return kMULTIPOLYGON;
4424  default:
4425  break;
4426  }
4427  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4428 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor *  td,
Loader *  loader 
)

Definition at line 5138 of file Importer.cpp.

References CHECK(), import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by Parser::AddColumnStmt::execute(), and DBHandler::prepare_columnar_loader().

// Creates one TypedImportBuffer per column descriptor returned by
// loader->get_column_descs(), each constructed with that descriptor and
// loader->getStringDict(cd), and returns them in iteration order.
// NOTE(review): in this listing `td` is only CHECKed, never read, and `loader`
// is dereferenced without a check.
5140  {
5141  CHECK(td);
5142  auto col_descs = loader->get_column_descs();
5143 
5144  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
5145  for (auto cd : col_descs) {
5146  import_buffers.emplace_back(
5147  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
5148  }
5149 
5150  return import_buffers;
5151 }
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 315 of file Importer.cpp.

References appendDatum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK(), checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value().

// Parses the textual array literal `s` into an ArrayDatum whose element type is
// ti.get_elem_type().
//  - `s` equal to copy_params.null_str, equal to "NULL", or empty:
//    returns ArrayDatum(0, NULL, true).
//  - `s` not enclosed in copy_params.array_begin / copy_params.array_end:
//    logs a "Malformed array" warning and returns ArrayDatum(0, NULL, true).
//  - Non-string element type: returns ArrayDatum(len, buf, false) with one
//    encoded element per delimited field.
//  - String element type: not supported here; CHECK(false) fires.
317  {
318  SQLTypeInfo elem_ti = ti.get_elem_type();
319  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
320  return ArrayDatum(0, NULL, true);
321  }
322  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
323  LOG(WARNING) << "Malformed array: " << s;
324  return ArrayDatum(0, NULL, true);
325  }
      // Split on array_delim, starting just after the opening character. This
      // is a plain find(): quoting or nesting of the delimiter is not considered.
326  std::vector<std::string> elem_strs;
327  size_t last = 1;
328  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
329  i = s.find(copy_params.array_delim, last)) {
330  elem_strs.push_back(s.substr(last, i - last));
331  last = i + 1;
332  }
      // Final field: everything after the last delimiter, minus the closing
      // character.
333  if (last + 1 <= s.size()) {
334  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
335  }
      // A single field that trims to nothing means an empty array, not an
      // array holding one NULL.
336  if (elem_strs.size() == 1) {
337  auto str = elem_strs.front();
338  auto str_trimmed = trim_space(str.c_str(), str.length());
339  if (str_trimmed == "") {
340  elem_strs.clear(); // Empty array
341  }
342  }
343  if (!elem_ti.is_string()) {
344  size_t len = elem_strs.size() * elem_ti.get_size();
345  int8_t* buf = (int8_t*)checked_malloc(len);
346  int8_t* p = buf;
347  for (auto& es : elem_strs) {
348  auto e = trim_space(es.c_str(), es.length());
349  bool is_null = (e == copy_params.null_str) || e == "NULL";
      // `!elem_ti.is_string()` is already guaranteed by the enclosing `if` at 343.
350  if (!elem_ti.is_string() && e == "") {
351  is_null = true;
352  }
      // NOTE(review): for number/time element types any field whose first
      // character is neither a digit nor '-' is stored as NULL, so inputs such
      // as "+1" or ".5" become NULL rather than being parsed — confirm intended.
      // NOTE(review): isdigit() receives a plain `char`; the C library requires
      // the argument to be representable as unsigned char, so a byte >= 0x80
      // would be a problem where `char` is signed — verify.
353  if (elem_ti.is_number() || elem_ti.is_time()) {
354  if (!isdigit(e[0]) && e[0] != '-') {
355  is_null = true;
356  }
357  }
358  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
359  p = appendDatum(p, d, elem_ti);
360  }
361  return ArrayDatum(len, buf, false);
362  }
363  // must not be called for array of strings
364  CHECK(false);
365  return ArrayDatum(0, NULL, true);
366 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
#define LOG(tag)
Definition: Logger.h:188
bool is_number() const
Definition: sqltypes.h:420
bool is_time() const
Definition: sqltypes.h:421
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:130
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:233
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:869
void trim_space(const char *&field_begin, const char *&field_end)
bool is_null(const T &v, const SQLTypeInfo &t)
bool is_string() const
Definition: sqltypes.h:415
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:622

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti 
)

Definition at line 452 of file Importer.cpp.

References appendDatum(), CHECK(), checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

// Converts a TDatum holding an array into an ArrayDatum whose element type is
// ti.get_elem_type(). String element types are rejected by CHECK.
// A null `datum` yields NullArray(ti); otherwise every entry of
// datum.val.arr_val is converted with TDatumToDatum() and appended to a buffer
// of arr_val.size() * elem_ti.get_size() bytes.
452  {
453  SQLTypeInfo elem_ti = ti.get_elem_type();
454 
455  CHECK(!elem_ti.is_string());
456 
457  if (datum.is_null) {
458  return NullArray(ti);
459  }
460 
461  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
462  int8_t* buf = (int8_t*)checked_malloc(len);
463  int8_t* p = buf;
464  for (auto& e : datum.val.arr_val) {
465  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
466  }
467 
468  return ArrayDatum(len, buf, false);
469 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:368
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:407
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:130
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:869
bool is_string() const
Definition: sqltypes.h:415
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:622

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti 
)

Definition at line 407 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

// Converts a scalar TDatum into a Datum for the type described by `ti`;
// decimals are first mapped through decimal_to_int_type(ti).
// When datum.is_null is set the result is the type's NULL value
// (inline_fixed_encoding_null_val(ti), NULL_FLOAT or NULL_DOUBLE); otherwise it
// is datum.val.int_val for integer, boolean and time types, or
// datum.val.real_val for kFLOAT and kDOUBLE.
// Throws std::runtime_error for geometry types and for any type not listed in
// the switch.
407  {
408  Datum d;
409  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
410  switch (type) {
411  case kBOOLEAN:
      // NOTE(review): the cross-references on this page show `bool boolval` and
      // an int64_t return from inline_fixed_encoding_null_val(); if so, any
      // non-zero NULL sentinel collapses to `true` here — confirm intended.
412  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
413  break;
414  case kBIGINT:
415  d.bigintval =
416  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
417  break;
418  case kINT:
419  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
420  break;
421  case kSMALLINT:
422  d.smallintval =
423  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
424  break;
425  case kTINYINT:
426  d.tinyintval =
427  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
428  break;
429  case kFLOAT:
430  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
431  break;
432  case kDOUBLE:
433  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
434  break;
      // Time, timestamp and date values all go into the 64-bit integer member.
435  case kTIME:
436  case kTIMESTAMP:
437  case kDATE:
438  d.bigintval =
439  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
440  break;
441  case kPOINT:
442  case kLINESTRING:
443  case kPOLYGON:
444  case kMULTIPOLYGON:
445  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
446  default:
447  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
448  }
449  return d;
450 }
int8_t tinyintval
Definition: sqltypes.h:134
#define NULL_DOUBLE
Definition: sqltypes.h:185
Definition: sqltypes.h:50
bool boolval
Definition: sqltypes.h:133
int32_t intval
Definition: sqltypes.h:136
float floatval
Definition: sqltypes.h:138
int64_t bigintval
Definition: sqltypes.h:137
#define NULL_FLOAT
Definition: sqltypes.h:184
int16_t smallintval
Definition: sqltypes.h:135
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:311
Definition: sqltypes.h:54
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:46
bool is_decimal() const
Definition: sqltypes.h:418
double doubleval
Definition: sqltypes.h:139

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 221 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

// Returns a copy of the `len` characters at `field` with leading and trailing
// ' ' and '\r' characters removed. No other whitespace (tab, newline) is
// stripped. An all-blank or zero-length input yields an empty string.
221  {
222  size_t i = 0;
223  size_t j = len;
      // Advance `i` past leading blanks, then pull `j` back over trailing ones;
      // the `i < j` guard keeps both scans inside [0, len).
224  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
225  i++;
226  }
227  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
228  j--;
229  }
230  return std::string(field + i, j - i);
231 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the caller graph for this function:

template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 2983 of file Importer.cpp.

// Reports whether `str` can be converted to T by boost::lexical_cast.
// The converted value is discarded; only success or failure is returned.
// Only boost::bad_lexical_cast is caught, so any other exception propagates.
2983  {
2984  try {
2985  boost::lexical_cast<T>(str);
2986  } catch (const boost::bad_lexical_cast& e) {
2987  return false;
2988  }
2989  return true;
2990 }
char* import_export::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 2992 of file Importer.cpp.

Referenced by import_export::Detector::detect_sqltype().

// Tries each entry of `formats` in order with strptime() and returns the
// pointer from the first call that succeeds, or nullptr when none matches.
// The parsed std::tm is discarded; callers only receive strptime()'s return
// value (per POSIX, a pointer to the character following the last one parsed).
2992  {
2993  std::tm tm_struct;
2994  char* buf;
      // NOTE(review): `auto format` copies each std::string on every iteration;
      // `const auto&` would avoid the copy without changing behaviour.
2995  for (auto format : formats) {
2996  buf = strptime(str, format.c_str(), &tm_struct);
2997  if (buf) {
2998  return buf;
2999  }
3000  }
3001  return nullptr;
3002 }

+ Here is the caller graph for this function:

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static
constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 32 of file CopyParams.h.

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static