OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export Namespace Reference

Namespaces

 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 delimited_parser
 

Classes

struct  CopyParams
 
struct  GeoImportException
 
struct  BadRowsTracker
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  ImporterUtils
 
class  RenderGroupAnalyzer
 
class  Importer
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr
< TypedImportBuffer > > 
setup_column_loaders (const TableDescriptor *td, Loader *loader)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string,
ImportStatus > 
import_status_map
 

Typedef Documentation

using import_export::ArraySliceRange = std::pair<size_t, size_t>

Definition at line 72 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 139 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = std::map<std::string, std::string>

Definition at line 137 of file Importer.cpp.

using import_export::FeaturePtrVector = std::vector<OGRFeatureUqPtr>

Definition at line 140 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = std::map<std::string, size_t>

Definition at line 136 of file Importer.cpp.

Enumeration Type Documentation

enum import_export::FileType

Enumerator
DELIMITED 
POLYGON 

Definition at line 34 of file CopyParams.h.

34  {
35  DELIMITED,
36  POLYGON
37 #ifdef ENABLE_IMPORT_PARQUET
38  ,
39  PARQUET
40 #endif
41 };

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 403 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

403  {
404  const auto& arr = datum.val.arr_val;
405  for (const auto& elem_datum : arr) {
406  string_vec.push_back(elem_datum.val.str_val);
407  }
408 }

+ Here is the caller graph for this function:

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4609 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

4610  {
4611  // prepare to gather subdirectories
4612  std::vector<std::string> subdirectories;
4613 
4614  // get entries
4615  char** entries = VSIReadDir(archive_path.c_str());
4616  if (!entries) {
4617  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4618  return;
4619  }
4620 
4621  // force scope
4622  {
4623  // request clean-up
4624  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4625 
4626  // check all the entries
4627  int index = 0;
4628  while (true) {
4629  // get next entry, or drop out if there isn't one
4630  char* entry_c = entries[index++];
4631  if (!entry_c) {
4632  break;
4633  }
4634  std::string entry(entry_c);
4635 
4636  // ignore '.' and '..'
4637  if (entry == "." || entry == "..") {
4638  continue;
4639  }
4640 
4641  // build the full path
4642  std::string entry_path = archive_path + std::string("/") + entry;
4643 
4644  // is it a file or a sub-folder
4645  VSIStatBufL sb;
4646  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4647  if (result < 0) {
4648  break;
4649  }
4650 
4651  if (VSI_ISDIR(sb.st_mode)) {
4652  // a directory that ends with .gdb could be a Geodatabase bundle
4653  // arguably dangerous to decide this purely by name, but any further
4654  // validation would be very complex especially at this scope
4655  if (boost::iends_with(entry_path, ".gdb")) {
4656  // add the directory as if it was a file and don't recurse into it
4657  files.push_back(entry_path);
4658  } else {
4659  // add subdirectory to be recursed into
4660  subdirectories.push_back(entry_path);
4661  }
4662  } else {
4663  // add this file
4664  files.push_back(entry_path);
4665  }
4666  }
4667  }
4668 
4669  // recurse into each subdirectory we found
4670  for (const auto& subdirectory : subdirectories) {
4671  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4672  }
4673 }
#define LOG(tag)
Definition: Logger.h:188
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4609

+ Here is the caller graph for this function:

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1808 of file Importer.cpp.

References CHECK, CHECK_LT, Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, DELIMITED, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::file_type, import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), and to_string().

Referenced by import_export::Importer::importDelimited().

1816  {
1817  ImportStatus import_status;
1818  int64_t total_get_row_time_us = 0;
1819  int64_t total_str_to_val_time_us = 0;
1820  CHECK(scratch_buffer);
1821  auto buffer = scratch_buffer.get();
1822  auto load_ms = measure<>::execution([]() {});
1823  auto ms = measure<>::execution([&]() {
1824  const CopyParams& copy_params = importer->get_copy_params();
1825  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1826  size_t begin =
1827  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
1828  const char* thread_buf = buffer + begin_pos + begin;
1829  const char* thread_buf_end = buffer + end_pos;
1830  const char* buf_end = buffer + total_size;
1831  bool try_single_thread = false;
1832  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1833  importer->get_import_buffers(thread_id);
1834  auto us = measure<std::chrono::microseconds>::execution([&]() {});
1835  int phys_cols = 0;
1836  int point_cols = 0;
1837  for (const auto cd : col_descs) {
1838  const auto& col_ti = cd->columnType;
1839  phys_cols += col_ti.get_physical_cols();
1840  if (cd->columnType.get_type() == kPOINT) {
1841  point_cols++;
1842  }
1843  }
1844  auto num_cols = col_descs.size() - phys_cols;
1845  for (const auto& p : import_buffers) {
1846  p->clear();
1847  }
1848  std::vector<std::string_view> row;
1849  size_t row_index_plus_one = 0;
1850  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1851  row.clear();
1852  std::vector<std::unique_ptr<char[]>>
1853  tmp_buffers; // holds string w/ removed escape chars, etc
1854  if (DEBUG_TIMING) {
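1855  us = measure<std::chrono::microseconds>::execution([&]() {
1856  p = import_export::delimited_parser::get_row(p,
1857  thread_buf_end,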
1858  buf_end,
1859  copy_params,
1860  importer->get_is_array(),
1861  row,
1862  tmp_buffers,
1863  try_single_thread);
1864  });
1865  total_get_row_time_us += us;
1866  } else {
1867  p = import_export::delimited_parser::get_row(p,
1868  thread_buf_end,
1869  buf_end,
1870  copy_params,
1871  importer->get_is_array(),
1872  row,
1873  tmp_buffers,
1874  try_single_thread);
1875  }
1876  row_index_plus_one++;
1877  // Each POINT could consume two separate coords instead of a single WKT
1878  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1879  import_status.rows_rejected++;
1880  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1881  << row.size() << "): " << shared::printContainer(row);
1882  if (import_status.rows_rejected > copy_params.max_reject) {
1883  break;
1884  }
1885  continue;
1886  }
1887 
1888  //
1889  // lambda for importing a row (perhaps multiple times if exploding a collection)
1890  //
1891 
1892  auto execute_import_row = [&](OGRGeometry* import_geometry) {
1893  size_t import_idx = 0;
1894  size_t col_idx = 0;
1895  try {
1896  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1897  auto cd = *cd_it;
1898  const auto& col_ti = cd->columnType;
1899 
1900  bool is_null =
1901  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1902  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1903  // So initially nullness may be missed and not passed to add_value,
1904  // which then might also check and still decide it's actually a NULL, e.g.
1905  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
1906  // So "NULL" is not recognized as NULL but then it's not recognized as
1907  // a valid kINT, so it's a NULL after all.
1908  // Checking for "NULL" here too, as a widely accepted notation for NULL.
1909 
1910  // Treating empty as NULL
1911  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1912  is_null = true;
1913  }
1914 
1915  if (col_ti.get_physical_cols() == 0) {
1916  // not geo
1917 
1918  import_buffers[col_idx]->add_value(
1919  cd, row[import_idx], is_null, copy_params);
1920 
1921  // next
1922  ++import_idx;
1923  ++col_idx;
1924  } else {
1925  // geo
1926 
1927  // store null string in the base column
1928  import_buffers[col_idx]->add_value(
1929  cd, copy_params.null_str, true, copy_params);
1930 
1931  // WKT from string we're not storing
1932  auto const& geo_string = row[import_idx];
1933 
1934  // next
1935  ++import_idx;
1936  ++col_idx;
1937 
1938  SQLTypes col_type = col_ti.get_type();
1939  CHECK(IS_GEO(col_type));
1940 
1941  std::vector<double> coords;
1942  std::vector<double> bounds;
1943  std::vector<int> ring_sizes;
1944  std::vector<int> poly_rings;
1945  int render_group = 0;
1946 
1947  // if this is a POINT column, and the field is not null, and
1948  // looks like a scalar numeric value (and not a hex blob)
1949  // attempt to import two columns as lon/lat (or lat/lon)
1950  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
1951  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
1952  geo_string[0] == '-') &&
1953  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
1954  double lon = std::atof(std::string(geo_string).c_str());
1955  double lat = NAN;
1956  auto lat_str = row[import_idx];
1957  ++import_idx;
1958  if (lat_str.size() > 0 &&
1959  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1960  lat = std::atof(std::string(lat_str).c_str());
1961  }
1962  // Swap coordinates if this table uses a reverse order: lat/lon
1963  if (!copy_params.lonlat) {
1964  std::swap(lat, lon);
1965  }
1966  // TODO: should check if POINT column should have been declared with
1967  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1968  // throw std::runtime_error("POINT column " + cd->columnName + " is
1969  // not WGS84, cannot insert lon/lat");
1970  // }
1971  SQLTypeInfo import_ti{col_ti};
1972  if (copy_params.file_type == FileType::DELIMITED &&
1973  import_ti.get_output_srid() == 4326) {
1974  auto srid0 = copy_params.source_srid;
1975  if (srid0 > 0) {
1976  // srid0 -> 4326 transform is requested on import
1977  import_ti.set_input_srid(srid0);
1978  }
1979  }
1980  if (!importGeoFromLonLat(lon, lat, coords, import_ti)) {
1981  throw std::runtime_error(
1982  "Cannot read lon/lat to insert into POINT column " +
1983  cd->columnName);
1984  }
1985  } else {
1986  // import it
1987  SQLTypeInfo import_ti{col_ti};
1988  if (copy_params.file_type == FileType::DELIMITED &&
1989  import_ti.get_output_srid() == 4326) {
1990  auto srid0 = copy_params.source_srid;
1991  if (srid0 > 0) {
1992  // srid0 -> 4326 transform is requested on import
1993  import_ti.set_input_srid(srid0);
1994  }
1995  }
1996  if (is_null) {
1997  if (col_ti.get_notnull()) {
1998  throw std::runtime_error("NULL geo for column " + cd->columnName);
1999  }
2000  Geospatial::GeoTypesFactory::getNullGeoColumns(
2001  import_ti,
2002  coords,
2003  bounds,
2004  ring_sizes,
2005  poly_rings,
2006  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2007  } else {
2008  if (import_geometry) {
2009  // geometry already exploded
2010  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2011  import_geometry,
2012  import_ti,
2013  coords,
2014  bounds,
2015  ring_sizes,
2016  poly_rings,
2017  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2018  std::string msg =
2019  "Failed to extract valid geometry from exploded row " +
2020  std::to_string(first_row_index_this_buffer +
2021  row_index_plus_one) +
2022  " for column " + cd->columnName;
2023  throw std::runtime_error(msg);
2024  }
2025  } else {
2026  // extract geometry directly from WKT
2027  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2028  std::string(geo_string),
2029  import_ti,
2030  coords,
2031  bounds,
2032  ring_sizes,
2033  poly_rings,
2034  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2035  std::string msg = "Failed to extract valid geometry from row " +
2036  std::to_string(first_row_index_this_buffer +
2037  row_index_plus_one) +
2038  " for column " + cd->columnName;
2039  throw std::runtime_error(msg);
2040  }
2041  }
2042 
2043  // validate types
2044  if (col_type != import_ti.get_type()) {
2045  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2046  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2047  col_type == SQLTypes::kMULTIPOLYGON)) {
2048  throw std::runtime_error(
2049  "Imported geometry doesn't match the type of column " +
2050  cd->columnName);
2051  }
2052  }
2053  }
2054 
2055  // assign render group?
2056  if (columnIdToRenderGroupAnalyzerMap.size()) {
2057  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2058  if (ring_sizes.size()) {
2059  // get a suitable render group for these poly coords
2060  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2061  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2062  render_group =
2063  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2064  } else {
2065  // empty poly
2066  render_group = -1;
2067  }
2068  }
2069  }
2070  }
2071 
2072  // import extracted geo
2073  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2074  cd,
2075  import_buffers,
2076  col_idx,
2077  coords,
2078  bounds,
2079  ring_sizes,
2080  poly_rings,
2081  render_group);
2082 
2083  // skip remaining physical columns
2084  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2085  ++cd_it;
2086  }
2087  }
2088  }
2089  import_status.rows_completed++;
2090  } catch (const std::exception& e) {
2091  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2092  import_buffers[col_idx_to_pop]->pop_value();
2093  }
2094  import_status.rows_rejected++;
2095  LOG(ERROR) << "Input exception thrown: " << e.what()
2096  << ". Row discarded. Data: " << shared::printContainer(row);
2097  }
2098  };
2099 
2100  if (copy_params.geo_explode_collections) {
2101  // explode and import
2102  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2103  explode_collections_step1(col_descs);
2104  // pull out the collection WKT or WKB hex
2105  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2106  auto const& collection_geo_string = row[collection_col_idx];
2107  // convert to OGR
2108  OGRGeometry* ogr_geometry = nullptr;
2109  ScopeGuard destroy_ogr_geometry = [&] {
2110  if (ogr_geometry) {
2111  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2112  }
2113  };
2114  ogr_geometry = Geospatial::GeoTypesFactory::createOGRGeometry(
2115  std::string(collection_geo_string));
2116  // do the explode and import
2117  us = explode_collections_step2(ogr_geometry,
2118  collection_child_type,
2119  collection_col_name,
2120  first_row_index_this_buffer + row_index_plus_one,
2121  execute_import_row);
2122  } else {
2123  // import non-collection row just once
2124  us = measure<std::chrono::microseconds>::execution(
2125  [&] { execute_import_row(nullptr); });
2126  }
2127  total_str_to_val_time_us += us;
2128  } // end thread
2129  if (import_status.rows_completed > 0) {
2130  load_ms = measure<>::execution(
2131  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2132  }
2133  });
2134  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2135  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2136  << import_status.rows_completed << " rows inserted in "
2137  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2138  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2139  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2140  << "sec" << std::endl;
2141  }
2142 
2143  import_status.thread_id = thread_id;
2144  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2145 
2146  return import_status;
2147 }
SQLTypes
Definition: sqltypes.h:37
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1718
#define LOG(tag)
Definition: Logger.h:188
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1114
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:142
void set_input_srid(int d)
Definition: sqltypes.h:405
CONSTEXPR DEVICE bool is_null(const T &value)
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:907
#define CHECK_LT(x, y)
Definition: Logger.h:207
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex)
Definition: Types.cpp:873
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1684
ThreadId thread_id()
Definition: Logger.cpp:732
#define CHECK(condition)
Definition: Logger.h:197
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
Definition: Importer.cpp:1413
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:64
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define IS_GEO(T)
Definition: sqltypes.h:242
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2149 of file Importer.cpp.

References CHECK, Geospatial::compress_coords(), DEBUG_TIMING, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), timer_stop(), and to_string().

Referenced by import_export::Importer::importGDAL().

2158  {
2159  ImportStatus import_status;
2160  const CopyParams& copy_params = importer->get_copy_params();
2161  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2162  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2163  importer->get_import_buffers(thread_id);
2164 
2165  for (const auto& p : import_buffers) {
2166  p->clear();
2167  }
2168 
2169  auto convert_timer = timer_start();
2170 
2171  // we create this on the fly based on the first feature's SR
2172  std::unique_ptr<OGRCoordinateTransformation> coordinate_transformation;
2173 
2174  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2175  if (!features[iFeature]) {
2176  continue;
2177  }
2178 
2179  // get this feature's geometry
2180  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2181  if (pGeometry) {
2182  // for geodatabase, we need to consider features with no geometry
2183  // as we still want to create a table, even if it has no geo column
2184 
2185  // transform it
2186  // avoid GDAL error if not transformable
2187  auto geometry_sr = pGeometry->getSpatialReference();
2188  if (geometry_sr) {
2189  // create an OGRCoordinateTransformation (CT) on the fly
2190  // we must assume that all geo in this file will have
2191  // the same source SR, so the CT will be valid for all
2192  // transforming to a reusable CT is faster than to an SR
2193  if (coordinate_transformation == nullptr) {
2194  coordinate_transformation.reset(
2195  OGRCreateCoordinateTransformation(geometry_sr, poGeographicSR));
2196  if (coordinate_transformation == nullptr) {
2197  throw std::runtime_error(
2198  "Failed to create a GDAL CoordinateTransformation for incoming geo");
2199  }
2200  }
2201  pGeometry->transform(coordinate_transformation.get());
2202  }
2203  }
2204 
2205  //
2206  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2207  //
2208 
2209  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2210  size_t col_idx = 0;
2211  try {
2212  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2213  auto cd = *cd_it;
2214 
2215  // is this a geo column?
2216  const auto& col_ti = cd->columnType;
2217  if (col_ti.is_geometry()) {
2218  // Note that this assumes there is one and only one geo column in the table.
2219  // Currently, the importer only supports reading a single geospatial feature
2220  // from an input shapefile / geojson file, but this code will need to be
2221  // modified if that changes
2222  SQLTypes col_type = col_ti.get_type();
2223 
2224  // store null string in the base column
2225  import_buffers[col_idx]->add_value(
2226  cd, copy_params.null_str, true, copy_params);
2227  ++col_idx;
2228 
2229  // the data we now need to extract for the other columns
2230  std::vector<double> coords;
2231  std::vector<double> bounds;
2232  std::vector<int> ring_sizes;
2233  std::vector<int> poly_rings;
2234  int render_group = 0;
2235 
2236  // extract it
2237  SQLTypeInfo import_ti{col_ti};
2238  bool is_null_geo = !import_geometry;
2239  if (is_null_geo) {
2240  if (col_ti.get_notnull()) {
2241  throw std::runtime_error("NULL geo for column " + cd->columnName);
2242  }
2243  Geospatial::GeoTypesFactory::getNullGeoColumns(
2244  import_ti,
2245  coords,
2246  bounds,
2247  ring_sizes,
2248  poly_rings,
2249  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2250  } else {
2251  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2252  import_geometry,
2253  import_ti,
2254  coords,
2255  bounds,
2256  ring_sizes,
2257  poly_rings,
2258  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2259  std::string msg = "Failed to extract valid geometry from feature " +
2260  std::to_string(firstFeature + iFeature + 1) +
2261  " for column " + cd->columnName;
2262  throw std::runtime_error(msg);
2263  }
2264 
2265  // validate types
2266  if (col_type != import_ti.get_type()) {
2267  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2268  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2269  col_type == SQLTypes::kMULTIPOLYGON)) {
2270  throw std::runtime_error(
2271  "Imported geometry doesn't match the type of column " +
2272  cd->columnName);
2273  }
2274  }
2275  }
2276 
2277  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2278  if (ring_sizes.size()) {
2279  // get a suitable render group for these poly coords
2280  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2281  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2282  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2283  } else {
2284  // empty poly
2285  render_group = -1;
2286  }
2287  }
2288 
2289  // create coords array value and add it to the physical column
2290  ++cd_it;
2291  auto cd_coords = *cd_it;
2292  std::vector<TDatum> td_coord_data;
2293  if (!is_null_geo) {
2294  std::vector<uint8_t> compressed_coords =
2295  Geospatial::compress_coords(coords, col_ti);
2296  for (auto cc : compressed_coords) {
2297  TDatum td_byte;
2298  td_byte.val.int_val = cc;
2299  td_coord_data.push_back(td_byte);
2300  }
2301  }
2302  TDatum tdd_coords;
2303  tdd_coords.val.arr_val = td_coord_data;
2304  tdd_coords.is_null = is_null_geo;
2305  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2306  ++col_idx;
2307 
2308  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2309  // Create ring_sizes array value and add it to the physical column
2310  ++cd_it;
2311  auto cd_ring_sizes = *cd_it;
2312  std::vector<TDatum> td_ring_sizes;
2313  if (!is_null_geo) {
2314  for (auto ring_size : ring_sizes) {
2315  TDatum td_ring_size;
2316  td_ring_size.val.int_val = ring_size;
2317  td_ring_sizes.push_back(td_ring_size);
2318  }
2319  }
2320  TDatum tdd_ring_sizes;
2321  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2322  tdd_ring_sizes.is_null = is_null_geo;
2323  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2324  ++col_idx;
2325  }
2326 
2327  if (col_type == kMULTIPOLYGON) {
2328  // Create poly_rings array value and add it to the physical column
2329  ++cd_it;
2330  auto cd_poly_rings = *cd_it;
2331  std::vector<TDatum> td_poly_rings;
2332  if (!is_null_geo) {
2333  for (auto num_rings : poly_rings) {
2334  TDatum td_num_rings;
2335  td_num_rings.val.int_val = num_rings;
2336  td_poly_rings.push_back(td_num_rings);
2337  }
2338  }
2339  TDatum tdd_poly_rings;
2340  tdd_poly_rings.val.arr_val = td_poly_rings;
2341  tdd_poly_rings.is_null = is_null_geo;
2342  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2343  ++col_idx;
2344  }
2345 
2346  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2347  col_type == kMULTIPOLYGON) {
2348  // Create bounds array value and add it to the physical column
2349  ++cd_it;
2350  auto cd_bounds = *cd_it;
2351  std::vector<TDatum> td_bounds_data;
2352  if (!is_null_geo) {
2353  for (auto b : bounds) {
2354  TDatum td_double;
2355  td_double.val.real_val = b;
2356  td_bounds_data.push_back(td_double);
2357  }
2358  }
2359  TDatum tdd_bounds;
2360  tdd_bounds.val.arr_val = td_bounds_data;
2361  tdd_bounds.is_null = is_null_geo;
2362  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2363  ++col_idx;
2364  }
2365 
2366  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2367  // Create render_group value and add it to the physical column
2368  ++cd_it;
2369  auto cd_render_group = *cd_it;
2370  TDatum td_render_group;
2371  td_render_group.val.int_val = render_group;
2372  td_render_group.is_null = is_null_geo;
2373  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2374  ++col_idx;
2375  }
2376  } else {
2377  // regular column
2378  // pull from GDAL metadata
2379  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2380  CHECK(cit != columnNameToSourceNameMap.end());
2381  auto const& field_name = cit->second;
2382 
2383  auto const fit = fieldNameToIndexMap.find(field_name);
2384  CHECK(fit != fieldNameToIndexMap.end());
2385  auto const& field_index = fit->second;
2386  CHECK(field_index < fieldNameToIndexMap.size());
2387 
2388  auto const& feature = features[iFeature];
2389 
2390  auto field_defn = feature->GetFieldDefnRef(field_index);
2391  CHECK(field_defn);
2392 
2393  // OGRFeature::GetFieldAsString() can only return 80 characters
2394  // so for array columns, we are obliged to fetch the actual values
2395  // and construct the concatenated string ourselves
2396 
2397  std::string value_string;
2398  int array_index = 0, array_size = 0;
2399 
2400  auto stringify_numeric_list = [&](auto* values) {
2401  value_string = "{";
2402  while (array_index < array_size) {
2403  auto separator = (array_index > 0) ? "," : "";
2404  value_string += separator + std::to_string(values[array_index]);
2405  array_index++;
2406  }
2407  value_string += "}";
2408  };
2409 
2410  auto field_type = field_defn->GetType();
2411  switch (field_type) {
2412  case OFTInteger:
2413  case OFTInteger64:
2414  case OFTReal:
2415  case OFTString:
2416  case OFTBinary:
2417  case OFTDate:
2418  case OFTTime:
2419  case OFTDateTime: {
2420  value_string = feature->GetFieldAsString(field_index);
2421  } break;
2422  case OFTIntegerList: {
2423  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2424  stringify_numeric_list(values);
2425  } break;
2426  case OFTInteger64List: {
2427  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2428  stringify_numeric_list(values);
2429  } break;
2430  case OFTRealList: {
2431  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2432  stringify_numeric_list(values);
2433  } break;
2434  case OFTStringList: {
2435  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2436  value_string = "{";
2437  if (array_of_strings) {
2438  while (auto* this_string = array_of_strings[array_index]) {
2439  auto separator = (array_index > 0) ? "," : "";
2440  value_string += separator + std::string(this_string);
2441  array_index++;
2442  }
2443  }
2444  value_string += "}";
2445  } break;
2446  default:
2447  throw std::runtime_error("Unsupported geo file field type (" +
2448  std::to_string(static_cast<int>(field_type)) +
2449  ")");
2450  }
2451 
2452  static CopyParams default_copy_params;
2453  import_buffers[col_idx]->add_value(
2454  cd, value_string, false, default_copy_params);
2455  ++col_idx;
2456  }
2457  }
2458  import_status.rows_completed++;
2459  } catch (const std::exception& e) {
2460  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2461  import_buffers[col_idx_to_pop]->pop_value();
2462  }
2463  import_status.rows_rejected++;
2464  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2465  }
2466  };
2467 
2468  if (pGeometry && copy_params.geo_explode_collections) {
2469  // explode and import
2470  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2471  explode_collections_step1(col_descs);
2472  explode_collections_step2(pGeometry,
2473  collection_child_type,
2474  collection_col_name,
2475  firstFeature + iFeature + 1,
2476  execute_import_feature);
2477  } else {
2478  // import non-collection or null feature just once
2479  execute_import_feature(pGeometry);
2480  }
2481  } // end features
2482 
2483  float convert_ms =
2484  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2485  convert_timer)) /
2486  1000.0f;
2487 
2488  float load_ms = 0.0f;
2489  if (import_status.rows_completed > 0) {
2490  auto load_timer = timer_start();
2491  importer->load(import_buffers, import_status.rows_completed);
2492  load_ms =
2493  float(
2494  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2495  load_timer)) /
2496  1000.0f;
2497  }
2498 
2499  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2500  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2501  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2502  }
2503 
2504  import_status.thread_id = thread_id;
2505 
2506  if (DEBUG_TIMING) {
2507  LOG(INFO) << "DEBUG: Total "
2508  << float(timer_stop<std::chrono::steady_clock::time_point,
2509  std::chrono::microseconds>(convert_timer)) /
2510  1000.0f
2511  << "ms";
2512  }
2513 
2514  return import_status;
2515 }
SQLTypes
Definition: sqltypes.h:37
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1718
#define LOG(tag)
Definition: Logger.h:188
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1114
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:142
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:907
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1684
ThreadId thread_id()
Definition: Logger.cpp:732
#define CHECK(condition)
Definition: Logger.h:197
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
SQLTypeInfo ti 
)

Definition at line 1413 of file Importer.cpp.

References Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

// Body of import_export::importGeoFromLonLat(lon, lat, coords, ti).
// Turns one longitude/latitude pair into point coordinates stored in `coords`.
// Returns false when either input is infinite or NaN, or when the GeoPoint
// transform reports failure; on those paths nothing is written to `coords`.
// Returns true once coordinates have been emitted.
1416  {
// Reject non-finite input before touching `coords`.
1417  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1418  return false;
1419  }
// When the column type asks for a transform, go through Geospatial::GeoPoint.
1420  if (ti.transforms()) {
1421  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1422  if (!pt.transform(ti)) {
1423  return false;
1424  }
// getColumns() fills `coords` from the point (presumably the transformed
// values -- confirm against Types.cpp).
1425  pt.getColumns(coords);
1426  return true;
1427  }
// No transform requested: append the raw values, longitude first.
1428  coords.push_back(lon);
1429  coords.push_back(lat);
1430  return true;
1431 }
void getColumns(std::vector< double > &coords) const
Definition: Types.cpp:562
bool transforms() const
Definition: sqltypes.h:496

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::NullArray ( const SQLTypeInfo ti)

Definition at line 371 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

// Body of import_export::NullArray(ti).
// Composes the ArrayDatum that represents a NULL array of type `ti`.
// Arrays of strings are not supported here (guarded by CHECK(false)).
// The third ArrayDatum argument is `true` on every return path; it is
// presumably the is-null flag -- confirm against sqltypes.h.
371  {
372  SQLTypeInfo elem_ti = ti.get_elem_type();
// A positive size is treated below as a fixed-length array of `len` bytes.
373  auto len = ti.get_size();
374 
375  if (elem_ti.is_string()) {
376  // must not be called for array of strings
377  CHECK(false);
378  return ArrayDatum(0, NULL, true);
379  }
380 
381  if (len > 0) {
382  // Compose a NULL fixlen array
383  int8_t* buf = (int8_t*)checked_malloc(len);
384  // First scalar is a NULL_ARRAY sentinel
385  Datum d = NullArrayDatum(elem_ti);
386  int8_t* p = appendDatum(buf, d, elem_ti);
387  // Rest is filled with normal NULL sentinels
388  Datum d0 = NullDatum(elem_ti);
389  while ((p - buf) < len) {
390  p = appendDatum(p, d0, elem_ti);
391  }
// The write cursor must land exactly on the end of the buffer.
392  CHECK((p - buf) == len);
393  return ArrayDatum(len, buf, true);
394  }
395  // NULL varlen array
396  return ArrayDatum(0, NULL, true);
397 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:199
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:236
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:922
#define CHECK(condition)
Definition: Logger.h:197
bool is_string() const
Definition: sqltypes.h:478
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:277
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::NullArrayDatum ( SQLTypeInfo ti)

Definition at line 277 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

277  {
278  Datum d;
279  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
280  switch (type) {
281  case kBOOLEAN:
283  break;
284  case kBIGINT:
286  break;
287  case kINT:
289  break;
290  case kSMALLINT:
292  break;
293  case kTINYINT:
295  break;
296  case kFLOAT:
298  break;
299  case kDOUBLE:
301  break;
302  case kTIME:
303  case kTIMESTAMP:
304  case kDATE:
306  break;
307  case kPOINT:
308  case kLINESTRING:
309  case kPOLYGON:
310  case kMULTIPOLYGON:
311  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
312  default:
313  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
314  }
315  return d;
316 }
int8_t tinyintval
Definition: sqltypes.h:203
Definition: sqltypes.h:48
bool boolval
Definition: sqltypes.h:202
int32_t intval
Definition: sqltypes.h:205
float floatval
Definition: sqltypes.h:207
int64_t bigintval
Definition: sqltypes.h:206
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: sqltypes.h:204
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:303
Definition: sqltypes.h:52
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:44
bool is_decimal() const
Definition: sqltypes.h:481
double doubleval
Definition: sqltypes.h:208

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::NullDatum ( SQLTypeInfo ti)

Definition at line 236 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

236  {
237  Datum d;
238  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
239  switch (type) {
240  case kBOOLEAN:
242  break;
243  case kBIGINT:
245  break;
246  case kINT:
248  break;
249  case kSMALLINT:
251  break;
252  case kTINYINT:
254  break;
255  case kFLOAT:
256  d.floatval = NULL_FLOAT;
257  break;
258  case kDOUBLE:
260  break;
261  case kTIME:
262  case kTIMESTAMP:
263  case kDATE:
265  break;
266  case kPOINT:
267  case kLINESTRING:
268  case kPOLYGON:
269  case kMULTIPOLYGON:
270  throw std::runtime_error("Internal error: geometry type in NullDatum.");
271  default:
272  throw std::runtime_error("Internal error: invalid type in NullDatum.");
273  }
274  return d;
275 }
int8_t tinyintval
Definition: sqltypes.h:203
#define NULL_DOUBLE
Definition: sqltypes.h:48
#define NULL_FLOAT
bool boolval
Definition: sqltypes.h:202
int32_t intval
Definition: sqltypes.h:205
float floatval
Definition: sqltypes.h:207
int64_t bigintval
Definition: sqltypes.h:206
int16_t smallintval
Definition: sqltypes.h:204
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:303
Definition: sqltypes.h:52
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:44
bool is_decimal() const
Definition: sqltypes.h:481
double doubleval
Definition: sqltypes.h:208

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<SQLTypes, bool> import_export::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4421 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptors().

// Body of import_export::ogr_to_type(const OGRFieldType& ogr_type).
// Maps an OGR attribute field type to a pair (SQL type, flag). The flag is
// true for every OGR list type and for OFTBinary, false for scalar types.
// Throws std::runtime_error for any field type without a case below.
4421  {
4422  switch (ogr_type) {
4423  case OFTInteger:
4424  return std::make_pair(kINT, false);
4425  case OFTIntegerList:
4426  return std::make_pair(kINT, true);
// The 64-bit integer cases are compiled only when GDAL_VERSION_MAJOR > 1.
4427 #if GDAL_VERSION_MAJOR > 1
4428  case OFTInteger64:
4429  return std::make_pair(kBIGINT, false);
4430  case OFTInteger64List:
4431  return std::make_pair(kBIGINT, true);
4432 #endif
4433  case OFTReal:
4434  return std::make_pair(kDOUBLE, false);
4435  case OFTRealList:
4436  return std::make_pair(kDOUBLE, true);
4437  case OFTString:
4438  return std::make_pair(kTEXT, false);
4439  case OFTStringList:
4440  return std::make_pair(kTEXT, true);
4441  case OFTDate:
4442  return std::make_pair(kDATE, false);
4443  case OFTTime:
4444  return std::make_pair(kTIME, false);
4445  case OFTDateTime:
4446  return std::make_pair(kTIMESTAMP, false);
4447  case OFTBinary:
4448  // Interpret binary blobs as byte arrays here
4449  // but actual import will store NULL as GDAL will not
4450  // extract the blob (OGRFeature::GetFieldAsString will
4451  // result in the import buffers having an empty string)
4452  return std::make_pair(kTINYINT, true);
4453  default:
4454  break;
4455  }
// Reached only through the default case: report the numeric enum value.
4456  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4457 }
Definition: sqltypes.h:48
std::string to_string(char const *&&v)
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes import_export::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4459 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

// Body of import_export::ogr_to_type(const OGRwkbGeometryType& ogr_type).
// Maps an OGR geometry type to the matching SQL geo type. Only point,
// linestring, polygon and multipolygon have a case; any other geometry type
// ends in std::runtime_error.
4459  {
4460  switch (ogr_type) {
4461  case wkbPoint:
4462  return kPOINT;
4463  case wkbLineString:
4464  return kLINESTRING;
4465  case wkbPolygon:
4466  return kPOLYGON;
4467  case wkbMultiPolygon:
4468  return kMULTIPOLYGON;
4469  default:
4470  break;
4471  }
// Reached only through the default case: report the numeric enum value.
4472  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4473 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor td,
Loader *  loader 
)

Definition at line 5189 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by Parser::AddColumnStmt::execute(), and DBHandler::prepare_columnar_loader().

// Body of import_export::setup_column_loaders(td, loader).
// Creates one TypedImportBuffer per column descriptor reported by `loader`
// and returns them in the order get_column_descs() yields them.
// `td` is only checked for being non-null; `loader` is dereferenced without a
// check, so callers must pass a valid pointer.
5191  {
5192  CHECK(td);
5193  auto col_descs = loader->get_column_descs();
5194 
5195  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
5196  for (auto cd : col_descs) {
// Each buffer is constructed from the column descriptor plus whatever
// getStringDict(cd) returns for that column.
5197  import_buffers.emplace_back(
5198  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
5199  }
5200 
5201  return import_buffers;
5202 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo ti,
const CopyParams &  copy_params 
)

Definition at line 318 of file Importer.cpp.

References appendDatum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value().

320  {
321  SQLTypeInfo elem_ti = ti.get_elem_type();
322  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
323  return ArrayDatum(0, NULL, true);
324  }
325  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
326  LOG(WARNING) << "Malformed array: " << s;
327  return ArrayDatum(0, NULL, true);
328  }
329  std::vector<std::string> elem_strs;
330  size_t last = 1;
331  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
332  i = s.find(copy_params.array_delim, last)) {
333  elem_strs.push_back(s.substr(last, i - last));
334  last = i + 1;
335  }
336  if (last + 1 <= s.size()) {
337  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
338  }
339  if (elem_strs.size() == 1) {
340  auto str = elem_strs.front();
341  auto str_trimmed = trim_space(str.c_str(), str.length());
342  if (str_trimmed == "") {
343  elem_strs.clear(); // Empty array
344  }
345  }
346  if (!elem_ti.is_string()) {
347  size_t len = elem_strs.size() * elem_ti.get_size();
348  int8_t* buf = (int8_t*)checked_malloc(len);
349  int8_t* p = buf;
350  for (auto& es : elem_strs) {
351  auto e = trim_space(es.c_str(), es.length());
352  bool is_null = (e == copy_params.null_str) || e == "NULL";
353  if (!elem_ti.is_string() && e == "") {
354  is_null = true;
355  }
356  if (elem_ti.is_number() || elem_ti.is_time()) {
357  if (!isdigit(e[0]) && e[0] != '-') {
358  is_null = true;
359  }
360  }
361  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
362  p = appendDatum(p, d, elem_ti);
363  }
364  return ArrayDatum(len, buf, false);
365  }
366  // must not be called for array of strings
367  CHECK(false);
368  return ArrayDatum(0, NULL, true);
369 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
#define LOG(tag)
Definition: Logger.h:188
bool is_number() const
Definition: sqltypes.h:483
bool is_time() const
Definition: sqltypes.h:484
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:199
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:236
CONSTEXPR DEVICE bool is_null(const T &value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:922
void trim_space(const char *&field_begin, const char *&field_end)
#define CHECK(condition)
Definition: Logger.h:197
bool is_string() const
Definition: sqltypes.h:478
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo ti 
)

Definition at line 455 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

// Body of import_export::TDatumToArrayDatum(datum, ti).
// Converts a TDatum array value into an ArrayDatum of array type `ti`.
// String element types are rejected via CHECK. A null `datum` is delegated to
// NullArray(ti); otherwise each entry of datum.val.arr_val is converted with
// TDatumToDatum() and appended to a freshly allocated buffer.
455  {
456  SQLTypeInfo elem_ti = ti.get_elem_type();
457 
458  CHECK(!elem_ti.is_string());
459 
460  if (datum.is_null) {
461  return NullArray(ti);
462  }
463 
// Buffer size is the element count times the element size reported by the type.
464  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
465  int8_t* buf = (int8_t*)checked_malloc(len);
466  int8_t* p = buf;
467  for (auto& e : datum.val.arr_val) {
// appendDatum() returns the next write position within the buffer.
468  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
469  }
470 
471  return ArrayDatum(len, buf, false);
472 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:410
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:199
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:922
#define CHECK(condition)
Definition: Logger.h:197
bool is_string() const
Definition: sqltypes.h:478
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo ti 
)

Definition at line 410 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

// Body of import_export::TDatumToDatum(datum, ti).
// Converts one scalar TDatum into a Datum of type `ti`. Decimal types are
// dispatched on decimal_to_int_type(ti), all others on ti.get_type().
// A null `datum` produces the type's NULL sentinel; otherwise the value is
// read from datum.val.int_val (integer-like and time-like types) or
// datum.val.real_val (floating-point types).
// Throws std::runtime_error for geometry types and for any unlisted type.
410  {
411  Datum d;
412  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
413  switch (type) {
414  case kBOOLEAN:
// NOTE(review): this page lists Datum::boolval as `bool`; confirm that the
// integer NULL sentinel survives the conversion for null booleans.
415  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
416  break;
417  case kBIGINT:
418  d.bigintval =
419  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
420  break;
421  case kINT:
422  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
423  break;
424  case kSMALLINT:
425  d.smallintval =
426  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
427  break;
428  case kTINYINT:
429  d.tinyintval =
430  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
431  break;
432  case kFLOAT:
433  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
434  break;
435  case kDOUBLE:
436  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
437  break;
// Time-like values are carried in the 64-bit integer member.
438  case kTIME:
439  case kTIMESTAMP:
440  case kDATE:
441  d.bigintval =
442  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
443  break;
444  case kPOINT:
445  case kLINESTRING:
446  case kPOLYGON:
447  case kMULTIPOLYGON:
448  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
449  default:
450  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
451  }
452  return d;
453 }
int8_t tinyintval
Definition: sqltypes.h:203
#define NULL_DOUBLE
Definition: sqltypes.h:48
#define NULL_FLOAT
bool boolval
Definition: sqltypes.h:202
int32_t intval
Definition: sqltypes.h:205
float floatval
Definition: sqltypes.h:207
int64_t bigintval
Definition: sqltypes.h:206
int16_t smallintval
Definition: sqltypes.h:204
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:303
Definition: sqltypes.h:52
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:44
bool is_decimal() const
Definition: sqltypes.h:481
double doubleval
Definition: sqltypes.h:208

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 224 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

// Body of import_export::trim_space(field, len).
// Returns a copy of the first `len` characters of `field` with leading and
// trailing ' ' and '\r' characters removed. No other whitespace (tabs,
// newlines) is stripped. An all-blank input yields an empty string.
224  {
// [i, j) is the window that survives trimming.
225  size_t i = 0;
226  size_t j = len;
227  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
228  i++;
229  }
// `i < j` keeps the backward scan from crossing the forward one.
230  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
231  j--;
232  }
233  return std::string(field + i, j - i);
234 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the caller graph for this function:

template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 2998 of file Importer.cpp.

References omnisci.dtypes::T.

// Body of import_export::try_cast<T>(str).
// Reports whether `str` can be converted to T by boost::lexical_cast.
// The converted value itself is discarded; only success or failure matters.
2998  {
2999  try {
3000  boost::lexical_cast<T>(str);
3001  } catch (const boost::bad_lexical_cast& e) {
// Only the lexical-cast failure is translated into `false`.
3002  return false;
3003  }
3004  return true;
3005 }
char* import_export::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 3007 of file Importer.cpp.

Referenced by import_export::Detector::detect_sqltype().

/**
 * @brief Tries each strptime() format in order until one matches @p str.
 *
 * @param str     NUL-terminated text to parse.
 * @param formats Candidate strptime() format strings, tried front to back.
 * @return The pointer returned by the first successful strptime() call, i.e.
 *         the position just past the text that format consumed; nullptr when
 *         no format matches or @p formats is empty. A non-null result does not
 *         by itself mean the whole string was consumed -- callers that need
 *         that must check that the pointer addresses the terminating NUL.
 */
inline char* try_strptimes(const char* str, const std::vector<std::string>& formats) {
  // Parsed fields are not reported to the caller; value-initialise the
  // scratch struct so no indeterminate members are ever handed to strptime().
  std::tm tm_struct{};
  // Iterate by reference: the formats are only read, never modified.
  for (const auto& format : formats) {
    if (char* const end = strptime(str, format.c_str(), &tm_struct)) {
      return end;
    }
  }
  return nullptr;
}

+ Here is the caller graph for this function:

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static
constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 32 of file CopyParams.h.

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static