OmniSciDB  a47db9e897
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Importer_NS Namespace Reference

Namespaces

 anonymous_namespace{Importer.cpp}
 

Classes

struct  CopyParams
 
class  DelimitedParserUtils
 
struct  GeoImportException
 
struct  BadRowsTracker
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  ImporterUtils
 
class  RenderGroupAnalyzer
 
class  Importer
 
class  ImportDriver
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

std::vector< uint8_t > compress_coords (std::vector< double > &coords, const SQLTypeInfo &ti)
 
static const std::string trim_space (const char *field, const size_t len)
 
int8_t * appendDatum (int8_t *buf, Datum d, const SQLTypeInfo &ti)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords)
 
uint64_t compress_coord (double coord, const SQLTypeInfo &ti, bool x)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
void GDALErrorHandler (CPLErr eErrClass, int err_no, const char *msg)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string,
ImportStatus >
import_status_map
 

Typedef Documentation

using Importer_NS::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 74 of file Importer.h.

using Importer_NS::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 135 of file Importer.cpp.

using Importer_NS::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 133 of file Importer.cpp.

using Importer_NS::FeaturePtrVector = typedef std::vector<OGRFeatureUqPtr>

Definition at line 136 of file Importer.cpp.

using Importer_NS::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 132 of file Importer.cpp.

Enumeration Type Documentation

enum Importer_NS::FileType
strong
Enumerator
DELIMITED 
POLYGON 

Definition at line 34 of file CopyParams.h.

34  {
35  DELIMITED,
36  POLYGON
37 #ifdef ENABLE_IMPORT_PARQUET
38  ,
39  PARQUET
40 #endif
41 };

Function Documentation

void Importer_NS::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 435 of file Importer.cpp.

Referenced by Importer_NS::TypedImportBuffer::add_value().

435  {
436  const auto& arr = datum.val.arr_val;
437  for (const auto& elem_datum : arr) {
438  string_vec.push_back(elem_datum.val.str_val);
439  }
440 }

+ Here is the caller graph for this function:

int8_t * Importer_NS::appendDatum ( int8_t *  buf,
Datum  d,
const SQLTypeInfo &  ti 
)

Definition at line 232 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, Datum::smallintval, and Datum::tinyintval.

Referenced by Executor::executeSimpleInsert(), NullArray(), StringToArray(), and TDatumToArrayDatum().

232  {
233  switch (ti.get_type()) {
234  case kBOOLEAN:
235  *(bool*)buf = d.boolval;
236  return buf + sizeof(bool);
237  case kNUMERIC:
238  case kDECIMAL:
239  case kBIGINT:
240  *(int64_t*)buf = d.bigintval;
241  return buf + sizeof(int64_t);
242  case kINT:
243  *(int32_t*)buf = d.intval;
244  return buf + sizeof(int32_t);
245  case kSMALLINT:
246  *(int16_t*)buf = d.smallintval;
247  return buf + sizeof(int16_t);
248  case kTINYINT:
249  *(int8_t*)buf = d.tinyintval;
250  return buf + sizeof(int8_t);
251  case kFLOAT:
252  *(float*)buf = d.floatval;
253  return buf + sizeof(float);
254  case kDOUBLE:
255  *(double*)buf = d.doubleval;
256  return buf + sizeof(double);
257  case kTIME:
258  case kTIMESTAMP:
259  case kDATE:
260  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
261  return buf + sizeof(int64_t);
262  default:
263  return NULL;
264  }
265  return NULL;
266 }
int8_t tinyintval
Definition: sqltypes.h:126
Definition: sqltypes.h:52
bool boolval
Definition: sqltypes.h:125
int32_t intval
Definition: sqltypes.h:128
float floatval
Definition: sqltypes.h:130
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
int64_t bigintval
Definition: sqltypes.h:129
int16_t smallintval
Definition: sqltypes.h:127
Definition: sqltypes.h:56
Definition: sqltypes.h:48
double doubleval
Definition: sqltypes.h:131

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

uint64_t Importer_NS::compress_coord ( double  coord,
const SQLTypeInfo &  ti,
bool  x 
)

Definition at line 1414 of file Importer.cpp.

References Geo_namespace::compress_lattitude_coord_geoint32(), Geo_namespace::compress_longitude_coord_geoint32(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_comp_param(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), and kENCODING_GEOINT.

Referenced by compress_coords().

1414  {
1415  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
1416  return x ? Geo_namespace::compress_longitude_coord_geoint32(coord)
1417  : Geo_namespace::compress_lattitude_coord_geoint32(coord);
1418  }
1419  return *reinterpret_cast<uint64_t*>(may_alias_ptr(&coord));
1420 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:335
DEVICE uint64_t compress_longitude_coord_geoint32(const double coord)
DEVICE uint64_t compress_lattitude_coord_geoint32(const double coord)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< uint8_t > Importer_NS::compress_coords ( std::vector< double > &  coords,
const SQLTypeInfo &  ti 
)

Definition at line 1422 of file Importer.cpp.

References compress_coord(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_comp_param(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_output_srid(), kENCODING_GEOINT, and to_string().

Referenced by Parser::InsertValuesStmt::analyze(), import_thread_shapefile(), Importer_NS::Importer::set_geo_physical_import_buffer(), Importer_NS::Importer::set_geo_physical_import_buffer_columnar(), GeoPointValueConverter::toCompressedCoords(), and RelAlgTranslator::translateGeoLiteral().

1422  {
1423  std::vector<uint8_t> compressed_coords;
1424  bool x = true;
1425  for (auto coord : coords) {
1426  auto coord_data_ptr = reinterpret_cast<uint64_t*>(&coord);
1427  uint64_t coord_data = *coord_data_ptr;
1428  size_t coord_data_size = sizeof(double);
1429 
1430  if (ti.get_output_srid() == 4326) {
1431  if (x) {
1432  if (coord < -180.0 || coord > 180.0) {
1433  throw std::runtime_error("WGS84 longitude " + std::to_string(coord) +
1434  " is out of bounds");
1435  }
1436  } else {
1437  if (coord < -90.0 || coord > 90.0) {
1438  throw std::runtime_error("WGS84 latitude " + std::to_string(coord) +
1439  " is out of bounds");
1440  }
1441  }
1442  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
1443  coord_data = compress_coord(coord, ti, x);
1444  coord_data_size = ti.get_comp_param() / 8;
1445  }
1446  x = !x;
1447  }
1448 
1449  for (size_t i = 0; i < coord_data_size; i++) {
1450  compressed_coords.push_back(coord_data & 0xFF);
1451  coord_data >>= 8;
1452  }
1453  }
1454  return compressed_coords;
1455 }
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:332
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:335
std::string to_string(char const *&&v)
uint64_t compress_coord(double coord, const SQLTypeInfo &ti, bool x)
Definition: Importer.cpp:1414

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Importer_NS::GDALErrorHandler ( CPLErr  eErrClass,
int  err_no,
const char *  msg 
)

Definition at line 4017 of file Importer.cpp.

References CHECK(), logger::INFO, LOG, and to_string().

Referenced by Importer_NS::Importer::initGDAL().

4017  {
4018  CHECK(eErrClass >= CE_None && eErrClass <= CE_Fatal);
4019  static const char* errClassStrings[5] = {
4020  "Info",
4021  "Debug",
4022  "Warning",
4023  "Failure",
4024  "Fatal",
4025  };
4026  std::string log_msg = std::string("GDAL ") + errClassStrings[eErrClass] +
4027  std::string(": ") + msg + std::string(" (") +
4028  std::to_string(err_no) + std::string(")");
4029  if (eErrClass >= CE_Failure) {
4030  throw std::runtime_error(log_msg);
4031  } else {
4032  LOG(INFO) << log_msg;
4033  }
4034 }
#define LOG(tag)
Definition: Logger.h:185
std::string to_string(char const *&&v)
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void Importer_NS::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4467 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by Importer_NS::Importer::gdalGetAllFilesInArchive().

4468  {
4469  // prepare to gather subdirectories
4470  std::vector<std::string> subdirectories;
4471 
4472  // get entries
4473  char** entries = VSIReadDir(archive_path.c_str());
4474  if (!entries) {
4475  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4476  return;
4477  }
4478 
4479  // force scope
4480  {
4481  // request clean-up
4482  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4483 
4484  // check all the entries
4485  int index = 0;
4486  while (true) {
4487  // get next entry, or drop out if there isn't one
4488  char* entry_c = entries[index++];
4489  if (!entry_c) {
4490  break;
4491  }
4492  std::string entry(entry_c);
4493 
4494  // ignore '.' and '..'
4495  if (entry == "." || entry == "..") {
4496  continue;
4497  }
4498 
4499  // build the full path
4500  std::string entry_path = archive_path + std::string("/") + entry;
4501 
4502  // is it a file or a sub-folder
4503  VSIStatBufL sb;
4504  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4505  if (result < 0) {
4506  break;
4507  }
4508 
4509  if (VSI_ISDIR(sb.st_mode)) {
4510  // a directory that ends with .gdb could be a Geodatabase bundle
4511  // arguably dangerous to decide this purely by name, but any further
4512  // validation would be very complex especially at this scope
4513  if (boost::iends_with(entry_path, ".gdb")) {
4514  // add the directory as if it was a file and don't recurse into it
4515  files.push_back(entry_path);
4516  } else {
4517  // add subdirectory to be recursed into
4518  subdirectories.push_back(entry_path);
4519  }
4520  } else {
4521  // add this file
4522  files.push_back(entry_path);
4523  }
4524  }
4525  }
4526 
4527  // recurse into each subdirectories we found
4528  for (const auto& subdirectory : subdirectories) {
4529  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4530  }
4531 }
#define LOG(tag)
Definition: Logger.h:185
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4467

+ Here is the caller graph for this function:

static ImportStatus Importer_NS::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1777 of file Importer.cpp.

References CHECK(), CHECK_LT, DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step1(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step2(), Importer_NS::DelimitedParserUtils::find_beginning(), Importer_NS::CopyParams::geo_explode_collections, Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), Importer_NS::Importer::get_is_array(), Importer_NS::DelimitedParserUtils::get_row(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Importer_NS::Importer::getCatalog(), Geo_namespace::GeoTypesFactory::getGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, anonymous_namespace{TypedDataAccessors.h}::is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::lonlat, Importer_NS::CopyParams::max_reject, Importer_NS::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::Importer::set_geo_physical_import_buffer(), Importer_NS::ImportStatus::thread_id, and to_string().

Referenced by Importer_NS::Importer::importDelimited().

1785  {
1786  ImportStatus import_status;
1787  int64_t total_get_row_time_us = 0;
1788  int64_t total_str_to_val_time_us = 0;
1789  CHECK(scratch_buffer);
1790  auto buffer = scratch_buffer.get();
1791  auto load_ms = measure<>::execution([]() {});
1792  auto ms = measure<>::execution([&]() {
1793  const CopyParams& copy_params = importer->get_copy_params();
1794  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1795  size_t begin =
1796  DelimitedParserUtils::find_beginning(buffer, begin_pos, end_pos, copy_params);
1797  const char* thread_buf = buffer + begin_pos + begin;
1798  const char* thread_buf_end = buffer + end_pos;
1799  const char* buf_end = buffer + total_size;
1800  bool try_single_thread = false;
1801  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1802  importer->get_import_buffers(thread_id);
1803  auto us = measure<std::chrono::microseconds>::execution([&]() {});
1804  int phys_cols = 0;
1805  int point_cols = 0;
1806  for (const auto cd : col_descs) {
1807  const auto& col_ti = cd->columnType;
1808  phys_cols += col_ti.get_physical_cols();
1809  if (cd->columnType.get_type() == kPOINT) {
1810  point_cols++;
1811  }
1812  }
1813  auto num_cols = col_descs.size() - phys_cols;
1814  for (const auto& p : import_buffers) {
1815  p->clear();
1816  }
1817  std::vector<std::string> row;
1818  size_t row_index_plus_one = 0;
1819  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1820  row.clear();
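1821  if (DEBUG_TIMING) {
1822  us = measure<std::chrono::microseconds>::execution([&]() {
1823  p = DelimitedParserUtils::get_row(p,
1824  thread_buf_end,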
1825  buf_end,
1826  copy_params,
1827  importer->get_is_array(),
1828  row,
1829  try_single_thread);
1830  });
1831  total_get_row_time_us += us;
1832  } else {
1833  p = DelimitedParserUtils::get_row(p,
1834  thread_buf_end,
1835  buf_end,
1836  copy_params,
1837  importer->get_is_array(),
1838  row,
1839  try_single_thread);
1840  }
1841  row_index_plus_one++;
1842  // Each POINT could consume two separate coords instead of a single WKT
1843  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1844  import_status.rows_rejected++;
1845  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1846  << row.size() << "): " << row;
1847  if (import_status.rows_rejected > copy_params.max_reject) {
1848  break;
1849  }
1850  continue;
1851  }
1852 
1853  //
1854  // lambda for importing a row (perhaps multiple times if exploding a collection)
1855  //
1856 
1857  auto execute_import_row = [&](OGRGeometry* import_geometry) {
1858  size_t import_idx = 0;
1859  size_t col_idx = 0;
1860  try {
1861  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1862  auto cd = *cd_it;
1863  const auto& col_ti = cd->columnType;
1864  if (col_ti.get_physical_cols() == 0) {
1865  // not geo
1866 
1867  // store the string (possibly null)
1868  bool is_null =
1869  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1870  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1871  // So initially nullness may be missed and not passed to add_value,
1872  // which then might also check and still decide it's actually a NULL,
1873  // e.g. if kINT doesn't start with a digit or a '-' then it's considered
1874  // NULL. So "NULL" is not recognized as NULL but then it's not
1875  // recognized as a valid kINT, so it's a NULL after all. Checking for
1876  // "NULL" here too, as a widely accepted notation for NULL.
1877  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1878  is_null = true;
1879  }
1880  import_buffers[col_idx]->add_value(
1881  cd, row[import_idx], is_null, copy_params);
1882 
1883  // next
1884  ++import_idx;
1885  ++col_idx;
1886  } else {
1887  // geo
1888 
1889  // store null string in the base column
1890  import_buffers[col_idx]->add_value(
1891  cd, copy_params.null_str, true, copy_params);
1892 
1893  // WKT from string we're not storing
1894  auto const& wkt = row[import_idx];
1895 
1896  // next
1897  ++import_idx;
1898  ++col_idx;
1899 
1900  SQLTypes col_type = col_ti.get_type();
1901  CHECK(IS_GEO(col_type));
1902 
1903  std::vector<double> coords;
1904  std::vector<double> bounds;
1905  std::vector<int> ring_sizes;
1906  std::vector<int> poly_rings;
1907  int render_group = 0;
1908 
1909  if (col_type == kPOINT && wkt.size() > 0 &&
1910  (wkt[0] == '.' || isdigit(wkt[0]) || wkt[0] == '-')) {
1911  // Invalid WKT, looks more like a scalar.
1912  // Try custom POINT import: from two separate scalars rather than WKT
1913  // string
1914  double lon = std::atof(wkt.c_str());
1915  double lat = NAN;
1916  std::string lat_str{row[import_idx]};
1917  ++import_idx;
1918  if (lat_str.size() > 0 &&
1919  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1920  lat = std::atof(lat_str.c_str());
1921  }
1922  // Swap coordinates if this table uses a reverse order: lat/lon
1923  if (!copy_params.lonlat) {
1924  std::swap(lat, lon);
1925  }
1926  // TODO: should check if POINT column should have been declared with
1927  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1928  // throw std::runtime_error("POINT column " + cd->columnName + " is
1929  // not WGS84, cannot insert lon/lat");
1930  // }
1931  if (!importGeoFromLonLat(lon, lat, coords)) {
1932  throw std::runtime_error(
1933  "Cannot read lon/lat to insert into POINT column " +
1934  cd->columnName);
1935  }
1936  } else {
1937  // import it
1938  SQLTypeInfo import_ti;
1939  if (import_geometry) {
1940  // geometry already exploded
1941  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
1942  import_geometry,
1943  import_ti,
1944  coords,
1945  bounds,
1946  ring_sizes,
1947  poly_rings,
1948  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
1949  std::string msg =
1950  "Failed to extract valid geometry from exploded row " +
1951  std::to_string(first_row_index_this_buffer + row_index_plus_one) +
1952  " for column " + cd->columnName;
1953  throw std::runtime_error(msg);
1954  }
1955  } else {
1956  // extract geometry directly from WKT
1957  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
1958  wkt,
1959  import_ti,
1960  coords,
1961  bounds,
1962  ring_sizes,
1963  poly_rings,
1964  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
1965  std::string msg =
1966  "Failed to extract valid geometry from row " +
1967  std::to_string(first_row_index_this_buffer + row_index_plus_one) +
1968  " for column " + cd->columnName;
1969  throw std::runtime_error(msg);
1970  }
1971  }
1972 
1973  // validate types
1974  if (col_type != import_ti.get_type()) {
1975  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
1976  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
1977  col_type == SQLTypes::kMULTIPOLYGON)) {
1978  throw std::runtime_error(
1979  "Imported geometry doesn't match the type of column " +
1980  cd->columnName);
1981  }
1982  }
1983 
1984  // assign render group?
1985  if (columnIdToRenderGroupAnalyzerMap.size()) {
1986  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1987  if (ring_sizes.size()) {
1988  // get a suitable render group for these poly coords
1989  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
1990  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
1991  render_group =
1992  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
1993  } else {
1994  // empty poly
1995  render_group = -1;
1996  }
1997  }
1998  }
1999  }
2000 
2001  // import extracted geo
2002  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2003  cd,
2004  import_buffers,
2005  col_idx,
2006  coords,
2007  bounds,
2008  ring_sizes,
2009  poly_rings,
2010  render_group);
2011 
2012  // skip remaining physical columns
2013  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2014  ++cd_it;
2015  }
2016  }
2017  }
2018  import_status.rows_completed++;
2019  } catch (const std::exception& e) {
2020  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2021  import_buffers[col_idx_to_pop]->pop_value();
2022  }
2023  import_status.rows_rejected++;
2024  LOG(ERROR) << "Input exception thrown: " << e.what()
2025  << ". Row discarded. Data: " << row;
2026  }
2027  };
2028 
2029  if (copy_params.geo_explode_collections) {
2030  // explode and import
2031  // @TODO(se) convert to structure-bindings when we can use C++17 here
2032  auto collection_idx_type_name = explode_collections_step1(col_descs);
2033  int collection_col_idx = std::get<0>(collection_idx_type_name);
2034  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2035  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2036  // pull out the collection WKT
2037  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2038  auto const& collection_wkt = row[collection_col_idx];
2039  // convert to OGR
2040  OGRGeometry* ogr_geometry = nullptr;
2041  ScopeGuard destroy_ogr_geometry = [&] {
2042  if (ogr_geometry) {
2043  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2044  }
2045  };
2046  OGRErr ogr_status = OGRGeometryFactory::createFromWkt(
2047  collection_wkt.c_str(), nullptr, &ogr_geometry);
2048  if (ogr_status != OGRERR_NONE) {
2049  throw std::runtime_error("Failed to convert WKT to geometry");
2050  }
2051  // do the explode and import
2052  us = explode_collections_step2(ogr_geometry,
2053  collection_child_type,
2054  collection_col_name,
2055  first_row_index_this_buffer + row_index_plus_one,
2056  execute_import_row);
2057  } else {
2058  // import non-collection row just once
2059  us = measure<std::chrono::microseconds>::execution(
2060  [&] { execute_import_row(nullptr); });
2061  }
2062  total_str_to_val_time_us += us;
2063  } // end thread
2064  if (import_status.rows_completed > 0) {
2065  load_ms = measure<>::execution(
2066  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2067  }
2068  });
2069  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2070  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2071  << import_status.rows_completed << " rows inserted in "
2072  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2073  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2074  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2075  << "sec" << std::endl;
2076  }
2077 
2078  import_status.thread_id = thread_id;
2079  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2080 
2081  return import_status;
2082 }
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1653
SQLTypes
Definition: sqltypes.h:41
static const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const Importer_NS::CopyParams &copy_params, const bool *is_array, std::vector< std::string > &row, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
#define LOG(tag)
Definition: Logger.h:185
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:138
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1687
CHECK(cgen_state)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords)
Definition: Importer.cpp:1402
#define CHECK_LT(x, y)
Definition: Logger.h:200
bool is_null(const T &v, const SQLTypeInfo &t)
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
#define IS_GEO(T)
Definition: sqltypes.h:167

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static ImportStatus Importer_NS::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2084 of file Importer.cpp.

References CHECK(), compress_coords(), DEBUG_TIMING, logger::ERROR, Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step1(), Importer_NS::anonymous_namespace{Importer.cpp}::explode_collections_step2(), Importer_NS::CopyParams::geo_explode_collections, Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Geo_namespace::GeoTypesFactory::getGeoColumns(), logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::ImportStatus::thread_id, timer_start(), timer_stop(), and to_string().

Referenced by Importer_NS::Importer::importGDAL().

2093  {
2094  ImportStatus import_status;
2095  const CopyParams& copy_params = importer->get_copy_params();
2096  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2097  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2098  importer->get_import_buffers(thread_id);
2099 
2100  for (const auto& p : import_buffers) {
2101  p->clear();
2102  }
2103 
2104  auto convert_timer = timer_start();
2105 
2106  // we create this on the fly based on the first feature's SR
2107  std::unique_ptr<OGRCoordinateTransformation> coordinate_transformation;
2108 
2109  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2110  if (!features[iFeature]) {
2111  continue;
2112  }
2113 
2114  // get this feature's geometry
2115  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2116  if (pGeometry) {
2117  // for geodatabase, we need to consider features with no geometry
2118  // as we still want to create a table, even if it has no geo column
2119 
2120  // transform it
2121  // avoid GDAL error if not transformable
2122  auto geometry_sr = pGeometry->getSpatialReference();
2123  if (geometry_sr) {
2124  // create an OGRCoordinateTransformation (CT) on the fly
2125  // we must assume that all geo in this file will have
2126  // the same source SR, so the CT will be valid for all
2127  // transforming to a reusable CT is faster than to an SR
2128  if (coordinate_transformation == nullptr) {
2129  coordinate_transformation.reset(
2130  OGRCreateCoordinateTransformation(geometry_sr, poGeographicSR));
2131  if (coordinate_transformation == nullptr) {
2132  throw std::runtime_error(
2133  "Failed to create a GDAL CoordinateTransformation for incoming geo");
2134  }
2135  }
2136  pGeometry->transform(coordinate_transformation.get());
2137  }
2138  }
2139 
2140  //
2141  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2142  //
2143 
2144  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2145  size_t col_idx = 0;
2146  try {
2147  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2148  auto cd = *cd_it;
2149 
2150  // is this a geo column?
2151  const auto& col_ti = cd->columnType;
2152  if (col_ti.is_geometry()) {
2153  // some Shapefiles get us here, but the OGRGeometryRef is null
2154  if (!import_geometry) {
2155  std::string msg = "Geometry feature " +
2156  std::to_string(firstFeature + iFeature + 1) +
2157  " has null GeometryRef";
2158  throw std::runtime_error(msg);
2159  }
2160 
2161  // Note that this assumes there is one and only one geo column in the table.
2162  // Currently, the importer only supports reading a single geospatial feature
2163  // from an input shapefile / geojson file, but this code will need to be
2164  // modified if that changes
2165  SQLTypes col_type = col_ti.get_type();
2166 
2167  // store null string in the base column
2168  import_buffers[col_idx]->add_value(
2169  cd, copy_params.null_str, true, copy_params);
2170  ++col_idx;
2171 
2172  // the data we now need to extract for the other columns
2173  std::vector<double> coords;
2174  std::vector<double> bounds;
2175  std::vector<int> ring_sizes;
2176  std::vector<int> poly_rings;
2177  int render_group = 0;
2178 
2179  // extract it
2180  SQLTypeInfo import_ti;
2181 
2182  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
2183  import_geometry,
2184  import_ti,
2185  coords,
2186  bounds,
2187  ring_sizes,
2188  poly_rings,
2189  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2190  std::string msg = "Failed to extract valid geometry from feature " +
2191  std::to_string(firstFeature + iFeature + 1) +
2192  " for column " + cd->columnName;
2193  throw std::runtime_error(msg);
2194  }
2195 
2196  // validate types
2197  if (col_type != import_ti.get_type()) {
2198  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2199  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2200  col_type == SQLTypes::kMULTIPOLYGON)) {
2201  throw std::runtime_error(
2202  "Imported geometry doesn't match the type of column " +
2203  cd->columnName);
2204  }
2205  }
2206 
2207  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2208  if (ring_sizes.size()) {
2209  // get a suitable render group for these poly coords
2210  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2211  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2212  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2213  } else {
2214  // empty poly
2215  render_group = -1;
2216  }
2217  }
2218 
2219  // create coords array value and add it to the physical column
2220  ++cd_it;
2221  auto cd_coords = *cd_it;
2222  std::vector<TDatum> td_coord_data;
2223  std::vector<uint8_t> compressed_coords = compress_coords(coords, col_ti);
2224  for (auto cc : compressed_coords) {
2225  TDatum td_byte;
2226  td_byte.val.int_val = cc;
2227  td_coord_data.push_back(td_byte);
2228  }
2229  TDatum tdd_coords;
2230  tdd_coords.val.arr_val = td_coord_data;
2231  tdd_coords.is_null = false;
2232  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2233  ++col_idx;
2234 
2235  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2236  // Create ring_sizes array value and add it to the physical column
2237  ++cd_it;
2238  auto cd_ring_sizes = *cd_it;
2239  std::vector<TDatum> td_ring_sizes;
2240  for (auto ring_size : ring_sizes) {
2241  TDatum td_ring_size;
2242  td_ring_size.val.int_val = ring_size;
2243  td_ring_sizes.push_back(td_ring_size);
2244  }
2245  TDatum tdd_ring_sizes;
2246  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2247  tdd_ring_sizes.is_null = false;
2248  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2249  ++col_idx;
2250  }
2251 
2252  if (col_type == kMULTIPOLYGON) {
2253  // Create poly_rings array value and add it to the physical column
2254  ++cd_it;
2255  auto cd_poly_rings = *cd_it;
2256  std::vector<TDatum> td_poly_rings;
2257  for (auto num_rings : poly_rings) {
2258  TDatum td_num_rings;
2259  td_num_rings.val.int_val = num_rings;
2260  td_poly_rings.push_back(td_num_rings);
2261  }
2262  TDatum tdd_poly_rings;
2263  tdd_poly_rings.val.arr_val = td_poly_rings;
2264  tdd_poly_rings.is_null = false;
2265  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2266  ++col_idx;
2267  }
2268 
2269  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2270  col_type == kMULTIPOLYGON) {
2271  // Create bounds array value and add it to the physical column
2272  ++cd_it;
2273  auto cd_bounds = *cd_it;
2274  std::vector<TDatum> td_bounds_data;
2275  for (auto b : bounds) {
2276  TDatum td_double;
2277  td_double.val.real_val = b;
2278  td_bounds_data.push_back(td_double);
2279  }
2280  TDatum tdd_bounds;
2281  tdd_bounds.val.arr_val = td_bounds_data;
2282  tdd_bounds.is_null = false;
2283  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2284  ++col_idx;
2285  }
2286 
2287  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2288  // Create render_group value and add it to the physical column
2289  ++cd_it;
2290  auto cd_render_group = *cd_it;
2291  TDatum td_render_group;
2292  td_render_group.val.int_val = render_group;
2293  td_render_group.is_null = false;
2294  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2295  ++col_idx;
2296  }
2297  } else {
2298  // regular column
2299  // pull from GDAL metadata
2300  const auto cit = columnNameToSourceNameMap.find(cd->columnName);
2301  CHECK(cit != columnNameToSourceNameMap.end());
2302  const std::string& fieldName = cit->second;
2303  const auto fit = fieldNameToIndexMap.find(fieldName);
2304  CHECK(fit != fieldNameToIndexMap.end());
2305  size_t iField = fit->second;
2306  CHECK(iField < fieldNameToIndexMap.size());
2307  std::string fieldContents = features[iFeature]->GetFieldAsString(iField);
2308  import_buffers[col_idx]->add_value(cd, fieldContents, false, copy_params);
2309  ++col_idx;
2310  }
2311  }
2312  import_status.rows_completed++;
2313  } catch (const std::exception& e) {
2314  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2315  import_buffers[col_idx_to_pop]->pop_value();
2316  }
2317  import_status.rows_rejected++;
2318  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2319  }
2320  };
2321 
2322  if (pGeometry && copy_params.geo_explode_collections) {
2323  // explode and import
2324  // @TODO(se) convert to structure-bindings when we can use C++17 here
2325  auto collection_idx_type_name = explode_collections_step1(col_descs);
2326  SQLTypes collection_child_type = std::get<1>(collection_idx_type_name);
2327  std::string collection_col_name = std::get<2>(collection_idx_type_name);
2328  explode_collections_step2(pGeometry,
2329  collection_child_type,
2330  collection_col_name,
2331  firstFeature + iFeature + 1,
2332  execute_import_feature);
2333  } else {
2334  // import non-collection or null feature just once
2335  execute_import_feature(pGeometry);
2336  }
2337  } // end features
2338 
2339  float convert_ms =
2340  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2341  convert_timer)) /
2342  1000.0f;
2343 
2344  float load_ms = 0.0f;
2345  if (import_status.rows_completed > 0) {
2346  auto load_timer = timer_start();
2347  importer->load(import_buffers, import_status.rows_completed);
2348  load_ms =
2349  float(
2350  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2351  load_timer)) /
2352  1000.0f;
2353  }
2354 
2355  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2356  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2357  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2358  }
2359 
2360  import_status.thread_id = thread_id;
2361 
2362  if (DEBUG_TIMING) {
2363  LOG(INFO) << "DEBUG: Total "
2364  << float(timer_stop<std::chrono::steady_clock::time_point,
2365  std::chrono::microseconds>(convert_timer)) /
2366  1000.0f
2367  << "ms";
2368  }
2369 
2370  return import_status;
2371 }
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1653
SQLTypes
Definition: sqltypes.h:41
#define LOG(tag)
Definition: Logger.h:185
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Importer.cpp:1422
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:138
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1687
CHECK(cgen_state)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:144
Type timer_start()
Definition: measure.h:40

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool Importer_NS::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords 
)

Definition at line 1402 of file Importer.cpp.

Referenced by import_thread_delimited().

1402  {
1403  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1404  return false;
1405  }
1406  // we don't need to do any coordinate-system transformation
1407  // here (yet) so we don't need to use any OGR API or types
1408  // just use the values directly (assumed to be in 4326)
1409  coords.push_back(lon);
1410  coords.push_back(lat);
1411  return true;
1412 }

+ Here is the caller graph for this function:

ArrayDatum Importer_NS::NullArray ( const SQLTypeInfo &  ti)

Definition at line 403 of file Importer.cpp.

References appendDatum(), CHECK(), checked_malloc(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), NullArrayDatum(), and NullDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value(), Importer_NS::TypedImportBuffer::add_values(), Importer_NS::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

403  {
404  SQLTypeInfo elem_ti = ti.get_elem_type();
405  auto len = ti.get_size();
406 
407  if (elem_ti.is_string()) {
408  // must not be called for array of strings
409  CHECK(false);
410  return ArrayDatum(0, NULL, true);
411  }
412 
413  if (len > 0) {
414  // Compose a NULL fixlen array
415  int8_t* buf = (int8_t*)checked_malloc(len);
416  // First scalar is a NULL_ARRAY sentinel
417  Datum d = NullArrayDatum(elem_ti);
418  int8_t* p = appendDatum(buf, d, elem_ti);
419  // Rest is filled with normal NULL sentinels
420  Datum d0 = NullDatum(elem_ti);
421  while ((p - buf) < len) {
422  p = appendDatum(p, d0, elem_ti);
423  }
424  CHECK((p - buf) == len);
425  return ArrayDatum(len, buf, true);
426  }
427  // NULL varlen array
428  return ArrayDatum(0, NULL, true);
429 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:268
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:232
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:309
bool is_string() const
Definition: sqltypes.h:477
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum Importer_NS::NullArrayDatum ( SQLTypeInfo &  ti)

Definition at line 309 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

309  {
310  Datum d;
311  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
312  switch (type) {
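313  case kBOOLEAN:
314  d.boolval = inline_fixed_encoding_null_array_val(ti);
315  break;
316  case kBIGINT:
317  d.bigintval = inline_fixed_encoding_null_array_val(ti);
318  break;
319  case kINT:
320  d.intval = inline_fixed_encoding_null_array_val(ti);
321  break;
322  case kSMALLINT:
323  d.smallintval = inline_fixed_encoding_null_array_val(ti);
324  break;
325  case kTINYINT:
326  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
327  break;
328  case kFLOAT:
329  d.floatval = NULL_ARRAY_FLOAT;
330  break;
331  case kDOUBLE:
332  d.doubleval = NULL_ARRAY_DOUBLE;
333  break;
334  case kTIME:
335  case kTIMESTAMP:
336  case kDATE:
337  d.bigintval = inline_fixed_encoding_null_array_val(ti);
338  break;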
339  case kPOINT:
340  case kLINESTRING:
341  case kPOLYGON:
342  case kMULTIPOLYGON:
343  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
344  default:
345  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
346  }
347  return d;
348 }
int8_t tinyintval
Definition: sqltypes.h:126
Definition: sqltypes.h:52
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:187
bool boolval
Definition: sqltypes.h:125
int32_t intval
Definition: sqltypes.h:128
float floatval
Definition: sqltypes.h:130
int64_t bigintval
Definition: sqltypes.h:129
int16_t smallintval
Definition: sqltypes.h:127
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:56
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:48
bool is_decimal() const
Definition: sqltypes.h:480
double doubleval
Definition: sqltypes.h:131
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:186

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum Importer_NS::NullDatum ( SQLTypeInfo &  ti)

Definition at line 268 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

268  {
269  Datum d;
270  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
271  switch (type) {
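272  case kBOOLEAN:
273  d.boolval = inline_fixed_encoding_null_val(ti);
274  break;
275  case kBIGINT:
276  d.bigintval = inline_fixed_encoding_null_val(ti);
277  break;
278  case kINT:
279  d.intval = inline_fixed_encoding_null_val(ti);
280  break;
281  case kSMALLINT:
282  d.smallintval = inline_fixed_encoding_null_val(ti);
283  break;
284  case kTINYINT:
285  d.tinyintval = inline_fixed_encoding_null_val(ti);
286  break;
287  case kFLOAT:
288  d.floatval = NULL_FLOAT;
289  break;
290  case kDOUBLE:
291  d.doubleval = NULL_DOUBLE;
292  break;
293  case kTIME:
294  case kTIMESTAMP:
295  case kDATE:
296  d.bigintval = inline_fixed_encoding_null_val(ti);
297  break;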
298  case kPOINT:
299  case kLINESTRING:
300  case kPOLYGON:
301  case kMULTIPOLYGON:
302  throw std::runtime_error("Internal error: geometry type in NullDatum.");
303  default:
304  throw std::runtime_error("Internal error: invalid type in NullDatum.");
305  }
306  return d;
307 }
int8_t tinyintval
Definition: sqltypes.h:126
#define NULL_DOUBLE
Definition: sqltypes.h:179
Definition: sqltypes.h:52
bool boolval
Definition: sqltypes.h:125
int32_t intval
Definition: sqltypes.h:128
float floatval
Definition: sqltypes.h:130
int64_t bigintval
Definition: sqltypes.h:129
#define NULL_FLOAT
Definition: sqltypes.h:178
int16_t smallintval
Definition: sqltypes.h:127
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:56
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:48
bool is_decimal() const
Definition: sqltypes.h:480
double doubleval
Definition: sqltypes.h:131

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<SQLTypes, bool> Importer_NS::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4284 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, and to_string().

Referenced by Importer_NS::Importer::gdalToColumnDescriptors().

4284  {
4285  switch (ogr_type) {
4286  case OFTInteger:
4287  return std::make_pair(kINT, false);
4288  case OFTIntegerList:
4289  return std::make_pair(kINT, true);
4290 #if GDAL_VERSION_MAJOR > 1
4291  case OFTInteger64:
4292  return std::make_pair(kBIGINT, false);
4293  case OFTInteger64List:
4294  return std::make_pair(kBIGINT, true);
4295 #endif
4296  case OFTReal:
4297  return std::make_pair(kDOUBLE, false);
4298  case OFTRealList:
4299  return std::make_pair(kDOUBLE, true);
4300  case OFTString:
4301  return std::make_pair(kTEXT, false);
4302  case OFTStringList:
4303  return std::make_pair(kTEXT, true);
4304  case OFTDate:
4305  return std::make_pair(kDATE, false);
4306  case OFTTime:
4307  return std::make_pair(kTIME, false);
4308  case OFTDateTime:
4309  return std::make_pair(kTIMESTAMP, false);
4310  case OFTBinary:
4311  default:
4312  break;
4313  }
4314  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4315 }
Definition: sqltypes.h:52
std::string to_string(char const *&&v)
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Definition: sqltypes.h:48

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes Importer_NS::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4317 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

4317  {
4318  switch (ogr_type) {
4319  case wkbPoint:
4320  return kPOINT;
4321  case wkbLineString:
4322  return kLINESTRING;
4323  case wkbPolygon:
4324  return kPOLYGON;
4325  case wkbMultiPolygon:
4326  return kMULTIPOLYGON;
4327  default:
4328  break;
4329  }
4330  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4331 }
std::string to_string(char const *&&v)

+ Here is the call graph for this function:

ArrayDatum Importer_NS::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 350 of file Importer.cpp.

References appendDatum(), Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_delim, Importer_NS::CopyParams::array_end, CHECK(), checked_malloc(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_number(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), LOG, Importer_NS::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by Importer_NS::TypedImportBuffer::add_value().

352  {
353  SQLTypeInfo elem_ti = ti.get_elem_type();
354  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
355  return ArrayDatum(0, NULL, true);
356  }
357  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
358  LOG(WARNING) << "Malformed array: " << s;
359  return ArrayDatum(0, NULL, true);
360  }
361  std::vector<std::string> elem_strs;
362  size_t last = 1;
363  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
364  i = s.find(copy_params.array_delim, last)) {
365  elem_strs.push_back(s.substr(last, i - last));
366  last = i + 1;
367  }
368  if (last + 1 <= s.size()) {
369  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
370  }
371  if (elem_strs.size() == 1) {
372  auto str = elem_strs.front();
373  auto str_trimmed = trim_space(str.c_str(), str.length());
374  if (str_trimmed == "") {
375  elem_strs.clear(); // Empty array
376  }
377  }
378  if (!elem_ti.is_string()) {
379  size_t len = elem_strs.size() * elem_ti.get_size();
380  int8_t* buf = (int8_t*)checked_malloc(len);
381  int8_t* p = buf;
382  for (auto& es : elem_strs) {
383  auto e = trim_space(es.c_str(), es.length());
384  bool is_null = (e == copy_params.null_str) || e == "NULL";
385  if (!elem_ti.is_string() && e == "") {
386  is_null = true;
387  }
388  if (elem_ti.is_number() || elem_ti.is_time()) {
389  if (!isdigit(e[0]) && e[0] != '-') {
390  is_null = true;
391  }
392  }
393  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
394  p = appendDatum(p, d, elem_ti);
395  }
396  return ArrayDatum(len, buf, false);
397  }
398  // must not be called for array of strings
399  CHECK(false);
400  return ArrayDatum(0, NULL, true);
401 }
#define LOG(tag)
Definition: Logger.h:185
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
bool is_number() const
Definition: sqltypes.h:482
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:268
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:232
CHECK(cgen_state)
bool is_time() const
Definition: sqltypes.h:483
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
void trim_space(const char *&field_begin, const char *&field_end)
bool is_null(const T &v, const SQLTypeInfo &t)
Datum StringToDatum(const std::string &s, SQLTypeInfo &ti)
Definition: Datum.cpp:90
bool is_string() const
Definition: sqltypes.h:477
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum Importer_NS::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti 
)

Definition at line 487 of file Importer.cpp.

References appendDatum(), CHECK(), checked_malloc(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), NullArray(), and TDatumToDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value().

487  {
488  SQLTypeInfo elem_ti = ti.get_elem_type();
489 
490  CHECK(!elem_ti.is_string());
491 
492  if (datum.is_null) {
493  return NullArray(ti);
494  }
495 
496  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
497  int8_t* buf = (int8_t*)checked_malloc(len);
498  int8_t* p = buf;
499  for (auto& e : datum.val.arr_val) {
500  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
501  }
502 
503  return ArrayDatum(len, buf, false);
504 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:336
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:232
CHECK(cgen_state)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:403
bool is_string() const
Definition: sqltypes.h:477
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:659
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:442
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum Importer_NS::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti 
)

Definition at line 442 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

442  {
443  Datum d;
444  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
445  switch (type) {
446  case kBOOLEAN:
447  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
448  break;
449  case kBIGINT:
450  d.bigintval =
451  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
452  break;
453  case kINT:
454  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
455  break;
456  case kSMALLINT:
457  d.smallintval =
458  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
459  break;
460  case kTINYINT:
461  d.tinyintval =
462  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
463  break;
464  case kFLOAT:
465  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
466  break;
467  case kDOUBLE:
468  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
469  break;
470  case kTIME:
471  case kTIMESTAMP:
472  case kDATE:
473  d.bigintval =
474  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
475  break;
476  case kPOINT:
477  case kLINESTRING:
478  case kPOLYGON:
479  case kMULTIPOLYGON:
480  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
481  default:
482  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
483  }
484  return d;
485 }
int8_t tinyintval
Definition: sqltypes.h:126
#define NULL_DOUBLE
Definition: sqltypes.h:179
Definition: sqltypes.h:52
bool boolval
Definition: sqltypes.h:125
int32_t intval
Definition: sqltypes.h:128
float floatval
Definition: sqltypes.h:130
int64_t bigintval
Definition: sqltypes.h:129
#define NULL_FLOAT
Definition: sqltypes.h:178
int16_t smallintval
Definition: sqltypes.h:127
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:56
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:48
bool is_decimal() const
Definition: sqltypes.h:480
double doubleval
Definition: sqltypes.h:131

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::string Importer_NS::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 220 of file Importer.cpp.

Referenced by Importer_NS::DelimitedParserUtils::get_row(), and StringToArray().

220  {
221  size_t i = 0;
222  size_t j = len;
223  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
224  i++;
225  }
226  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
227  j--;
228  }
229  return std::string(field + i, j - i);
230 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31

+ Here is the caller graph for this function:

template<class T >
bool Importer_NS::try_cast ( const std::string &  str)

Definition at line 2835 of file Importer.cpp.

2835  {
2836  try {
2837  boost::lexical_cast<T>(str);
2838  } catch (const boost::bad_lexical_cast& e) {
2839  return false;
2840  }
2841  return true;
2842 }
char* Importer_NS::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 2844 of file Importer.cpp.

Referenced by Importer_NS::Detector::detect_sqltype().

2844  {
2845  std::tm tm_struct;
2846  char* buf;
2847  for (auto format : formats) {
2848  buf = strptime(str, format.c_str(), &tm_struct);
2849  if (buf) {
2850  return buf;
2851  }
2852  }
2853  return nullptr;
2854 }

+ Here is the caller graph for this function:

Variable Documentation

std::map<std::string, ImportStatus> Importer_NS::import_status_map
static
constexpr size_t Importer_NS::kImportFileBufferSize = (1 << 23)
static

Definition at line 32 of file CopyParams.h.

constexpr bool Importer_NS::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static