OmniSciDB  5ade3759e0
Importer_NS Namespace Reference

Namespaces

 anonymous_namespace{Importer.cpp}
 

Classes

struct  BadRowsTracker
 
struct  CopyParams
 
class  DataStreamSink
 
class  Detector
 
struct  GeoImportException
 
class  ImportDriver
 
class  Importer
 
class  ImporterUtils
 
struct  ImportStatus
 
class  Loader
 
class  RenderGroupAnalyzer
 
class  TypedImportBuffer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer > >
 
using FeaturePtrVector = std::vector< OGRFeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 

Enumerations

enum  FileType { FileType::DELIMITED, FileType::POLYGON }
 
enum  ImportHeaderRow { ImportHeaderRow::AUTODETECT, ImportHeaderRow::NO_HEADER, ImportHeaderRow::HAS_HEADER }
 

Functions

std::vector< uint8_t > compress_coords (std::vector< double > &coords, const SQLTypeInfo &ti)
 
static const std::string trim_space (const char *field, const size_t len)
 
static const bool is_eol (const char &p, const std::string &line_delims)
 
static const char * get_row (const char *buf, const char *buf_end, const char *entire_buf_end, const CopyParams &copy_params, bool is_begin, const bool *is_array, std::vector< std::string > &row, bool &try_single_thread)
 
int8_t * appendDatum (int8_t *buf, Datum d, const SQLTypeInfo &ti)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
static size_t find_beginning (const char *buffer, size_t begin, size_t end, const CopyParams &copy_params)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords)
 
uint64_t compress_coord (double coord, const SQLTypeInfo &ti, bool x)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRSpatialReference *poGeographicSR, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap)
 
static size_t find_end (const char *buffer, size_t size, const CopyParams &copy_params)
 
template<class T >
bool try_cast (const std::string &str)
 
char * try_strptimes (const char *str, const std::vector< std::string > &formats)
 
void GDALErrorHandler (CPLErr eErrClass, int err_no, const char *msg)
 
std::pair< SQLTypes, bool > ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
bool parseStringArray (const std::string &s, const CopyParams &copy_params, std::vector< std::string > &string_vec)
 

Variables

static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 

Typedef Documentation

◆ ArraySliceRange

using Importer_NS::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 76 of file Importer.h.

◆ ColumnIdToRenderGroupAnalyzerMapType

using Importer_NS::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer> >

Definition at line 132 of file Importer.cpp.

◆ ColumnNameToSourceNameMapType

using Importer_NS::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 130 of file Importer.cpp.

◆ FeaturePtrVector

using Importer_NS::FeaturePtrVector = typedef std::vector<OGRFeatureUqPtr>

Definition at line 133 of file Importer.cpp.

◆ FieldNameToIndexMapType

using Importer_NS::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 129 of file Importer.cpp.

Enumeration Type Documentation

◆ FileType

enum Importer_NS::FileType
strong
Enumerator
DELIMITED 
POLYGON 

Definition at line 87 of file Importer.h.

87  {
88  DELIMITED,
89  POLYGON
90 #ifdef ENABLE_IMPORT_PARQUET
91  ,
92  PARQUET
93 #endif
94 };

◆ ImportHeaderRow

enum Importer_NS::ImportHeaderRow
strong
Enumerator
AUTODETECT 
NO_HEADER 
HAS_HEADER 

Function Documentation

◆ addBinaryStringArray()

void Importer_NS::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 526 of file Importer.cpp.

Referenced by Importer_NS::TypedImportBuffer::add_value().

526  {
527  const auto& arr = datum.val.arr_val;
528  for (const auto& elem_datum : arr) {
529  string_vec.push_back(elem_datum.val.str_val);
530  }
531 }
+ Here is the caller graph for this function:

◆ appendDatum()

int8_t * Importer_NS::appendDatum ( int8_t *  buf,
Datum  d,
const SQLTypeInfo &  ti 
)

Definition at line 323 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, Datum::smallintval, and Datum::tinyintval.

Referenced by Executor::executeSimpleInsert(), anonymous_namespace{Execute.cpp}::insert_one_dict_str(), NullArray(), StringToArray(), and TDatumToArrayDatum().

323  {
324  switch (ti.get_type()) {
325  case kBOOLEAN:
326  *(bool*)buf = d.boolval;
327  return buf + sizeof(bool);
328  case kNUMERIC:
329  case kDECIMAL:
330  case kBIGINT:
331  *(int64_t*)buf = d.bigintval;
332  return buf + sizeof(int64_t);
333  case kINT:
334  *(int32_t*)buf = d.intval;
335  return buf + sizeof(int32_t);
336  case kSMALLINT:
337  *(int16_t*)buf = d.smallintval;
338  return buf + sizeof(int16_t);
339  case kTINYINT:
340  *(int8_t*)buf = d.tinyintval;
341  return buf + sizeof(int8_t);
342  case kFLOAT:
343  *(float*)buf = d.floatval;
344  return buf + sizeof(float);
345  case kDOUBLE:
346  *(double*)buf = d.doubleval;
347  return buf + sizeof(double);
348  case kTIME:
349  case kTIMESTAMP:
350  case kDATE:
351  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
352  return buf + sizeof(int64_t);
353  default:
354  return NULL;
355  }
356  return NULL;
357 }
int8_t tinyintval
Definition: sqltypes.h:123
Definition: sqltypes.h:51
bool boolval
Definition: sqltypes.h:122
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
int32_t intval
Definition: sqltypes.h:125
float floatval
Definition: sqltypes.h:127
int64_t bigintval
Definition: sqltypes.h:126
int16_t smallintval
Definition: sqltypes.h:124
Definition: sqltypes.h:55
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:128
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compress_coord()

uint64_t Importer_NS::compress_coord ( double  coord,
const SQLTypeInfo &  ti,
bool  x 
)

Definition at line 1538 of file Importer.cpp.

References Geo_namespace::compress_lattitude_coord_geoint32(), Geo_namespace::compress_longitude_coord_geoint32(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_comp_param(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), and kENCODING_GEOINT.

Referenced by compress_coords().

1538  {
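1539  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
1540  return x ? Geo_namespace::compress_longitude_coord_geoint32(coord)
1541  : Geo_namespace::compress_lattitude_coord_geoint32(coord);
1542  }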
1543  return *reinterpret_cast<uint64_t*>(may_alias_ptr(&coord));
1544 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
DEVICE uint64_t compress_longitude_coord_geoint32(const double coord)
DEVICE uint64_t compress_lattitude_coord_geoint32(const double coord)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:332
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compress_coords()

std::vector< uint8_t > Importer_NS::compress_coords ( std::vector< double > &  coords,
const SQLTypeInfo &  ti 
)

Definition at line 1546 of file Importer.cpp.

References compress_coord(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_comp_param(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_output_srid(), kENCODING_GEOINT, and to_string().

Referenced by Parser::InsertValuesStmt::analyze(), import_thread_shapefile(), Importer_NS::Importer::set_geo_physical_import_buffer(), Importer_NS::Importer::set_geo_physical_import_buffer_columnar(), GeoPointValueConverter::toCompressedCoords(), RelAlgTranslator::translateGeoColumn(), and RelAlgTranslator::translateGeoLiteral().

1546  {
1547  std::vector<uint8_t> compressed_coords;
1548  bool x = true;
1549  for (auto coord : coords) {
1550  auto coord_data_ptr = reinterpret_cast<uint64_t*>(&coord);
1551  uint64_t coord_data = *coord_data_ptr;
1552  size_t coord_data_size = sizeof(double);
1553 
1554  if (ti.get_output_srid() == 4326) {
1555  if (x) {
1556  if (coord < -180.0 || coord > 180.0) {
1557  throw std::runtime_error("WGS84 longitude " + std::to_string(coord) +
1558  " is out of bounds");
1559  }
1560  } else {
1561  if (coord < -90.0 || coord > 90.0) {
1562  throw std::runtime_error("WGS84 latitude " + std::to_string(coord) +
1563  " is out of bounds");
1564  }
1565  }
1566  if (ti.get_compression() == kENCODING_GEOINT && ti.get_comp_param() == 32) {
1567  coord_data = compress_coord(coord, ti, x);
1568  coord_data_size = ti.get_comp_param() / 8;
1569  }
1570  x = !x;
1571  }
1572 
1573  for (size_t i = 0; i < coord_data_size; i++) {
1574  compressed_coords.push_back(coord_data & 0xFF);
1575  coord_data >>= 8;
1576  }
1577  }
1578  return compressed_coords;
1579 }
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
std::string to_string(char const *&&v)
uint64_t compress_coord(double coord, const SQLTypeInfo &ti, bool x)
Definition: Importer.cpp:1538
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:329
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:332
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ find_beginning()

static size_t Importer_NS::find_beginning ( const char *  buffer,
size_t  begin,
size_t  end,
const CopyParams &  copy_params 
)
static

Definition at line 597 of file Importer.cpp.

References Importer_NS::CopyParams::line_delim.

Referenced by import_thread_delimited().

600  {
601  // @TODO(wei) line_delim is in quotes not supported
602  if (begin == 0 || (begin > 0 && buffer[begin - 1] == copy_params.line_delim)) {
603  return 0;
604  }
605  size_t i;
606  const char* buf = buffer + begin;
607  for (i = 0; i < end - begin; i++) {
608  if (buf[i] == copy_params.line_delim) {
609  return i + 1;
610  }
611  }
612  return i;
613 }
+ Here is the caller graph for this function:

◆ find_end()

static size_t Importer_NS::find_end ( const char *  buffer,
size_t  size,
const CopyParams &  copy_params 
)
static

Definition at line 2269 of file Importer.cpp.

References logger::ERROR, Importer_NS::CopyParams::line_delim, and LOG.

Referenced by Importer_NS::Importer::importDelimited().

2269  {
2270  int i;
2271  // @TODO(wei) line_delim is in quotes not supported
2272  for (i = size - 1; i >= 0 && buffer[i] != copy_params.line_delim; i--) {
2273  ;
2274  }
2275 
2276  if (i < 0) {
2277  int slen = size < 50 ? size : 50;
2278  std::string showMsgStr(buffer, buffer + slen);
2279  LOG(ERROR) << "No line delimiter in block. Block was of size " << size
2280  << " bytes, first few characters " << showMsgStr;
2281  return size;
2282  }
2283  return i + 1;
2284 }
#define LOG(tag)
Definition: Logger.h:182
+ Here is the caller graph for this function:

◆ GDALErrorHandler()

void Importer_NS::GDALErrorHandler ( CPLErr  eErrClass,
int  err_no,
const char *  msg 
)

Definition at line 3948 of file Importer.cpp.

References CHECK, logger::INFO, Importer_NS::Importer::init_gdal_mutex, LOG, and to_string().

Referenced by Importer_NS::Importer::initGDAL().

3948  {
3949  CHECK(eErrClass >= CE_None && eErrClass <= CE_Fatal);
3950  static const char* errClassStrings[5] = {
3951  "Info",
3952  "Debug",
3953  "Warning",
3954  "Failure",
3955  "Fatal",
3956  };
3957  std::string log_msg = std::string("GDAL ") + errClassStrings[eErrClass] +
3958  std::string(": ") + msg + std::string(" (") +
3959  std::to_string(err_no) + std::string(")");
3960  if (eErrClass >= CE_Failure) {
3961  throw std::runtime_error(log_msg);
3962  } else {
3963  LOG(INFO) << log_msg;
3964  }
3965 }
#define LOG(tag)
Definition: Logger.h:182
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ gdalGatherFilesInArchiveRecursive()

void Importer_NS::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 4377 of file Importer.cpp.

References LOG, run-benchmark-import::result, and logger::WARNING.

Referenced by Importer_NS::Importer::gdalGetAllFilesInArchive().

4378  {
4379  // prepare to gather subdirectories
4380  std::vector<std::string> subdirectories;
4381 
4382  // get entries
4383  char** entries = VSIReadDir(archive_path.c_str());
4384  if (!entries) {
4385  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
4386  return;
4387  }
4388 
4389  // force scope
4390  {
4391  // request clean-up
4392  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
4393 
4394  // check all the entries
4395  int index = 0;
4396  while (true) {
4397  // get next entry, or drop out if there isn't one
4398  char* entry_c = entries[index++];
4399  if (!entry_c) {
4400  break;
4401  }
4402  std::string entry(entry_c);
4403 
4404  // ignore '.' and '..'
4405  if (entry == "." || entry == "..") {
4406  continue;
4407  }
4408 
4409  // build the full path
4410  std::string entry_path = archive_path + std::string("/") + entry;
4411 
4412  // is it a file or a sub-folder
4413  VSIStatBufL sb;
4414  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
4415  if (result < 0) {
4416  break;
4417  }
4418 
4419  if (VSI_ISDIR(sb.st_mode)) {
4420  // a directory that ends with .gdb could be a Geodatabase bundle
4421  // arguably dangerous to decide this purely by name, but any further
4422  // validation would be very complex especially at this scope
4423  if (boost::iends_with(entry_path, ".gdb")) {
4424  // add the directory as if it was a file and don't recurse into it
4425  files.push_back(entry_path);
4426  } else {
4427  // add subdirectory to be recursed into
4428  subdirectories.push_back(entry_path);
4429  }
4430  } else {
4431  // add this file
4432  files.push_back(entry_path);
4433  }
4434  }
4435  }
4436 
4437  // recurse into each subdirectories we found
4438  for (const auto& subdirectory : subdirectories) {
4439  gdalGatherFilesInArchiveRecursive(subdirectory, files);
4440  }
4441 }
#define LOG(tag)
Definition: Logger.h:182
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:4377
+ Here is the caller graph for this function:

◆ get_row()

static const char* Importer_NS::get_row ( const char *  buf,
const char *  buf_end,
const char *  entire_buf_end,
const CopyParams &  copy_params,
bool  is_begin,
const bool *  is_array,
std::vector< std::string > &  row,
bool &  try_single_thread 
)
static

Definition at line 238 of file Importer.cpp.

References Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_end, Importer_NS::CopyParams::delimiter, logger::ERROR, Importer_NS::CopyParams::escape, field(), is_eol(), Importer_NS::CopyParams::line_delim, LOG, Importer_NS::CopyParams::quote, Importer_NS::CopyParams::quoted, Importer_NS::CopyParams::threads, and trim_space().

Referenced by import_thread_delimited(), and Importer_NS::Detector::split_raw_data().

245  {
246  const char* field = buf;
247  const char* p;
248  bool in_quote = false;
249  bool in_array = false;
250  bool has_escape = false;
251  bool strip_quotes = false;
252  try_single_thread = false;
253  std::string line_endings({copy_params.line_delim, '\r', '\n'});
254  for (p = buf; p < entire_buf_end; p++) {
255  if (*p == copy_params.escape && p < entire_buf_end - 1 &&
256  *(p + 1) == copy_params.quote) {
257  p++;
258  has_escape = true;
259  } else if (copy_params.quoted && *p == copy_params.quote) {
260  in_quote = !in_quote;
261  if (in_quote) {
262  strip_quotes = true;
263  }
264  } else if (!in_quote && is_array != nullptr && *p == copy_params.array_begin &&
265  is_array[row.size()]) {
266  in_array = true;
267  } else if (!in_quote && is_array != nullptr && *p == copy_params.array_end &&
268  is_array[row.size()]) {
269  in_array = false;
270  } else if (*p == copy_params.delimiter || is_eol(*p, line_endings)) {
271  if (!in_quote && !in_array) {
272  if (!has_escape && !strip_quotes) {
273  std::string s = trim_space(field, p - field);
274  row.push_back(s);
275  } else {
276  auto field_buf = std::make_unique<char[]>(p - field + 1);
277  int j = 0, i = 0;
278  for (; i < p - field; i++, j++) {
279  if (has_escape && field[i] == copy_params.escape &&
280  field[i + 1] == copy_params.quote) {
281  field_buf[j] = copy_params.quote;
282  i++;
283  } else {
284  field_buf[j] = field[i];
285  }
286  }
287  std::string s = trim_space(field_buf.get(), j);
288  if (copy_params.quoted && s.size() > 0 && s.front() == copy_params.quote) {
289  s.erase(0, 1);
290  }
291  if (copy_params.quoted && s.size() > 0 && s.back() == copy_params.quote) {
292  s.pop_back();
293  }
294  row.push_back(s);
295  }
296  field = p + 1;
297  has_escape = false;
298  strip_quotes = false;
299  }
300  if (is_eol(*p, line_endings) &&
301  ((!in_quote && !in_array) || copy_params.threads != 1)) {
302  while (p + 1 < buf_end && is_eol(*(p + 1), line_endings)) {
303  p++;
304  }
305  break;
306  }
307  }
308  }
309  /*
310  @TODO(wei) do error handling
311  */
312  if (in_quote) {
313  LOG(ERROR) << "Unmatched quote.";
314  try_single_thread = true;
315  }
316  if (in_array) {
317  LOG(ERROR) << "Unmatched array.";
318  try_single_thread = true;
319  }
320  return p;
321 }
static const bool is_eol(const char &p, const std::string &line_delims)
Definition: Importer.cpp:229
#define LOG(tag)
Definition: Logger.h:182
static const std::string trim_space(const char *field, const size_t len)
Definition: Importer.cpp:217
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ import_thread_delimited()

static ImportStatus Importer_NS::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer 
)
static

Definition at line 1775 of file Importer.cpp.

References Importer_NS::Importer::buffer, CHECK, Importer_NS::DataStreamSink::copy_params, DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), find_beginning(), Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), Importer_NS::Importer::get_is_array(), get_row(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Importer_NS::Importer::getCatalog(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Importer_NS::DataStreamSink::import_status, importGeoFromLonLat(), logger::INFO, IS_GEO, anonymous_namespace{TypedDataAccessors.h}::is_null(), kMULTIPOLYGON, kPOINT, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::lonlat, Importer_NS::CopyParams::max_reject, Importer_NS::CopyParams::null_str, Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::Importer::set_geo_physical_import_buffer(), Importer_NS::ImportStatus::thread_id, and to_string().

Referenced by Importer_NS::Importer::importDelimited().

1783  {
1784  ImportStatus import_status;
1785  int64_t total_get_row_time_us = 0;
1786  int64_t total_str_to_val_time_us = 0;
1787  CHECK(scratch_buffer);
1788  auto buffer = scratch_buffer.get();
1789  auto load_ms = measure<>::execution([]() {});
1790  auto ms = measure<>::execution([&]() {
1791  const CopyParams& copy_params = importer->get_copy_params();
1792  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
1793  size_t begin = find_beginning(buffer, begin_pos, end_pos, copy_params);
1794  const char* thread_buf = buffer + begin_pos + begin;
1795  const char* thread_buf_end = buffer + end_pos;
1796  const char* buf_end = buffer + total_size;
1797  bool try_single_thread = false;
1798  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
1799  importer->get_import_buffers(thread_id);
1800  auto us = measure<std::chrono::microseconds>::execution([&]() {});
1801  int phys_cols = 0;
1802  int point_cols = 0;
1803  for (const auto cd : col_descs) {
1804  const auto& col_ti = cd->columnType;
1805  phys_cols += col_ti.get_physical_cols();
1806  if (cd->columnType.get_type() == kPOINT) {
1807  point_cols++;
1808  }
1809  }
1810  auto num_cols = col_descs.size() - phys_cols;
1811  for (const auto& p : import_buffers) {
1812  p->clear();
1813  }
1814  std::vector<std::string> row;
1815  size_t row_index_plus_one = 0;
1816  for (const char* p = thread_buf; p < thread_buf_end; p++) {
1817  row.clear();
1818  if (DEBUG_TIMING) {
1819  us = measure<std::chrono::microseconds>::execution([&]() {
1820  p = get_row(p,
1821  thread_buf_end,
1822  buf_end,
1823  copy_params,
1824  p == thread_buf,
1825  importer->get_is_array(),
1826  row,
1827  try_single_thread);
1828  });
1829  total_get_row_time_us += us;
1830  } else {
1831  p = get_row(p,
1832  thread_buf_end,
1833  buf_end,
1834  copy_params,
1835  p == thread_buf,
1836  importer->get_is_array(),
1837  row,
1838  try_single_thread);
1839  }
1840  row_index_plus_one++;
1841  // Each POINT could consume two separate coords instead of a single WKT
1842  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
1843  import_status.rows_rejected++;
1844  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
1845  << row.size() << "): " << row;
1846  if (import_status.rows_rejected > copy_params.max_reject) {
1847  break;
1848  }
1849  continue;
1850  }
1851  us = measure<std::chrono::microseconds>::execution([&]() {
1852  size_t import_idx = 0;
1853  size_t col_idx = 0;
1854  try {
1855  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
1856  auto cd = *cd_it;
1857  const auto& col_ti = cd->columnType;
1858  if (col_ti.get_physical_cols() == 0) {
1859  // not geo
1860 
1861  // store the string (possibly null)
1862  bool is_null =
1863  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
1864  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
1865  // So initially nullness may be missed and not passed to add_value,
1866  // which then might also check and still decide it's actually a NULL, e.g.
1867  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
1868  // So "NULL" is not recognized as NULL but then it's not recognized as
1869  // a valid kINT, so it's a NULL after all.
1870  // Checking for "NULL" here too, as a widely accepted notation for NULL.
1871  if (!cd->columnType.is_string() && row[import_idx].empty()) {
1872  is_null = true;
1873  }
1874  import_buffers[col_idx]->add_value(
1875  cd, row[import_idx], is_null, copy_params);
1876 
1877  // next
1878  ++import_idx;
1879  ++col_idx;
1880  } else {
1881  // geo
1882 
1883  // store null string in the base column
1884  import_buffers[col_idx]->add_value(
1885  cd, copy_params.null_str, true, copy_params);
1886 
1887  // WKT from string we're not storing
1888  std::string wkt{row[import_idx]};
1889 
1890  // next
1891  ++import_idx;
1892  ++col_idx;
1893 
1894  SQLTypes col_type = col_ti.get_type();
1895  CHECK(IS_GEO(col_type));
1896 
1897  std::vector<double> coords;
1898  std::vector<double> bounds;
1899  std::vector<int> ring_sizes;
1900  std::vector<int> poly_rings;
1901  int render_group = 0;
1902 
1903  if (col_type == kPOINT && wkt.size() > 0 &&
1904  (wkt[0] == '.' || isdigit(wkt[0]) || wkt[0] == '-')) {
1905  // Invalid WKT, looks more like a scalar.
1906  // Try custom POINT import: from two separate scalars rather than WKT
1907  // string
1908  double lon = std::atof(wkt.c_str());
1909  double lat = NAN;
1910  std::string lat_str{row[import_idx]};
1911  ++import_idx;
1912  if (lat_str.size() > 0 &&
1913  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
1914  lat = std::atof(lat_str.c_str());
1915  }
1916  // Swap coordinates if this table uses a reverse order: lat/lon
1917  if (!copy_params.lonlat) {
1918  std::swap(lat, lon);
1919  }
1920  // TODO: should check if POINT column should have been declared with SRID
1921  // WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
1922  // throw std::runtime_error("POINT column " + cd->columnName + " is not
1923  // WGS84, cannot insert lon/lat");
1924  // }
1925  if (!importGeoFromLonLat(lon, lat, coords)) {
1926  throw std::runtime_error(
1927  "Cannot read lon/lat to insert into POINT column " +
1928  cd->columnName);
1929  }
1930  } else {
1931  // import it
1932  SQLTypeInfo import_ti;
1933  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
1934  wkt,
1935  import_ti,
1936  coords,
1937  bounds,
1938  ring_sizes,
1939  poly_rings,
1940  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
1941  std::string msg =
1942  "Failed to extract valid geometry from row " +
1943  std::to_string(first_row_index_this_buffer + row_index_plus_one) +
1944  " for column " + cd->columnName;
1945  throw std::runtime_error(msg);
1946  }
1947 
1948  // validate types
1949  if (col_type != import_ti.get_type()) {
1950  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
1951  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
1952  col_type == SQLTypes::kMULTIPOLYGON)) {
1953  throw std::runtime_error(
1954  "Imported geometry doesn't match the type of column " +
1955  cd->columnName);
1956  }
1957  }
1958 
1959  if (columnIdToRenderGroupAnalyzerMap.size()) {
1960  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
1961  if (ring_sizes.size()) {
1962  // get a suitable render group for these poly coords
1963  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
1964  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
1965  render_group =
1966  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
1967  } else {
1968  // empty poly
1969  render_group = -1;
1970  }
1971  }
1972  }
1973  }
1974 
1975  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
1976  cd,
1977  import_buffers,
1978  col_idx,
1979  coords,
1980  bounds,
1981  ring_sizes,
1982  poly_rings,
1983  render_group);
1984  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
1985  ++cd_it;
1986  }
1987  }
1988  }
1989  import_status.rows_completed++;
1990  } catch (const std::exception& e) {
1991  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
1992  import_buffers[col_idx_to_pop]->pop_value();
1993  }
1994  import_status.rows_rejected++;
1995  LOG(ERROR) << "Input exception thrown: " << e.what()
1996  << ". Row discarded. Data: " << row;
1997  }
1998  });
1999  total_str_to_val_time_us += us;
2000  }
2001  if (import_status.rows_completed > 0) {
2002  load_ms = measure<>::execution(
2003  [&]() { importer->load(import_buffers, import_status.rows_completed); });
2004  }
2005  });
2006  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2007  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2008  << import_status.rows_completed << " rows inserted in "
2009  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2010  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2011  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2012  << "sec" << std::endl;
2013  }
2014 
2015  import_status.thread_id = thread_id;
2016  // LOG(INFO) << " return " << import_status.thread_id << std::endl;
2017 
2018  return import_status;
2019 }
SQLTypes
Definition: sqltypes.h:40
#define LOG(tag)
Definition: Logger.h:182
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
std::string null_str
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:135
static const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const CopyParams &copy_params, bool is_begin, const bool *is_array, std::vector< std::string > &row, bool &try_single_thread)
Definition: Importer.cpp:238
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords)
Definition: Importer.cpp:1519
bool is_null(const T &v, const SQLTypeInfo &t)
static size_t find_beginning(const char *buffer, size_t begin, size_t end, const CopyParams &copy_params)
Definition: Importer.cpp:597
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
#define CHECK(condition)
Definition: Logger.h:187
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:141
#define IS_GEO(T)
Definition: sqltypes.h:164
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ import_thread_shapefile()

static ImportStatus Importer_NS::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRSpatialReference *  poGeographicSR,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap 
)
static

Definition at line 2021 of file Importer.cpp.

References CHECK, compress_coords(), Importer_NS::DataStreamSink::copy_params, DEBUG_TIMING, logger::ERROR, Importer_NS::Importer::get_column_descs(), Importer_NS::Importer::get_copy_params(), Importer_NS::Importer::get_import_buffers(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Importer_NS::DataStreamSink::import_status, logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, Importer_NS::Importer::load(), LOG, Importer_NS::CopyParams::null_str, Importer_NS::ImportStatus::rows_completed, Importer_NS::ImportStatus::rows_rejected, Importer_NS::ImportStatus::thread_id, timer_start(), timer_stop(), and to_string().

Referenced by Importer_NS::Importer::importGDAL().

2030  {
2031  ImportStatus import_status;
2032  const CopyParams& copy_params = importer->get_copy_params();
2033  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2034  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2035  importer->get_import_buffers(thread_id);
2036 
2037  for (const auto& p : import_buffers) {
2038  p->clear();
2039  }
2040 
2041  auto convert_timer = timer_start();
2042 
2043  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2044  if (!features[iFeature]) {
2045  continue;
2046  }
2047 
2048  // get this feature's geometry
2049  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2050  if (pGeometry) {
2051  // for geodatabase, we need to consider features with no geometry
2052  // as we still want to create a table, even if it has no geo column
2053 
2054  // transform it
2055  // avoid GDAL error if not transformable
2056  if (pGeometry->getSpatialReference()) {
2057  pGeometry->transformTo(poGeographicSR);
2058  }
2059  }
2060 
2061  size_t col_idx = 0;
2062  try {
2063  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2064  auto cd = *cd_it;
2065 
2066  // is this a geo column?
2067  const auto& col_ti = cd->columnType;
2068  if (col_ti.is_geometry()) {
2069  // some Shapefiles get us here, but the OGRGeometryRef is null
2070  if (!pGeometry) {
2071  std::string msg = "Geometry feature " +
2072  std::to_string(firstFeature + iFeature + 1) +
2073  " has null GeometryRef";
2074  throw std::runtime_error(msg);
2075  }
2076 
2077  // Note that this assumes there is one and only one geo column in the table.
2078  // Currently, the importer only supports reading a single geospatial feature
2079  // from an input shapefile / geojson file, but this code will need to be
2080  // modified if that changes
2081  SQLTypes col_type = col_ti.get_type();
2082 
2083  // store null string in the base column
2084  import_buffers[col_idx]->add_value(cd, copy_params.null_str, true, copy_params);
2085  ++col_idx;
2086 
2087  // the data we now need to extract for the other columns
2088  std::vector<double> coords;
2089  std::vector<double> bounds;
2090  std::vector<int> ring_sizes;
2091  std::vector<int> poly_rings;
2092  int render_group = 0;
2093 
2094  // extract it
2095  SQLTypeInfo import_ti;
2096 
2097  if (!Geo_namespace::GeoTypesFactory::getGeoColumns(
2098  pGeometry,
2099  import_ti,
2100  coords,
2101  bounds,
2102  ring_sizes,
2103  poly_rings,
2104  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2105  std::string msg = "Failed to extract valid geometry from feature " +
2106  std::to_string(firstFeature + iFeature + 1) +
2107  " for column " + cd->columnName;
2108  throw std::runtime_error(msg);
2109  }
2110 
2111  // validate types
2112  if (col_type != import_ti.get_type()) {
2113  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2114  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2115  col_type == SQLTypes::kMULTIPOLYGON)) {
2116  throw std::runtime_error(
2117  "Imported geometry doesn't match the type of column " + cd->columnName);
2118  }
2119  }
2120 
2121  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2122  if (ring_sizes.size()) {
2123  // get a suitable render group for these poly coords
2124  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2125  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2126  render_group = (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2127  } else {
2128  // empty poly
2129  render_group = -1;
2130  }
2131  }
2132 
2133  // create coords array value and add it to the physical column
2134  ++cd_it;
2135  auto cd_coords = *cd_it;
2136  std::vector<TDatum> td_coord_data;
2137  std::vector<uint8_t> compressed_coords = compress_coords(coords, col_ti);
2138  for (auto cc : compressed_coords) {
2139  TDatum td_byte;
2140  td_byte.val.int_val = cc;
2141  td_coord_data.push_back(td_byte);
2142  }
2143  TDatum tdd_coords;
2144  tdd_coords.val.arr_val = td_coord_data;
2145  tdd_coords.is_null = false;
2146  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2147  ++col_idx;
2148 
2149  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2150  // Create ring_sizes array value and add it to the physical column
2151  ++cd_it;
2152  auto cd_ring_sizes = *cd_it;
2153  std::vector<TDatum> td_ring_sizes;
2154  for (auto ring_size : ring_sizes) {
2155  TDatum td_ring_size;
2156  td_ring_size.val.int_val = ring_size;
2157  td_ring_sizes.push_back(td_ring_size);
2158  }
2159  TDatum tdd_ring_sizes;
2160  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2161  tdd_ring_sizes.is_null = false;
2162  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2163  ++col_idx;
2164  }
2165 
2166  if (col_type == kMULTIPOLYGON) {
2167  // Create poly_rings array value and add it to the physical column
2168  ++cd_it;
2169  auto cd_poly_rings = *cd_it;
2170  std::vector<TDatum> td_poly_rings;
2171  for (auto num_rings : poly_rings) {
2172  TDatum td_num_rings;
2173  td_num_rings.val.int_val = num_rings;
2174  td_poly_rings.push_back(td_num_rings);
2175  }
2176  TDatum tdd_poly_rings;
2177  tdd_poly_rings.val.arr_val = td_poly_rings;
2178  tdd_poly_rings.is_null = false;
2179  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2180  ++col_idx;
2181  }
2182 
2183  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2184  col_type == kMULTIPOLYGON) {
2185  // Create bounds array value and add it to the physical column
2186  ++cd_it;
2187  auto cd_bounds = *cd_it;
2188  std::vector<TDatum> td_bounds_data;
2189  for (auto b : bounds) {
2190  TDatum td_double;
2191  td_double.val.real_val = b;
2192  td_bounds_data.push_back(td_double);
2193  }
2194  TDatum tdd_bounds;
2195  tdd_bounds.val.arr_val = td_bounds_data;
2196  tdd_bounds.is_null = false;
2197  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2198  ++col_idx;
2199  }
2200 
2201  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2202  // Create render_group value and add it to the physical column
2203  ++cd_it;
2204  auto cd_render_group = *cd_it;
2205  TDatum td_render_group;
2206  td_render_group.val.int_val = render_group;
2207  td_render_group.is_null = false;
2208  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2209  ++col_idx;
2210  }
2211  } else {
2212  // regular column
2213  // pull from GDAL metadata
2214  const auto cit = columnNameToSourceNameMap.find(cd->columnName);
2215  CHECK(cit != columnNameToSourceNameMap.end());
2216  const std::string& fieldName = cit->second;
2217  const auto fit = fieldNameToIndexMap.find(fieldName);
2218  CHECK(fit != fieldNameToIndexMap.end());
2219  size_t iField = fit->second;
2220  CHECK(iField < fieldNameToIndexMap.size());
2221  std::string fieldContents = features[iFeature]->GetFieldAsString(iField);
2222  import_buffers[col_idx]->add_value(cd, fieldContents, false, copy_params);
2223  ++col_idx;
2224  }
2225  }
2226  import_status.rows_completed++;
2227  } catch (const std::exception& e) {
2228  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2229  import_buffers[col_idx_to_pop]->pop_value();
2230  }
2231  import_status.rows_rejected++;
2232  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2233  }
2234  }
2235  float convert_ms =
2236  float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2237  convert_timer)) /
2238  1000.0f;
2239 
2240  float load_ms = 0.0f;
2241  if (import_status.rows_completed > 0) {
2242  auto load_timer = timer_start();
2243  importer->load(import_buffers, import_status.rows_completed);
2244  load_ms =
2245  float(
2246  timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>(
2247  load_timer)) /
2248  1000.0f;
2249  }
2250 
2251  if (DEBUG_TIMING && import_status.rows_completed > 0) {
2252  LOG(INFO) << "DEBUG: Process " << convert_ms << "ms";
2253  LOG(INFO) << "DEBUG: Load " << load_ms << "ms";
2254  }
2255 
2256  import_status.thread_id = thread_id;
2257 
2258  if (DEBUG_TIMING) {
2259  LOG(INFO) << "DEBUG: Total "
2260  << float(timer_stop<std::chrono::steady_clock::time_point,
2261  std::chrono::microseconds>(convert_timer)) /
2262  1000.0f
2263  << "ms";
2264  }
2265 
2266  return import_status;
2267 }
SQLTypes
Definition: sqltypes.h:40
#define LOG(tag)
Definition: Logger.h:182
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Importer.cpp:1546
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
std::string null_str
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:135
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:459
#define CHECK(condition)
Definition: Logger.h:187
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:141
Type timer_start()
Definition: measure.h:40
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ importGeoFromLonLat()

bool Importer_NS::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords 
)

Definition at line 1519 of file Importer.cpp.

Referenced by import_thread_delimited().

1519  {
1520  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1521  return false;
1522  }
1523  auto point = new OGRPoint(lon, lat);
1524  // NOTE(adb): Use OGRSpatialReferenceUqPtr to ensure proper deletion
1525  // auto poSR0 = new OGRSpatialReference();
1526  // poSR0->importFromEPSG(4326);
1527  // point->assignSpatialReference(poSR0);
1528 
1529  // auto poSR = new OGRSpatialReference();
1530  // poSR->importFromEPSG(3857);
1531  // point->transformTo(poSR);
1532 
1533  coords.push_back(point->getX());
1534  coords.push_back(point->getY());
1535  return true;
1536 }
+ Here is the caller graph for this function:

◆ is_eol()

static const bool Importer_NS::is_eol ( const char &  p,
const std::string &  line_delims 
)
static

Definition at line 229 of file Importer.cpp.

Referenced by get_row().

229  {
230  for (auto i : line_delims) {
231  if (p == i) {
232  return true;
233  }
234  }
235  return false;
236 }
+ Here is the caller graph for this function:

◆ NullArray()

ArrayDatum Importer_NS::NullArray ( const SQLTypeInfo & ti)

Definition at line 494 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), anonymous_namespace{ImportTest.cpp}::d(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), NullArrayDatum(), and NullDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value(), Importer_NS::TypedImportBuffer::add_values(), Importer_NS::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

494  {
495  SQLTypeInfo elem_ti = ti.get_elem_type();
496  auto len = ti.get_size();
497 
498  if (elem_ti.is_string()) {
499  // must not be called for array of strings
500  CHECK(false);
501  return ArrayDatum(0, NULL, true);
502  }
503 
504  if (len > 0) {
505  // Compose a NULL fixlen array
506  int8_t* buf = (int8_t*)checked_malloc(len);
507  // First scalar is a NULL_ARRAY sentinel
508  Datum d = NullArrayDatum(elem_ti);
509  int8_t* p = appendDatum(buf, d, elem_ti);
510  // Rest is filled with normal NULL sentinels
511  Datum d0 = NullDatum(elem_ti);
512  while ((p - buf) < len) {
513  p = appendDatum(p, d0, elem_ti);
514  }
515  CHECK((p - buf) == len);
516  return ArrayDatum(len, buf, true);
517  }
518  // NULL varlen array
519  return ArrayDatum(0, NULL, true);
520 }
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:359
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:323
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:400
#define CHECK(condition)
Definition: Logger.h:187
bool is_string() const
Definition: sqltypes.h:450
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullArrayDatum()

Datum Importer_NS::NullArrayDatum ( SQLTypeInfo & ti)

Definition at line 400 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, anonymous_namespace{ImportTest.cpp}::d(), decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_array_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run-benchmark-import::type.

Referenced by NullArray().

400  {
401  Datum d;
402  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
403  switch (type) {
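404  case kBOOLEAN:
405  d.boolval = inline_fixed_encoding_null_array_val(ti);
406  break;
407  case kBIGINT:
408  d.bigintval = inline_fixed_encoding_null_array_val(ti);
409  break;
410  case kINT:
411  d.intval = inline_fixed_encoding_null_array_val(ti);
412  break;
413  case kSMALLINT:
414  d.smallintval = inline_fixed_encoding_null_array_val(ti);
415  break;
416  case kTINYINT:
417  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
418  break;
419  case kFLOAT:
420  d.floatval = NULL_ARRAY_FLOAT;
421  break;
422  case kDOUBLE:
423  d.doubleval = NULL_ARRAY_DOUBLE;
424  break;
425  case kTIME:
426  case kTIMESTAMP:
427  case kDATE:
428  d.bigintval = inline_fixed_encoding_null_array_val(ti);
429  break;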
430  case kPOINT:
431  case kLINESTRING:
432  case kPOLYGON:
433  case kMULTIPOLYGON:
434  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
435  default:
436  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
437  }
438  return d;
439 }
int8_t tinyintval
Definition: sqltypes.h:123
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
Definition: sqltypes.h:51
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:184
bool boolval
Definition: sqltypes.h:122
int32_t intval
Definition: sqltypes.h:125
float floatval
Definition: sqltypes.h:127
bool is_decimal() const
Definition: sqltypes.h:453
int64_t bigintval
Definition: sqltypes.h:126
int16_t smallintval
Definition: sqltypes.h:124
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:128
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:183
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ NullDatum()

Datum Importer_NS::NullDatum ( SQLTypeInfo & ti)

Definition at line 359 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, anonymous_namespace{ImportTest.cpp}::d(), decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run-benchmark-import::type.

Referenced by NullArray(), and StringToArray().

359  {
360  Datum d;
361  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
362  switch (type) {
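363  case kBOOLEAN:
364  d.boolval = inline_fixed_encoding_null_val(ti);
365  break;
366  case kBIGINT:
367  d.bigintval = inline_fixed_encoding_null_val(ti);
368  break;
369  case kINT:
370  d.intval = inline_fixed_encoding_null_val(ti);
371  break;
372  case kSMALLINT:
373  d.smallintval = inline_fixed_encoding_null_val(ti);
374  break;
375  case kTINYINT:
376  d.tinyintval = inline_fixed_encoding_null_val(ti);
377  break;
378  case kFLOAT:
379  d.floatval = NULL_FLOAT;
380  break;
381  case kDOUBLE:
382  d.doubleval = NULL_DOUBLE;
383  break;
384  case kTIME:
385  case kTIMESTAMP:
386  case kDATE:
387  d.bigintval = inline_fixed_encoding_null_val(ti);
388  break;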
389  case kPOINT:
390  case kLINESTRING:
391  case kPOLYGON:
392  case kMULTIPOLYGON:
393  throw std::runtime_error("Internal error: geometry type in NullDatum.");
394  default:
395  throw std::runtime_error("Internal error: invalid type in NullDatum.");
396  }
397  return d;
398 }
int8_t tinyintval
Definition: sqltypes.h:123
#define NULL_DOUBLE
Definition: sqltypes.h:176
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
Definition: sqltypes.h:51
bool boolval
Definition: sqltypes.h:122
int32_t intval
Definition: sqltypes.h:125
float floatval
Definition: sqltypes.h:127
bool is_decimal() const
Definition: sqltypes.h:453
int64_t bigintval
Definition: sqltypes.h:126
#define NULL_FLOAT
Definition: sqltypes.h:175
int16_t smallintval
Definition: sqltypes.h:124
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:128
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [1/2]

std::pair<SQLTypes, bool> Importer_NS::ogr_to_type ( const OGRFieldType &  ogr_type)

Definition at line 4215 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kINT, kTEXT, kTIME, kTIMESTAMP, and to_string().

Referenced by Importer_NS::Importer::gdalToColumnDescriptors().

4215  {
4216  switch (ogr_type) {
4217  case OFTInteger:
4218  return std::make_pair(kINT, false);
4219  case OFTIntegerList:
4220  return std::make_pair(kINT, true);
4221 #if GDAL_VERSION_MAJOR > 1
4222  case OFTInteger64:
4223  return std::make_pair(kBIGINT, false);
4224  case OFTInteger64List:
4225  return std::make_pair(kBIGINT, true);
4226 #endif
4227  case OFTReal:
4228  return std::make_pair(kDOUBLE, false);
4229  case OFTRealList:
4230  return std::make_pair(kDOUBLE, true);
4231  case OFTString:
4232  return std::make_pair(kTEXT, false);
4233  case OFTStringList:
4234  return std::make_pair(kTEXT, true);
4235  case OFTDate:
4236  return std::make_pair(kDATE, false);
4237  case OFTTime:
4238  return std::make_pair(kTIME, false);
4239  case OFTDateTime:
4240  return std::make_pair(kTIMESTAMP, false);
4241  case OFTBinary:
4242  default:
4243  break;
4244  }
4245  throw std::runtime_error("Unknown OGR field type: " + std::to_string(ogr_type));
4246 }
Definition: sqltypes.h:51
std::string to_string(char const *&&v)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:47
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ ogr_to_type() [2/2]

SQLTypes Importer_NS::ogr_to_type ( const OGRwkbGeometryType &  ogr_type)

Definition at line 4248 of file Importer.cpp.

References kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, and to_string().

4248  {
4249  switch (ogr_type) {
4250  case wkbPoint:
4251  return kPOINT;
4252  case wkbLineString:
4253  return kLINESTRING;
4254  case wkbPolygon:
4255  return kPOLYGON;
4256  case wkbMultiPolygon:
4257  return kMULTIPOLYGON;
4258  default:
4259  break;
4260  }
4261  throw std::runtime_error("Unknown OGR geom type: " + std::to_string(ogr_type));
4262 }
std::string to_string(char const *&&v)
+ Here is the call graph for this function:

◆ parseStringArray()

bool Importer_NS::parseStringArray ( const std::string &  s,
const CopyParams copy_params,
std::vector< std::string > &  string_vec 
)

Referenced by TEST().

+ Here is the caller graph for this function:

◆ StringToArray()

ArrayDatum Importer_NS::StringToArray ( const std::string &  s,
const SQLTypeInfo & ti,
const CopyParams & copy_params 
)

Definition at line 441 of file Importer.cpp.

References appendDatum(), Importer_NS::CopyParams::array_begin, Importer_NS::CopyParams::array_delim, Importer_NS::CopyParams::array_end, CHECK, checked_malloc(), anonymous_namespace{ImportTest.cpp}::d(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), anonymous_namespace{TypedDataAccessors.h}::is_null(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_number(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_time(), LOG, Importer_NS::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by Importer_NS::TypedImportBuffer::add_value(), anonymous_namespace{ExecuteTest.cpp}::import_array_test(), and TEST().

443  {
444  SQLTypeInfo elem_ti = ti.get_elem_type();
445  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
446  return ArrayDatum(0, NULL, true);
447  }
448  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
449  LOG(WARNING) << "Malformed array: " << s;
450  return ArrayDatum(0, NULL, true);
451  }
452  std::vector<std::string> elem_strs;
453  size_t last = 1;
454  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
455  i = s.find(copy_params.array_delim, last)) {
456  elem_strs.push_back(s.substr(last, i - last));
457  last = i + 1;
458  }
459  if (last + 1 <= s.size()) {
460  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
461  }
462  if (elem_strs.size() == 1) {
463  auto str = elem_strs.front();
464  auto str_trimmed = trim_space(str.c_str(), str.length());
465  if (str_trimmed == "") {
466  elem_strs.clear(); // Empty array
467  }
468  }
469  if (!elem_ti.is_string()) {
470  size_t len = elem_strs.size() * elem_ti.get_size();
471  int8_t* buf = (int8_t*)checked_malloc(len);
472  int8_t* p = buf;
473  for (auto& es : elem_strs) {
474  auto e = trim_space(es.c_str(), es.length());
475  bool is_null = (e == copy_params.null_str) || e == "NULL";
476  if (!elem_ti.is_string() && e == "") {
477  is_null = true;
478  }
479  if (elem_ti.is_number() || elem_ti.is_time()) {
480  if (!isdigit(e[0]) && e[0] != '-') {
481  is_null = true;
482  }
483  }
484  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
485  p = appendDatum(p, d, elem_ti);
486  }
487  return ArrayDatum(len, buf, false);
488  }
489  // must not be called for array of strings
490  CHECK(false);
491  return ArrayDatum(0, NULL, true);
492 }
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
bool is_time() const
Definition: sqltypes.h:456
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
#define LOG(tag)
Definition: Logger.h:182
std::string null_str
static const std::string trim_space(const char *field, const size_t len)
Definition: Importer.cpp:217
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:359
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:323
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
bool is_null(const T &v, const SQLTypeInfo &t)
Datum StringToDatum(const std::string &s, SQLTypeInfo &ti)
Definition: Datum.cpp:90
bool is_number() const
Definition: sqltypes.h:455
#define CHECK(condition)
Definition: Logger.h:187
bool is_string() const
Definition: sqltypes.h:450
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToArrayDatum()

ArrayDatum Importer_NS::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo & ti 
)

Definition at line 578 of file Importer.cpp.

References appendDatum(), CHECK, checked_malloc(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_elem_type(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_size(), SQLTypeInfoCore< TYPE_FACET_PACK >::is_string(), NullArray(), and TDatumToDatum().

Referenced by Importer_NS::TypedImportBuffer::add_value().

578  {
579  SQLTypeInfo elem_ti = ti.get_elem_type();
580 
581  CHECK(!elem_ti.is_string());
582 
583  if (datum.is_null) {
584  return NullArray(ti);
585  }
586 
587  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
588  int8_t* buf = (int8_t*)checked_malloc(len);
589  int8_t* p = buf;
590  for (auto& e : datum.val.arr_val) {
591  p = appendDatum(p, TDatumToDatum(e, elem_ti), elem_ti);
592  }
593 
594  return ArrayDatum(len, buf, false);
595 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: Importer.cpp:323
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:494
#define CHECK(condition)
Definition: Logger.h:187
bool is_string() const
Definition: sqltypes.h:450
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:533
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ TDatumToDatum()

Datum Importer_NS::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo & ti 
)

Definition at line 533 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, anonymous_namespace{ImportTest.cpp}::d(), decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfoCore< TYPE_FACET_PACK >::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run-benchmark-import::type.

Referenced by TDatumToArrayDatum().

533  {
534  Datum d;
535  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
536  switch (type) {
537  case kBOOLEAN:
538  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
539  break;
540  case kBIGINT:
541  d.bigintval =
542  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
543  break;
544  case kINT:
545  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
546  break;
547  case kSMALLINT:
548  d.smallintval =
549  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
550  break;
551  case kTINYINT:
552  d.tinyintval =
553  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
554  break;
555  case kFLOAT:
556  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
557  break;
558  case kDOUBLE:
559  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
560  break;
561  case kTIME:
562  case kTIMESTAMP:
563  case kDATE:
564  d.bigintval =
565  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
566  break;
567  case kPOINT:
568  case kLINESTRING:
569  case kPOLYGON:
570  case kMULTIPOLYGON:
571  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
572  default:
573  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
574  }
575  return d;
576 }
int8_t tinyintval
Definition: sqltypes.h:123
#define NULL_DOUBLE
Definition: sqltypes.h:176
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
Definition: sqltypes.h:51
bool boolval
Definition: sqltypes.h:122
int32_t intval
Definition: sqltypes.h:125
float floatval
Definition: sqltypes.h:127
bool is_decimal() const
Definition: sqltypes.h:453
int64_t bigintval
Definition: sqltypes.h:126
#define NULL_FLOAT
Definition: sqltypes.h:175
int16_t smallintval
Definition: sqltypes.h:124
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:268
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
double doubleval
Definition: sqltypes.h:128
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ trim_space()

static const std::string Importer_NS::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 217 of file Importer.cpp.

Referenced by get_row(), and StringToArray().

217  {
218  size_t i = 0;
219  size_t j = len;
220  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
221  i++;
222  }
223  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
224  j--;
225  }
226  return std::string(field + i, j - i);
227 }
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
+ Here is the caller graph for this function:

◆ try_cast()

template<class T >
bool Importer_NS::try_cast ( const std::string &  str)

Definition at line 2747 of file Importer.cpp.

2747  {
2748  try {
2749  boost::lexical_cast<T>(str);
2750  } catch (const boost::bad_lexical_cast& e) {
2751  return false;
2752  }
2753  return true;
2754 }

◆ try_strptimes()

char* Importer_NS::try_strptimes ( const char *  str,
const std::vector< std::string > &  formats 
)
inline

Definition at line 2756 of file Importer.cpp.

Referenced by Importer_NS::Detector::detect_sqltype().

2756  {
2757  std::tm tm_struct;
2758  char* buf;
2759  for (auto format : formats) {
2760  buf = strptime(str, format.c_str(), &tm_struct);
2761  if (buf) {
2762  return buf;
2763  }
2764  }
2765  return nullptr;
2766 }
+ Here is the caller graph for this function:

Variable Documentation

◆ import_status_map

std::map<std::string, ImportStatus> Importer_NS::import_status_map
static

Definition at line 144 of file Importer.cpp.

◆ PROMOTE_POLYGON_TO_MULTIPOLYGON

constexpr bool Importer_NS::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static

Definition at line 141 of file Importer.cpp.

◆ status_mutex

mapd_shared_mutex Importer_NS::status_mutex
static

Definition at line 143 of file Importer.cpp.